//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumSimplified, "Number of library calls simplified");

static cl::opt<unsigned> GuardWideningWindow(
    "instcombine-guard-widening-window",
    cl::init(3),
    cl::desc("How wide an instruction window to bypass looking for "
             "another guard"));

namespace llvm {
/// enable preservation of attributes in assume like:
/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
extern cl::opt<bool> EnableKnowledgeRetention;
} // namespace llvm

/// Return the specified type promoted as it would be to pass though a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
/// TODO: This should probably be integrated with visitAllocSites, but that
/// requires a deeper change to allow either unread or unwritten objects.
static bool hasUndefSource(AnyMemTransferInst *MI) {
  auto *Src = MI->getRawSource();
  while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
    if (!Src->hasOneUse())
      return false;
    Src = cast<Instruction>(Src)->getOperand(0);
  }
  return isa<AllocaInst>(Src) && Src->hasOneUse();
}

Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
  Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
  MaybeAlign CopyDstAlign = MI->getDestAlign();
  if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
    MI->setDestAlignment(DstAlign);
    return MI;
  }

  Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
  MaybeAlign CopySrcAlign = MI->getSourceAlign();
  if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
    MI->setSourceAlignment(SrcAlign);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If the source is provably undef, the memcpy/memmove doesn't do anything
  // (unless the transfer is volatile).
  if (hasUndefSource(MI) && !MI->isVolatile()) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic. See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (isa<AtomicMemTransferInst>(MI))
    if (*CopyDstAlign < Size || *CopySrcAlign < Size)
      return nullptr;

  // Use an integer load+store unless we can find something better.
  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);

  // If the memcpy has metadata describing the members, see if we can get the
  // TBAA tag describing our copy.
  MDNode *CopyMD = nullptr;
  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
    CopyMD = M;
  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
    if (M->getNumOperands() == 3 && M->getOperand(0) &&
        mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
        mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
        M->getOperand(1) &&
        mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
        mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
            Size &&
        M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
      CopyMD = cast<MDNode>(M->getOperand(2));
  }

  Value *Src = MI->getArgOperand(1);
  Value *Dest = MI->getArgOperand(0);
  LoadInst *L = Builder.CreateLoad(IntType, Src);
  // Alignment from the mem intrinsic will be better, so use it.
  L->setAlignment(*CopySrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  MDNode *LoopMemParallelMD =
      MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
  if (AccessGroupMD)
    L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);

  StoreInst *S = Builder.CreateStore(L, Dest);
  // Alignment from the mem intrinsic will be better, so use it.
  S->setAlignment(*CopyDstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  if (AccessGroupMD)
    S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
  S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);

  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
    // non-atomics can be volatile
    L->setVolatile(MT->isVolatile());
    S->setVolatile(MT->isVolatile());
  }
  if (isa<AtomicMemTransferInst>(MI)) {
    // atomics have to be unordered
    L->setOrdering(AtomicOrdering::Unordered);
    S->setOrdering(AtomicOrdering::Unordered);
  }

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
  return MI;
}
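
// Illustrative example (not from the original source): with a constant 8-byte
// length and adequate known alignment, a transfer such as
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 8, i1 false)
// is rewritten above into a single integer load/store pair
//   %v = load i64, ptr %src
//   store i64 %v, ptr %dst
// and the intrinsic's length is zeroed so a later iteration erases it.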

Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
  const Align KnownAlignment =
      getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
  MaybeAlign MemSetAlign = MI->getDestAlign();
  if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
    MI->setDestAlignment(KnownAlignment);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Remove memset with an undef value.
  // FIXME: This is technically incorrect because it might overwrite a poison
  // value. Change to PoisonValue once #52930 is resolved.
  if (isa<UndefValue>(MI->getValue())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  const uint64_t Len = LenC->getLimitedValue();
  assert(Len && "0-sized memory setting should be removed already.");
  const Align Alignment = MI->getDestAlign().valueOrOne();

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (isa<AtomicMemSetInst>(MI))
    if (Alignment < Len)
      return nullptr;

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();

    // Extract the fill value and store.
    const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    Constant *FillVal = ConstantInt::get(ITy, Fill);
    StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
    S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
    for (auto *DAI : at::getAssignmentMarkers(S)) {
      if (llvm::is_contained(DAI->location_ops(), FillC))
        DAI->replaceVariableLocationOp(FillC, FillVal);
    }

    S->setAlignment(Alignment);
    if (isa<AtomicMemSetInst>(MI))
      S->setOrdering(AtomicOrdering::Unordered);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}
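
// Illustrative example (not from the original source): a constant fill such as
//   call void @llvm.memset.p0.i64(ptr %p, i8 7, i64 4, i1 false)
// becomes, via the function above, a single store of the splatted fill value
//   store i32 117901063, ptr %p        ; 0x07070707
// after which the zero-length intrinsic is erased.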

// TODO, Obvious Missing Transforms:
// * Narrow width by halfs excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
  Value *LoadPtr = II.getArgOperand(0);
  const Align Alignment =
      cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();

  // If the mask is all ones or undefs, this is a plain vector load of the 1st
  // argument.
  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
    LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                            "unmaskedload");
    L->copyMetadata(II);
    return L;
  }

  // If we can unconditionally load from this address, replace with a
  // load/select idiom. TODO: use DT for context sensitive query
  if (isDereferenceablePointer(LoadPtr, II.getType(),
                               II.getModule()->getDataLayout(), &II, &AC)) {
    LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                             "unmaskedload");
    LI->copyMetadata(II);
    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
  }

  return nullptr;
}
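
// Illustrative example (not from the original source): a masked load whose
// mask is all ones, e.g.
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//                         <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i32> %pt)
// is replaced above by a plain aligned vector load; if %p is known
// dereferenceable, a partial mask instead becomes a load feeding a select
// against the passthrough value %pt.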

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, this instruction does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // If the mask is all ones, this is a plain vector store of the 1st argument.
  if (ConstMask->isAllOnesValue()) {
    Value *StorePtr = II.getArgOperand(1);
    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
    StoreInst *S =
        new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
    S->copyMetadata(II);
    return S;
  }

  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
    return replaceOperand(II, 0, V);

  return nullptr;
}
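
// Illustrative example (not from the original source): a masked store with an
// all-zero constant mask is simply erased, while an all-ones mask such as
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %v, ptr %p, i32 4,
//                                         <4 x i1> <i1 1, i1 1, i1 1, i1 1>)
// becomes an ordinary aligned vector store of %v to %p.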

// TODO, Obvious Missing Transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halfs excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
  if (!ConstMask)
    return nullptr;

  // Vector splat address w/known mask -> scalar load
  // Fold the gather to load the source vector first lane
  // because it is reloading the same value each time
  if (ConstMask->isAllOnesValue())
    if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
      auto *VecTy = cast<VectorType>(II.getType());
      const Align Alignment =
          cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
      LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
                                              Alignment, "load.scalar");
      Value *Shuf =
          Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
      return replaceInstUsesWith(II, cast<Instruction>(Shuf));
    }

  return nullptr;
}
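
// Illustrative example (not from the original source): a gather whose pointer
// operand is a splat and whose mask is all ones reloads the same address in
// every lane, so it is folded above to one scalar load followed by a vector
// splat ("broadcast") of the loaded value.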

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halfs excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, a scatter does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // Vector splat address -> scalar store
  if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
    // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
    if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
      Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
      StoreInst *S =
          new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment);
      S->copyMetadata(II);
      return S;
    }
    // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
    // lastlane), ptr
    if (ConstMask->isAllOnesValue()) {
      Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
      VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
      ElementCount VF = WideLoadTy->getElementCount();
      Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
      Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
      Value *Extract =
          Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
      StoreInst *S =
          new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
      S->copyMetadata(II);
      return S;
    }
  }
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
    return replaceOperand(II, 0, V);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
    return replaceOperand(II, 1, V);

  return nullptr;
}
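
// Illustrative example (not from the original source): a scatter that writes a
// splatted value through a splatted pointer collapses to one scalar store;
// when only the pointer is a splat and the mask is all ones, the last enabled
// lane wins, so the element at index VF-1 is extracted and stored.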

/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
                                                    InstCombinerImpl &IC) {
  auto *Arg = II.getArgOperand(0);
  auto *StrippedArg = Arg->stripPointerCasts();
  auto *StrippedInvariantGroupsArg = StrippedArg;
  while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
    if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
        Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
      break;
    StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
  }
  if (StrippedArg == StrippedInvariantGroupsArg)
    return nullptr; // No launders/strips to remove.

  Value *Result = nullptr;

  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
    Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
    Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
  else
    llvm_unreachable(
        "simplifyInvariantGroupIntrinsic only handles launder and strip");
  if (Result->getType()->getPointerAddressSpace() !=
      II.getType()->getPointerAddressSpace())
    Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());

  return cast<Instruction>(Result);
}
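
// Illustrative example (not from the original source):
//   %a = call ptr @llvm.strip.invariant.group.p0(ptr %x)
//   %b = call ptr @llvm.launder.invariant.group.p0(ptr %a)
// the outer call is rewritten above to launder %x directly, since only the
// outermost launder/strip carries the relevant invariant.group information.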

static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
          II.getIntrinsicID() == Intrinsic::ctlz) &&
         "Expected cttz or ctlz intrinsic");
  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
  Value *Op0 = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *X;
  // ctlz(bitreverse(x)) -> cttz(x)
  // cttz(bitreverse(x)) -> ctlz(x)
  if (match(Op0, m_BitReverse(m_Value(X)))) {
    Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
    Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
    return CallInst::Create(F, {X, II.getArgOperand(1)});
  }

  if (II.getType()->isIntOrIntVectorTy(1)) {
    // ctlz/cttz i1 Op0 --> not Op0
    if (match(Op1, m_Zero()))
      return BinaryOperator::CreateNot(Op0);
    // If zero is poison, then the input can be assumed to be "true", so the
    // instruction simplifies to "false".
    assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
    return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
  }

  if (IsTZ) {
    // cttz(-x) -> cttz(x)
    if (match(Op0, m_Neg(m_Value(X))))
      return IC.replaceOperand(II, 0, X);

    // cttz(-x & x) -> cttz(x)
    if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
      return IC.replaceOperand(II, 0, X);

    // cttz(sext(x)) -> cttz(zext(x))
    if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
      auto *Zext = IC.Builder.CreateZExt(X, II.getType());
      auto *CttzZext =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
      return IC.replaceInstUsesWith(II, CttzZext);
    }

    // Zext doesn't change the number of trailing zeros, so narrow:
    // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
      auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
                                                    IC.Builder.getTrue());
      auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
      return IC.replaceInstUsesWith(II, ZextCttz);
    }

    // cttz(abs(x)) -> cttz(x)
    // cttz(nabs(x)) -> cttz(x)
    Value *Y;
    SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
    if (SPF == SPF_ABS || SPF == SPF_NABS)
      return IC.replaceOperand(II, 0, X);

    if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
      return IC.replaceOperand(II, 0, X);
  }

  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);

  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
                                : Known.countMaxLeadingZeros();
  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
                                : Known.countMinLeadingZeros();

  // If all bits above (ctlz) or below (cttz) the first known one are known
  // zero, this value is constant.
  // FIXME: This should be in InstSimplify because we're replacing an
  // instruction with a constant.
  if (PossibleZeros == DefiniteZeros) {
    auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
    return IC.replaceInstUsesWith(II, C);
  }

  // If the input to cttz/ctlz is known to be non-zero,
  // then change the 'ZeroIsPoison' parameter to 'true'
  // because we know the zero behavior can't affect the result.
  if (!Known.One.isZero() ||
      isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
                     &IC.getDominatorTree())) {
    if (!match(II.getArgOperand(1), m_One()))
      return IC.replaceOperand(II, 1, IC.Builder.getTrue());
  }

  // Add range metadata since known bits can't completely reflect what we know.
  auto *IT = cast<IntegerType>(Op0->getType()->getScalarType());
  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}
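
// Illustrative example (not from the original source): for
//   %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
// where %x is known non-zero, the ZeroIsPoison flag is switched to true above;
// if the exact number of trailing zeros is known from known bits, the call
// folds to that constant, and otherwise !range metadata bounding the result
// is attached.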

static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
         "Expected ctpop intrinsic");
  Type *Ty = II.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  Value *Op0 = II.getArgOperand(0);
  Value *X, *Y;

  // ctpop(bitreverse(x)) -> ctpop(x)
  // ctpop(bswap(x)) -> ctpop(x)
  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
    return IC.replaceOperand(II, 0, X);

  // ctpop(rot(x)) -> ctpop(x)
  if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
       match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
      X == Y)
    return IC.replaceOperand(II, 0, X);

  // ctpop(x | -x) -> bitwidth - cttz(x, false)
  if (Op0->hasOneUse() &&
      match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
    auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
    return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
  }

  // ctpop(~x & (x - 1)) -> cttz(x, false)
  if (match(Op0,
            m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    return CallInst::Create(F, {X, IC.Builder.getFalse()});
  }

  // Zext doesn't change the number of set bits, so narrow:
  // ctpop (zext X) --> zext (ctpop X)
  if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
    Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
    return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
  }

  KnownBits Known(BitWidth);
  IC.computeKnownBits(Op0, Known, 0, &II);

  // If all bits are zero except for exactly one fixed bit, then the result
  // must be 0 or 1, and we can get that answer by shifting to LSB:
  // ctpop (X & 32) --> (X & 32) >> 5
  // TODO: Investigate removing this as its likely unnecessary given the below
  // `isKnownToBeAPowerOfTwo` check.
  if ((~Known.Zero).isPowerOf2())
    return BinaryOperator::CreateLShr(
        Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));

  // More generally we can also handle non-constant power of 2 patterns such as
  // shl/shr(Pow2, X), (X & -X), etc... by transforming:
  // ctpop(Pow2OrZero) --> icmp ne X, 0
  if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
    return CastInst::Create(Instruction::ZExt,
                            IC.Builder.CreateICmp(ICmpInst::ICMP_NE, Op0,
                                                  Constant::getNullValue(Ty)),
                            Ty);

  // Add range metadata since known bits can't completely reflect what we know.
  auto *IT = cast<IntegerType>(Ty->getScalarType());
  unsigned MinCount = Known.countMinPopulation();
  unsigned MaxCount = Known.countMaxPopulation();
  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}
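
// Illustrative example (not from the original source): when %x is known to be
// a power of two or zero,
//   %r = call i32 @llvm.ctpop.i32(i32 %x)
// is folded above to a zero-extended (icmp ne i32 %x, 0), since the popcount
// of such a value can only be 0 or 1.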

/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
  if (!C)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();

  // Only perform this transformation for <8 x i8> vector types.
  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
    return nullptr;

  int Indexes[8];

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = C->getAggregateElement(I);

    if (!COp || !isa<ConstantInt>(COp))
      return nullptr;

    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();

    // Make sure the mask indices are in range.
    if ((unsigned)Indexes[I] >= NumElts)
      return nullptr;
  }

  auto *V1 = II.getArgOperand(0);
  auto *V2 = Constant::getNullValue(V1->getType());
  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
}
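
// Illustrative example (not from the original source): an AArch64/ARM tbl1
// call on <8 x i8> whose index operand is the constant {7,6,5,4,3,2,1,0}
// becomes a shufflevector with that mask, which the backend can then lower
// to a byte reverse (rev64) instead of a table lookup.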

// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
                             unsigned NumOperands) {
  assert(I.arg_size() >= NumOperands && "Not enough operands");
  assert(E.arg_size() >= NumOperands && "Not enough operands");
  for (unsigned i = 0; i < NumOperands; i++)
    if (I.getArgOperand(i) != E.getArgOperand(i))
      return false;
  return true;
}

// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
//   call @llvm.foo.end(i1 0) ; &I
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
                          std::function<bool(const IntrinsicInst &)> IsStart) {
  // We start from the end intrinsic and scan backwards, so that InstCombine
  // has already processed (and potentially removed) all the instructions
  // before the end intrinsic.
  BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
  for (; BI != BE; ++BI) {
    if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
      if (I->isDebugOrPseudoInst() ||
          I->getIntrinsicID() == EndI.getIntrinsicID())
        continue;
      if (IsStart(*I)) {
        if (haveSameOperands(EndI, *I, EndI.arg_size())) {
          IC.eraseInstFromFunction(*I);
          IC.eraseInstFromFunction(EndI);
          return true;
        }
        // Skip start intrinsics that don't pair with this end intrinsic.
        continue;
      }
    }
    break;
  }

  return false;
}

Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
  removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
    return I.getIntrinsicID() == Intrinsic::vastart ||
           I.getIntrinsicID() == Intrinsic::vacopy;
  });
  return nullptr;
}

static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
  assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
  Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
  if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
    Call.setArgOperand(0, Arg1);
    Call.setArgOperand(1, Arg0);
    return &Call;
  }
  return nullptr;
}

/// Creates a result tuple for an overflow intrinsic \p II with a given
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
                                        Constant *Overflow) {
  Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
  StructType *ST = cast<StructType>(II->getType());
  Constant *Struct = ConstantStruct::get(ST, V);
  return InsertValueInst::Create(Struct, Result, 0);
}

Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
  WithOverflowInst *WO = cast<WithOverflowInst>(II);
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
                            WO->getRHS(), *WO, OperationResult, OverflowResult))
    return createOverflowTuple(WO, OperationResult, OverflowResult);
  return nullptr;
}

static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
  Ty = Ty->getScalarType();
  return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
}

static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
  Ty = Ty->getScalarType();
  return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
}

/// \returns the compare predicate type if the test performed by
/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
/// floating-point environment assumed for \p F for type \p Ty
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
                                              const Function &F, Type *Ty) {
  switch (static_cast<unsigned>(Mask)) {
  case fcZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  case fcZero | fcSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  case fcPositive | fcNegZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  case fcPositive | fcNegZero | fcNegSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  case fcPosSubnormal | fcPosNormal | fcPosInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  case fcNegative | fcPosZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  case fcNegative | fcPosZero | fcPosSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  case fcNegSubnormal | fcNegNormal | fcNegInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  case fcPosNormal | fcPosInf:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  case fcNegNormal | fcNegInf:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  case ~fcZero & ~fcNan:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  case ~(fcZero | fcSubnormal) & ~fcNan:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  default:
    break;
  }

  return FCmpInst::BAD_FCMP_PREDICATE;
}

Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
  Value *Src0 = II.getArgOperand(0);
  Value *Src1 = II.getArgOperand(1);
  const ConstantInt *CMask = cast<ConstantInt>(Src1);
  FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
  const bool IsUnordered = (Mask & fcNan) == fcNan;
  const bool IsOrdered = (Mask & fcNan) == fcNone;
  const FPClassTest OrderedMask = Mask & ~fcNan;
  const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;

  const bool IsStrict = II.isStrictFP();

  Value *FNegSrc;
  if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
    // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
    II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
    return replaceOperand(II, 0, FNegSrc);
  }

  Value *FAbsSrc;
  if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
    II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
    return replaceOperand(II, 0, FAbsSrc);
  }

  if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
    // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
    // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
    // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
    Constant *Inf = ConstantFP::getInfinity(Src0->getType());
    FCmpInst::Predicate Pred =
        IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
    if (OrderedInvertedMask == fcInf)
      Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;

    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
    Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
    CmpInf->takeName(&II);
    return replaceInstUsesWith(II, CmpInf);
  }

  if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
    // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
    // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
    // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
    Constant *Inf =
        ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
    Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
                               : Builder.CreateFCmpOEQ(Src0, Inf);

    EqInf->takeName(&II);
    return replaceInstUsesWith(II, EqInf);
  }

  if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
    // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
    // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
    // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
    Constant *Inf = ConstantFP::getInfinity(Src0->getType(),
                                            OrderedInvertedMask == fcNegInf);
    Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
                               : Builder.CreateFCmpONE(Src0, Inf);
    NeInf->takeName(&II);
    return replaceInstUsesWith(II, NeInf);
  }

  if (Mask == fcNan && !IsStrict) {
    // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
    // exceptions.
    Value *IsNan =
        Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
    IsNan->takeName(&II);
    return replaceInstUsesWith(II, IsNan);
  }

  if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
    // Equivalent of !isnan. Replace with standard fcmp.
    Value *FCmp =
        Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
    FCmp->takeName(&II);
    return replaceInstUsesWith(II, FCmp);
  }

  FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;

  // Try to replace with an fcmp with 0
  //
  // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
  // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
  // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
  // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
  //
  // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
  // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
  //
  // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
  // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
  //
  if (!IsStrict && (IsOrdered || IsUnordered) &&
      (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
                                     Src0->getType())) !=
          FCmpInst::BAD_FCMP_PREDICATE) {
    Constant *Zero = ConstantFP::getZero(Src0->getType());
    // Equivalent of == 0.
    Value *FCmp = Builder.CreateFCmp(
        IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
        Src0, Zero);

    FCmp->takeName(&II);
    return replaceInstUsesWith(II, FCmp);
  }

  KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);

  // Clear test bits we know must be false from the source value.
  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
  // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
  if ((Mask & Known.KnownFPClasses) != Mask) {
    II.setArgOperand(
        1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
    return &II;
  }

  // If none of the tests which can return false are possible, fold to true.
  // fp_class (nnan x), ~(qnan|snan) -> true
  // fp_class (ninf x), ~(ninf|pinf) -> true
  if (Mask == Known.KnownFPClasses)
    return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));

  return nullptr;
}

static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
                                        const DataLayout &DL, AssumptionCache *AC,
                                        DominatorTree *DT) {
  KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
  if (Known.isNonNegative())
    return false;
  if (Known.isNegative())
    return true;

  Value *X, *Y;
  if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
    return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);

  return isImpliedByDomCondition(
      ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}

static std::optional<bool> getKnownSignOrZero(Value *Op, Instruction *CxtI,
                                              const DataLayout &DL,
                                              AssumptionCache *AC,
                                              DominatorTree *DT) {
  if (std::optional<bool> Sign = getKnownSign(Op, CxtI, DL, AC, DT))
    return Sign;

  Value *X, *Y;
  if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
    return isImpliedByDomCondition(ICmpInst::ICMP_SLE, X, Y, CxtI, DL);

  return std::nullopt;
}

/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, Instruction *CxtI,
                                 const DataLayout &DL, AssumptionCache *AC,
                                 DominatorTree *DT) {
  std::optional<bool> Known1 = getKnownSign(Op1, CxtI, DL, AC, DT);
  if (!Known1)
    return false;
  std::optional<bool> Known0 = getKnownSign(Op0, CxtI, DL, AC, DT);
  if (!Known0)
    return false;
  return *Known0 == *Known1;
}

/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
          MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
         "Expected a min or max intrinsic");

  // TODO: Match vectors with undef elements, but undef may not propagate.
  Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
  Value *X;
  const APInt *C0, *C1;
  if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
      !match(Op1, m_APInt(C1)))
    return nullptr;

  // Check for necessary no-wrap and overflow constraints.
  bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
  auto *Add = cast<BinaryOperator>(Op0);
  if ((IsSigned && !Add->hasNoSignedWrap()) ||
      (!IsSigned && !Add->hasNoUnsignedWrap()))
    return nullptr;

  // If the constant difference overflows, then instsimplify should reduce the
  // min/max to the add or C1.
  bool Overflow;
  APInt CDiff =
      IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
  assert(!Overflow && "Expected simplify of min/max");

  // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
  // Note: the "mismatched" no-overflow setting does not propagate.
  Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
  Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
  return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
                  : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
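
// Illustrative example (not from the original source): with a matching nuw add,
//   umin (add nuw %x, 10), 30  -->  add nuw (umin %x, 20), 10
// which exposes the inner umin to further constant folds.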

/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
  Type *Ty = MinMax1.getType();

  // We are looking for a tree of:
  // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
  // Where the min and max could be reversed
  Instruction *MinMax2;
  BinaryOperator *AddSub;
  const APInt *MinValue, *MaxValue;
  if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
    if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
      return nullptr;
  } else if (match(&MinMax1,
                   m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
    if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
      return nullptr;
  } else
    return nullptr;

  // Check that the constants clamp a saturate, and that the new type would be
  // sensible to convert to.
  if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
    return nullptr;
  // In what bitwidth can this be treated as saturating arithmetics?
  unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
  // FIXME: This isn't quite right for vectors, but using the scalar type is a
  // good first approximation for what should be done there.
  if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
    return nullptr;

  // Also make sure that the inner min/max and the add/sub have one use.
  if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
    return nullptr;

  // Create the new type (which can be a vector type)
  Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);

  Intrinsic::ID IntrinsicID;
  if (AddSub->getOpcode() == Instruction::Add)
    IntrinsicID = Intrinsic::sadd_sat;
  else if (AddSub->getOpcode() == Instruction::Sub)
    IntrinsicID = Intrinsic::ssub_sat;
  else
    return nullptr;

  // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
  // is usually achieved via a sext from a smaller type.
  if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
          NewBitWidth ||
      ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
    return nullptr;

  // Finally create and return the sat intrinsic, truncated to the new type
  Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
  Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
  Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
  Value *Sat = Builder.CreateCall(F, {AT, BT});
  return CastInst::Create(Instruction::SExt, Sat, Ty);
}
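
// Illustrative example (not from the original source): a clamp of a widened sum
//   smax (smin (add (sext i8 %a to i32), (sext i8 %b to i32)), 127), -128
// is recognized above and rewritten as
//   sext (call i8 @llvm.sadd.sat.i8(i8 %a, i8 %b)) to i32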

/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
                                        InstCombiner::BuilderTy &Builder) {
  Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
  Value *X;
  const APInt *C0, *C1;
  if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
    return nullptr;

  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  switch (II->getIntrinsicID()) {
  case Intrinsic::smax:
    if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_SGT;
    break;
  case Intrinsic::smin:
    if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_SLT;
    break;
  case Intrinsic::umax:
    if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_UGT;
    break;
  case Intrinsic::umin:
    if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_ULT;
    break;
  default:
    llvm_unreachable("Expected min/max intrinsic");
  }
  if (Pred == CmpInst::BAD_ICMP_PREDICATE)
    return nullptr;

  // max (min X, 42), 41 --> X > 41 ? 42 : 41
  // min (max X, 42), 43 --> X < 43 ? 42 : 43
  Value *Cmp = Builder.CreateICmp(Pred, X, I1);
  return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
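
// Illustrative example (not from the original source): a two-valued clamp like
//   smax (smin %x, 42), 41
// can only produce 41 or 42, so it is turned into
//   select (icmp sgt %x, 41), 42, 41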

/// If this min/max has a constant operand and an operand that is a matching
/// min/max with a constant operand, constant-fold the 2 constant operands.
static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
                                             IRBuilderBase &Builder) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  if (!LHS || LHS->getIntrinsicID() != MinMaxID)
    return nullptr;

  Constant *C0, *C1;
  if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
      !match(II->getArgOperand(1), m_ImmConstant(C1)))
    return nullptr;

  // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
  ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
  Value *CondC = Builder.CreateICmp(Pred, C0, C1);
  Value *NewC = Builder.CreateSelect(CondC, C0, C1);
  return Builder.CreateIntrinsic(MinMaxID, II->getType(),
                                 {LHS->getArgOperand(0), NewC});
}

/// If this min/max has a matching min/max operand with a constant, try to push
/// the constant operand into this instruction. This can enable more folds.
static Instruction *
reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  // Match and capture a min/max operand candidate.
  Value *X, *Y;
  Constant *C;
  Instruction *Inner;
  if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
                                  m_Instruction(Inner),
                                  m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
                              m_Value(Y))))
    return nullptr;

  // The inner op must match. Check for constants to avoid infinite loops.
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
  if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
      match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
    return nullptr;

  // max (max X, C), Y --> max (max X, Y), C
  Function *MinMax =
      Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
  Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
  NewInner->takeName(Inner);
  return CallInst::Create(MinMax, {NewInner, C});
}

/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
  // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
      RHS->getIntrinsicID() != MinMaxID ||
      (!LHS->hasOneUse() && !RHS->hasOneUse()))
    return nullptr;

  Value *A = LHS->getArgOperand(0);
  Value *B = LHS->getArgOperand(1);
  Value *C = RHS->getArgOperand(0);
  Value *D = RHS->getArgOperand(1);

  // Look for a common operand.
  Value *MinMaxOp = nullptr;
  Value *ThirdOp = nullptr;
  if (LHS->hasOneUse()) {
    // If the LHS is only used in this chain and the RHS is used outside of it,
    // reuse the RHS min/max because that will eliminate the LHS.
    if (D == A || C == A) {
      // min(min(a, b), min(c, a)) --> min(min(c, a), b)
      // min(min(a, b), min(a, d)) --> min(min(a, d), b)
      MinMaxOp = RHS;
      ThirdOp = B;
    } else if (D == B || C == B) {
      // min(min(a, b), min(c, b)) --> min(min(c, b), a)
      // min(min(a, b), min(b, d)) --> min(min(b, d), a)
      MinMaxOp = RHS;
      ThirdOp = A;
    }
  } else {
    assert(RHS->hasOneUse() && "Expected one-use operand");
    // Reuse the LHS. This will eliminate the RHS.
    if (D == A || D == B) {
      // min(min(a, b), min(c, a)) --> min(min(a, b), c)
      // min(min(a, b), min(c, b)) --> min(min(a, b), c)
      MinMaxOp = LHS;
      ThirdOp = C;
    } else if (C == A || C == B) {
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
      // min(min(a, b), min(c, b)) --> min(min(a, b), d)
      MinMaxOp = LHS;
      ThirdOp = D;
    }
  }

  if (!MinMaxOp || !ThirdOp)
    return nullptr;

  Module *Mod = II->getModule();
  Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
  return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
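
// Illustrative example (not from the original source): with a shared operand %a,
//   umin (umin %a, %b), (umin %c, %a)  -->  umin (umin %c, %a), %b
// reusing whichever inner call has other users so the one-use call dies.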

/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
static Instruction *
foldShuffledIntrinsicOperands(IntrinsicInst *II,
                              InstCombiner::BuilderTy &Builder) {
  // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
  //       etc. Use llvm::isTriviallyVectorizable() and related to determine
  //       which intrinsics are safe to shuffle?
  switch (II->getIntrinsicID()) {
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::fma:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
    break;
  default:
    return nullptr;
  }

  Value *X;
  ArrayRef<int> Mask;
  if (!match(II->getArgOperand(0),
             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
    return nullptr;

  // At least 1 operand must have 1 use because we are creating 2 instructions.
  if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
    return nullptr;

  // See if all arguments are shuffled with the same mask.
  SmallVector<Value *, 4> NewArgs(II->arg_size());
  NewArgs[0] = X;
  Type *SrcTy = X->getType();
  for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
    if (!match(II->getArgOperand(i),
               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
        X->getType() != SrcTy)
      return nullptr;
    NewArgs[i] = X;
  }

  // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
  Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
  Value *NewIntrinsic =
      Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
  return new ShuffleVectorInst(NewIntrinsic, Mask);
}

/// Fold the following cases and accepts bswap and bitreverse intrinsics:
///   bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
///   bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
template <Intrinsic::ID IntrID>
static Instruction *foldBitOrderCrossLogicOp(Value *V,
                                             InstCombiner::BuilderTy &Builder) {
  static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
                "This helper only supports BSWAP and BITREVERSE intrinsics");

  Value *X, *Y;
  // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
  // don't match ConstantExpr that aren't meaningful for this transform.
  if (match(V, m_OneUse(m_BitwiseLogic(m_Value(X), m_Value(Y)))) &&
      isa<BinaryOperator>(V)) {
    Value *OldReorderX, *OldReorderY;
    BinaryOperator::BinaryOps Op = cast<BinaryOperator>(V)->getOpcode();

    // If both X and Y are bswap/bitreverse, the transform reduces the number
    // of instructions even if there's multiuse.
    // If only one operand is bswap/bitreverse, we need to ensure the operand
    // have only one use.
    if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
        match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
      return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
    }

    if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
      return BinaryOperator::Create(Op, OldReorderX, NewReorder);
    }

    if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
      return BinaryOperator::Create(Op, NewReorder, OldReorderY);
    }
  }
  return nullptr;
}
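
// Illustrative example (not from the original source):
//   bswap (xor (bswap %x), %y)  -->  xor %x, (bswap %y)
// and when both operands are already byte-swapped, the outer bswap and both
// inner ones cancel, leaving just the logic op.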
1402 /// CallInst simplification. This mostly only handles folding of intrinsic
1403 /// instructions. For normal calls, it allows visitCallBase to do the heavy
1405 Instruction
*InstCombinerImpl::visitCallInst(CallInst
&CI
) {
1406 // Don't try to simplify calls without uses. It will not do anything useful,
1407 // but will result in the following folds being skipped.
1408 if (!CI
.use_empty()) {
1409 SmallVector
<Value
*, 4> Args
;
1410 Args
.reserve(CI
.arg_size());
1411 for (Value
*Op
: CI
.args())
1413 if (Value
*V
= simplifyCall(&CI
, CI
.getCalledOperand(), Args
,
1414 SQ
.getWithInstruction(&CI
)))
1415 return replaceInstUsesWith(CI
, V
);
1418 if (Value
*FreedOp
= getFreedOperand(&CI
, &TLI
))
1419 return visitFree(CI
, FreedOp
);
1421 // If the caller function (i.e. us, the function that contains this CallInst)
1422 // is nounwind, mark the call as nounwind, even if the callee isn't.
1423 if (CI
.getFunction()->doesNotThrow() && !CI
.doesNotThrow()) {
1424 CI
.setDoesNotThrow();
1428 IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(&CI
);
1429 if (!II
) return visitCallBase(CI
);
1431 // For atomic unordered mem intrinsics if len is not a positive or
1432 // not a multiple of element size then behavior is undefined.
1433 if (auto *AMI
= dyn_cast
<AtomicMemIntrinsic
>(II
))
1434 if (ConstantInt
*NumBytes
= dyn_cast
<ConstantInt
>(AMI
->getLength()))
1435 if (NumBytes
->isNegative() ||
1436 (NumBytes
->getZExtValue() % AMI
->getElementSizeInBytes() != 0)) {
1437 CreateNonTerminatorUnreachable(AMI
);
1438 assert(AMI
->getType()->isVoidTy() &&
1439 "non void atomic unordered mem intrinsic");
1440 return eraseInstFromFunction(*AMI
);
1443 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1444 // instead of in visitCallBase.
1445 if (auto *MI
= dyn_cast
<AnyMemIntrinsic
>(II
)) {
1446 bool Changed
= false;
1448 // memmove/cpy/set of zero bytes is a noop.
1449 if (Constant
*NumBytes
= dyn_cast
<Constant
>(MI
->getLength())) {
1450 if (NumBytes
->isNullValue())
1451 return eraseInstFromFunction(CI
);
1454 // No other transformations apply to volatile transfers.
1455 if (auto *M
= dyn_cast
<MemIntrinsic
>(MI
))
1456 if (M
->isVolatile())
1459 // If we have a memmove and the source operation is a constant global,
1460 // then the source and dest pointers can't alias, so we can change this
1461 // into a call to memcpy.
1462 if (auto *MMI
= dyn_cast
<AnyMemMoveInst
>(MI
)) {
1463 if (GlobalVariable
*GVSrc
= dyn_cast
<GlobalVariable
>(MMI
->getSource()))
1464 if (GVSrc
->isConstant()) {
1465 Module
*M
= CI
.getModule();
1466 Intrinsic::ID MemCpyID
=
1467 isa
<AtomicMemMoveInst
>(MMI
)
1468 ? Intrinsic::memcpy_element_unordered_atomic
1469 : Intrinsic::memcpy
;
1470 Type
*Tys
[3] = { CI
.getArgOperand(0)->getType(),
1471 CI
.getArgOperand(1)->getType(),
1472 CI
.getArgOperand(2)->getType() };
1473 CI
.setCalledFunction(Intrinsic::getDeclaration(M
, MemCpyID
, Tys
));
1478 if (AnyMemTransferInst
*MTI
= dyn_cast
<AnyMemTransferInst
>(MI
)) {
1479 // memmove(x,x,size) -> noop.
1480 if (MTI
->getSource() == MTI
->getDest())
1481 return eraseInstFromFunction(CI
);
1484 // If we can determine a pointer alignment that is bigger than currently
1485 // set, update the alignment.
1486 if (auto *MTI
= dyn_cast
<AnyMemTransferInst
>(MI
)) {
1487 if (Instruction
*I
= SimplifyAnyMemTransfer(MTI
))
1489 } else if (auto *MSI
= dyn_cast
<AnyMemSetInst
>(MI
)) {
1490 if (Instruction
*I
= SimplifyAnyMemSet(MSI
))
1494 if (Changed
) return II
;
1497 // For fixed width vector result intrinsics, use the generic demanded vector
1499 if (auto *IIFVTy
= dyn_cast
<FixedVectorType
>(II
->getType())) {
1500 auto VWidth
= IIFVTy
->getNumElements();
1501 APInt
UndefElts(VWidth
, 0);
1502 APInt
AllOnesEltMask(APInt::getAllOnes(VWidth
));
1503 if (Value
*V
= SimplifyDemandedVectorElts(II
, AllOnesEltMask
, UndefElts
)) {
1505 return replaceInstUsesWith(*II
, V
);
1510 if (II
->isCommutative()) {
1511 if (CallInst
*NewCall
= canonicalizeConstantArg0ToArg1(CI
))
1515 // Unused constrained FP intrinsic calls may have declared side effect, which
1516 // prevents it from being removed. In some cases however the side effect is
1517 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
1518 // returns a replacement, the call may be removed.
1519 if (CI
.use_empty() && isa
<ConstrainedFPIntrinsic
>(CI
)) {
1520 if (simplifyConstrainedFPCall(&CI
, SQ
.getWithInstruction(&CI
)))
1521 return eraseInstFromFunction(CI
);
1524 Intrinsic::ID IID
= II
->getIntrinsicID();
1526 case Intrinsic::objectsize
: {
1527 SmallVector
<Instruction
*> InsertedInstructions
;
1528 if (Value
*V
= lowerObjectSizeCall(II
, DL
, &TLI
, AA
, /*MustSucceed=*/false,
1529 &InsertedInstructions
)) {
1530 for (Instruction
*Inserted
: InsertedInstructions
)
1531 Worklist
.add(Inserted
);
1532 return replaceInstUsesWith(CI
, V
);
  case Intrinsic::abs: {
    Value *IIOperand = II->getArgOperand(0);
    bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();

    // abs(-x) -> abs(x)
    // TODO: Copy nsw if it was present on the neg?
    Value *X;
    if (match(IIOperand, m_Neg(m_Value(X))))
      return replaceOperand(*II, 0, X);
    if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
      return replaceOperand(*II, 0, X);
    if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
      return replaceOperand(*II, 0, X);

    if (std::optional<bool> Known =
            getKnownSignOrZero(IIOperand, II, DL, &AC, &DT)) {
      // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
      // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
      if (!*Known)
        return replaceInstUsesWith(*II, IIOperand);

      // abs(x) -> -x if x < 0
      // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
      if (IntMinIsPoison)
        return BinaryOperator::CreateNSWNeg(IIOperand);
      return BinaryOperator::CreateNeg(IIOperand);
    }

    // abs (sext X) --> zext (abs X*)
    // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
    if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
      Value *NarrowAbs =
          Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
      return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
    }

    // Match a complicated way to check if a number is odd/even:
    // abs (srem X, 2) --> and X, 1
    const APInt *C;
    if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
      return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));

    break;
  }
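  // Illustrative example (not from the original source) of the umin fold
  // below: %m = call i8 @llvm.umin.i8(i8 %x, i8 1) becomes
  //   %c = icmp ne i8 %x, 0 ; %m = zext i1 %c to i8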
  case Intrinsic::umin: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    // umin(x, 1) == zext(x != 0)
    if (match(I1, m_One())) {
      assert(II->getType()->getScalarSizeInBits() != 1 &&
             "Expected simplify of umin with max constant");
      Value *Zero = Constant::getNullValue(I0->getType());
      Value *Cmp = Builder.CreateICmpNE(I0, Zero);
      return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
    }
    [[fallthrough]];
  }
  case Intrinsic::umax: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
        (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
      Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
      return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
    }
    Constant *C;
    if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
        I0->hasOneUse()) {
      if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType())) {
        Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
        return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
      }
    }
    // If both operands of unsigned min/max are sign-extended, it is still ok
    // to narrow the operation.
    [[fallthrough]];
  }
  case Intrinsic::smax:
  case Intrinsic::smin: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
        (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
      Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
      return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
    }

    Constant *C;
    if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
        I0->hasOneUse()) {
      if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType())) {
        Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
        return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
      }
    }
    // umin(i1 X, i1 Y) -> and i1 X, Y
    // smax(i1 X, i1 Y) -> and i1 X, Y
    if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
        II->getType()->isIntOrIntVectorTy(1)) {
      return BinaryOperator::CreateAnd(I0, I1);
    }

    // umax(i1 X, i1 Y) -> or i1 X, Y
    // smin(i1 X, i1 Y) -> or i1 X, Y
    if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
        II->getType()->isIntOrIntVectorTy(1)) {
      return BinaryOperator::CreateOr(I0, I1);
    }

    if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
      // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
      // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
      // TODO: Canonicalize neg after min/max if I1 is constant.
      if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
          (I0->hasOneUse() || I1->hasOneUse())) {
        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
        return BinaryOperator::CreateNSWNeg(InvMaxMin);
      }
    }
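    // Illustrative example (not from the original source) for the xor-of-pow2
    // folds below: umax(%x, xor(%x, 16)) is simply (or %x, 16), because the
    // xor only toggles bit 4 and umax keeps the value with that bit set.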
    // (umax X, (xor X, Pow2))
    //      -> (or X, Pow2)
    // (umin X, (xor X, Pow2))
    //      -> (and X, ~Pow2)
    // (smax X, (xor X, Pos_Pow2))
    //      -> (or X, Pos_Pow2)
    // (smin X, (xor X, Pos_Pow2))
    //      -> (and X, ~Pos_Pow2)
    // (smax X, (xor X, Neg_Pow2))
    //      -> (and X, ~Neg_Pow2)
    // (smin X, (xor X, Neg_Pow2))
    //      -> (or X, Neg_Pow2)
    if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
         match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
        isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
      bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
      bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;

      if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
        auto KnownSign = getKnownSign(X, II, DL, &AC, &DT);
        if (KnownSign == std::nullopt) {
          UseOr = false;
          UseAndN = false;
        } else if (*KnownSign /* true is Signed. */) {
          UseOr ^= true;
          UseAndN ^= true;
          Type *Ty = I0->getType();
          // Negative power of 2 must be IntMin. It's possible to be able to
          // prove negative / power of 2 without actually having known bits, so
          // just get the value by hand.
          X = Constant::getIntegerValue(
              Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
        }
      }

      if (UseOr)
        return BinaryOperator::CreateOr(I0, X);
      else if (UseAndN)
        return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
    }
    // If we can eliminate ~A and Y is free to invert:
    // max ~A, Y --> ~(min A, ~Y)
    //
    // Examples:
    // max ~A, ~Y --> ~(min A, Y)
    // max ~A, C --> ~(min A, ~C)
    // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
    auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
      Value *A;
      if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
          !isFreeToInvert(A, A->hasOneUse()) &&
          isFreeToInvert(Y, Y->hasOneUse())) {
        Value *NotY = Builder.CreateNot(Y);
        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
        return BinaryOperator::CreateNot(InvMaxMin);
      }
      return nullptr;
    };

    if (Instruction *I = moveNotAfterMinMax(I0, I1))
      return I;
    if (Instruction *I = moveNotAfterMinMax(I1, I0))
      return I;

    if (Instruction *I = moveAddAfterMinMax(II, Builder))
      return I;

    // smax(X, -X) --> abs(X)
    // smin(X, -X) --> -abs(X)
    // umax(X, -X) --> -abs(X)
    // umin(X, -X) --> abs(X)
    if (isKnownNegation(I0, I1)) {
      // We can choose either operand as the input to abs(), but if we can
      // eliminate the only use of a value, that's better for subsequent
      // transforms/analysis.
      if (I0->hasOneUse() && !I1->hasOneUse())
        std::swap(I0, I1);

      // This is some variant of abs(). See if we can propagate 'nsw' to the abs
      // operation and potentially its negation.
      bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
      Value *Abs = Builder.CreateBinaryIntrinsic(
          Intrinsic::abs, I0,
          ConstantInt::getBool(II->getContext(), IntMinIsPoison));

      // We don't have a "nabs" intrinsic, so negate if needed based on the
      // max/min operation.
      if (IID == Intrinsic::smin || IID == Intrinsic::umax)
        Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
      return replaceInstUsesWith(CI, Abs);
    }
    if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
      return Sel;

    if (Instruction *SAdd = matchSAddSubSat(*II))
      return SAdd;

    if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder))
      return replaceInstUsesWith(*II, NewMinMax);

    if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
      return R;

    if (Instruction *NewMinMax = factorizeMinMaxTree(II))
      return NewMinMax;

    break;
  }
  case Intrinsic::bitreverse: {
    Value *IIOperand = II->getArgOperand(0);
    // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
    Value *X;
    if (match(IIOperand, m_ZExt(m_Value(X))) &&
        X->getType()->isIntOrIntVectorTy(1)) {
      Type *Ty = II->getType();
      APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
      return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
                                ConstantInt::getNullValue(Ty));
    }

    if (Instruction *crossLogicOpFold =
            foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
      return crossLogicOpFold;

    break;
  }
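  // Illustrative example (not from the original source) for the "single
  // active byte" bswap fold below: if only the low 8 bits of a 32-bit %x can
  // be nonzero, then bswap(%x) is equivalent to shl nuw i32 %x, 24.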
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);

    // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
    // inverse-shift-of-bswap:
    // bswap (shl X, Y) --> lshr (bswap X), Y
    // bswap (lshr X, Y) --> shl (bswap X), Y
    Value *X, *Y;
    if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
      // The transform allows undef vector elements, so try a constant match
      // first. If knownbits can handle that case, that clause could be removed.
      unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
      const APInt *C;
      if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
          MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
        Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
        BinaryOperator::BinaryOps InverseShift =
            cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
                ? Instruction::LShr
                : Instruction::Shl;
        return BinaryOperator::Create(InverseShift, NewSwap, Y);
      }
    }

    KnownBits Known = computeKnownBits(IIOperand, 0, II);
    uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
    uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
    unsigned BW = Known.getBitWidth();

    // bswap(x) -> shift(x) if x has exactly one "active byte"
    if (BW - LZ - TZ == 8) {
      assert(LZ != TZ && "active byte cannot be in the middle");
      if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
        return BinaryOperator::CreateNUWShl(
            IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
      // -> lshr(x) if the "active byte" is in the high part of x
      return BinaryOperator::CreateExactLShr(
          IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
    }

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getScalarSizeInBits() - BW;
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder.CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }

    if (Instruction *crossLogicOpFold =
            foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
      return crossLogicOpFold;
    }

    break;
  }
  case Intrinsic::masked_load:
    if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
      return replaceInstUsesWith(CI, SimplifiedMaskedOp);
    break;
  case Intrinsic::masked_store:
    return simplifyMaskedStore(*II);
  case Intrinsic::masked_gather:
    return simplifyMaskedGather(*II);
  case Intrinsic::masked_scatter:
    return simplifyMaskedScatter(*II);
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
      return replaceInstUsesWith(*II, SkippedBarrier);
    break;
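  // Illustrative examples (not from the original source) for the powi folds
  // below: powi(x, -1) becomes fdiv 1.0, x, and powi(x, 2) becomes fmul x, x.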
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // 0 and 1 are handled in instsimplify
      // powi(x, -1) -> 1/x
      if (Power->isMinusOne())
        return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
                                             II->getArgOperand(0), II);
      // powi(x, 2) -> x*x
      if (Power->equalsInt(2))
        return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
                                             II->getArgOperand(0), II);

      if (!Power->getValue()[0]) {
        Value *X;
        // If power is even:
        // powi(-x, p) -> powi(x, p)
        // powi(fabs(x), p) -> powi(x, p)
        // powi(copysign(x, y), p) -> powi(x, p)
        if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
            match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
            match(II->getArgOperand(0),
                  m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
          return replaceOperand(*II, 0, X);
      }
    }
    break;
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
    if (auto *I = foldCttzCtlz(*II, *this))
      return I;
    break;

  case Intrinsic::ctpop:
    if (auto *I = foldCtpop(*II, *this))
      return I;
    break;
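  // Illustrative example (not from the original source) for the funnel-shift
  // canonicalization handled below: fshr i32 %x, %y, 8 is rewritten as
  // fshl i32 %x, %y, 24, since funnel-shifting right by C equals
  // funnel-shifting left by BitWidth - C.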
1893 case Intrinsic::fshl
:
1894 case Intrinsic::fshr
: {
1895 Value
*Op0
= II
->getArgOperand(0), *Op1
= II
->getArgOperand(1);
1896 Type
*Ty
= II
->getType();
1897 unsigned BitWidth
= Ty
->getScalarSizeInBits();
1899 if (match(II
->getArgOperand(2), m_ImmConstant(ShAmtC
))) {
1900 // Canonicalize a shift amount constant operand to modulo the bit-width.
1901 Constant
*WidthC
= ConstantInt::get(Ty
, BitWidth
);
1903 ConstantFoldBinaryOpOperands(Instruction::URem
, ShAmtC
, WidthC
, DL
);
1906 if (ModuloC
!= ShAmtC
)
1907 return replaceOperand(*II
, 2, ModuloC
);
1909 assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT
, WidthC
, ShAmtC
) ==
1910 ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty
)) &&
1911 "Shift amount expected to be modulo bitwidth");
1913 // Canonicalize funnel shift right by constant to funnel shift left. This
1914 // is not entirely arbitrary. For historical reasons, the backend may
1915 // recognize rotate left patterns but miss rotate right patterns.
1916 if (IID
== Intrinsic::fshr
) {
1917 // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
1918 Constant
*LeftShiftC
= ConstantExpr::getSub(WidthC
, ShAmtC
);
1919 Module
*Mod
= II
->getModule();
1920 Function
*Fshl
= Intrinsic::getDeclaration(Mod
, Intrinsic::fshl
, Ty
);
1921 return CallInst::Create(Fshl
, { Op0
, Op1
, LeftShiftC
});
1923 assert(IID
== Intrinsic::fshl
&&
1924 "All funnel shifts by simple constants should go left");
1926 // fshl(X, 0, C) --> shl X, C
1927 // fshl(X, undef, C) --> shl X, C
1928 if (match(Op1
, m_ZeroInt()) || match(Op1
, m_Undef()))
1929 return BinaryOperator::CreateShl(Op0
, ShAmtC
);
1931 // fshl(0, X, C) --> lshr X, (BW-C)
1932 // fshl(undef, X, C) --> lshr X, (BW-C)
1933 if (match(Op0
, m_ZeroInt()) || match(Op0
, m_Undef()))
1934 return BinaryOperator::CreateLShr(Op1
,
1935 ConstantExpr::getSub(WidthC
, ShAmtC
));
1937 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
1938 if (Op0
== Op1
&& BitWidth
== 16 && match(ShAmtC
, m_SpecificInt(8))) {
1939 Module
*Mod
= II
->getModule();
1940 Function
*Bswap
= Intrinsic::getDeclaration(Mod
, Intrinsic::bswap
, Ty
);
1941 return CallInst::Create(Bswap
, { Op0
});
1943 if (Instruction
*BitOp
=
1944 matchBSwapOrBitReverse(*II
, /*MatchBSwaps*/ true,
1945 /*MatchBitReversals*/ true))
1949 // Left or right might be masked.
1950 if (SimplifyDemandedInstructionBits(*II
))
1953 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
1954 // so only the low bits of the shift amount are demanded if the bitwidth is
1956 if (!isPowerOf2_32(BitWidth
))
1958 APInt Op2Demanded
= APInt::getLowBitsSet(BitWidth
, Log2_32_Ceil(BitWidth
));
1959 KnownBits
Op2Known(BitWidth
);
1960 if (SimplifyDemandedBits(II
, 2, Op2Demanded
, Op2Known
))
1964 case Intrinsic::ptrmask
: {
1965 Value
*InnerPtr
, *InnerMask
;
1966 if (match(II
->getArgOperand(0),
1967 m_OneUse(m_Intrinsic
<Intrinsic::ptrmask
>(m_Value(InnerPtr
),
1968 m_Value(InnerMask
))))) {
1969 assert(II
->getArgOperand(1)->getType() == InnerMask
->getType() &&
1970 "Mask types must match");
1971 Value
*NewMask
= Builder
.CreateAnd(II
->getArgOperand(1), InnerMask
);
1972 return replaceInstUsesWith(
1973 *II
, Builder
.CreateIntrinsic(InnerPtr
->getType(), Intrinsic::ptrmask
,
1974 {InnerPtr
, NewMask
}));
1978 case Intrinsic::uadd_with_overflow
:
1979 case Intrinsic::sadd_with_overflow
: {
1980 if (Instruction
*I
= foldIntrinsicWithOverflowCommon(II
))
1983 // Given 2 constant operands whose sum does not overflow:
1984 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
1985 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
1987 const APInt
*C0
, *C1
;
1988 Value
*Arg0
= II
->getArgOperand(0);
1989 Value
*Arg1
= II
->getArgOperand(1);
1990 bool IsSigned
= IID
== Intrinsic::sadd_with_overflow
;
1991 bool HasNWAdd
= IsSigned
? match(Arg0
, m_NSWAdd(m_Value(X
), m_APInt(C0
)))
1992 : match(Arg0
, m_NUWAdd(m_Value(X
), m_APInt(C0
)));
1993 if (HasNWAdd
&& match(Arg1
, m_APInt(C1
))) {
1996 IsSigned
? C1
->sadd_ov(*C0
, Overflow
) : C1
->uadd_ov(*C0
, Overflow
);
1998 return replaceInstUsesWith(
1999 *II
, Builder
.CreateBinaryIntrinsic(
2000 IID
, X
, ConstantInt::get(Arg1
->getType(), NewC
)));
2005 case Intrinsic::umul_with_overflow
:
2006 case Intrinsic::smul_with_overflow
:
2007 case Intrinsic::usub_with_overflow
:
2008 if (Instruction
*I
= foldIntrinsicWithOverflowCommon(II
))
2012 case Intrinsic::ssub_with_overflow
: {
2013 if (Instruction
*I
= foldIntrinsicWithOverflowCommon(II
))
2017 Value
*Arg0
= II
->getArgOperand(0);
2018 Value
*Arg1
= II
->getArgOperand(1);
2019 // Given a constant C that is not the minimum signed value
2020 // for an integer of a given bit width:
2022 // ssubo X, C -> saddo X, -C
2023 if (match(Arg1
, m_Constant(C
)) && C
->isNotMinSignedValue()) {
2024 Value
*NegVal
= ConstantExpr::getNeg(C
);
2025 // Build a saddo call that is equivalent to the discovered
2027 return replaceInstUsesWith(
2028 *II
, Builder
.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow
,
2035 case Intrinsic::uadd_sat
:
2036 case Intrinsic::sadd_sat
:
2037 case Intrinsic::usub_sat
:
2038 case Intrinsic::ssub_sat
: {
2039 SaturatingInst
*SI
= cast
<SaturatingInst
>(II
);
2040 Type
*Ty
= SI
->getType();
2041 Value
*Arg0
= SI
->getLHS();
2042 Value
*Arg1
= SI
->getRHS();
2044 // Make use of known overflow information.
2045 OverflowResult OR
= computeOverflow(SI
->getBinaryOp(), SI
->isSigned(),
2048 case OverflowResult::MayOverflow
:
2050 case OverflowResult::NeverOverflows
:
2052 return BinaryOperator::CreateNSW(SI
->getBinaryOp(), Arg0
, Arg1
);
2054 return BinaryOperator::CreateNUW(SI
->getBinaryOp(), Arg0
, Arg1
);
2055 case OverflowResult::AlwaysOverflowsLow
: {
2056 unsigned BitWidth
= Ty
->getScalarSizeInBits();
2057 APInt Min
= APSInt::getMinValue(BitWidth
, !SI
->isSigned());
2058 return replaceInstUsesWith(*SI
, ConstantInt::get(Ty
, Min
));
2060 case OverflowResult::AlwaysOverflowsHigh
: {
2061 unsigned BitWidth
= Ty
->getScalarSizeInBits();
2062 APInt Max
= APSInt::getMaxValue(BitWidth
, !SI
->isSigned());
2063 return replaceInstUsesWith(*SI
, ConstantInt::get(Ty
, Max
));
2067 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2069 if (IID
== Intrinsic::ssub_sat
&& match(Arg1
, m_Constant(C
)) &&
2070 C
->isNotMinSignedValue()) {
2071 Value
*NegVal
= ConstantExpr::getNeg(C
);
2072 return replaceInstUsesWith(
2073 *II
, Builder
.CreateBinaryIntrinsic(
2074 Intrinsic::sadd_sat
, Arg0
, NegVal
));
2077 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2078 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2079 // if Val and Val2 have the same sign
2080 if (auto *Other
= dyn_cast
<IntrinsicInst
>(Arg0
)) {
2082 const APInt
*Val
, *Val2
;
2085 IID
== Intrinsic::uadd_sat
|| IID
== Intrinsic::usub_sat
;
2086 if (Other
->getIntrinsicID() == IID
&&
2087 match(Arg1
, m_APInt(Val
)) &&
2088 match(Other
->getArgOperand(0), m_Value(X
)) &&
2089 match(Other
->getArgOperand(1), m_APInt(Val2
))) {
2091 NewVal
= Val
->uadd_sat(*Val2
);
2092 else if (Val
->isNonNegative() == Val2
->isNonNegative()) {
2094 NewVal
= Val
->sadd_ov(*Val2
, Overflow
);
2096 // Both adds together may add more than SignedMaxValue
2097 // without saturating the final result.
2101 // Cannot fold saturated addition with different signs.
2105 return replaceInstUsesWith(
2106 *II
, Builder
.CreateBinaryIntrinsic(
2107 IID
, X
, ConstantInt::get(II
->getType(), NewVal
)));
2113 case Intrinsic::minnum
:
2114 case Intrinsic::maxnum
:
2115 case Intrinsic::minimum
:
2116 case Intrinsic::maximum
: {
2117 Value
*Arg0
= II
->getArgOperand(0);
2118 Value
*Arg1
= II
->getArgOperand(1);
2120 if (match(Arg0
, m_FNeg(m_Value(X
))) && match(Arg1
, m_FNeg(m_Value(Y
))) &&
2121 (Arg0
->hasOneUse() || Arg1
->hasOneUse())) {
2122 // If both operands are negated, invert the call and negate the result:
2123 // min(-X, -Y) --> -(max(X, Y))
2124 // max(-X, -Y) --> -(min(X, Y))
2125 Intrinsic::ID NewIID
;
2127 case Intrinsic::maxnum
:
2128 NewIID
= Intrinsic::minnum
;
2130 case Intrinsic::minnum
:
2131 NewIID
= Intrinsic::maxnum
;
2133 case Intrinsic::maximum
:
2134 NewIID
= Intrinsic::minimum
;
2136 case Intrinsic::minimum
:
2137 NewIID
= Intrinsic::maximum
;
2140 llvm_unreachable("unexpected intrinsic ID");
2142 Value
*NewCall
= Builder
.CreateBinaryIntrinsic(NewIID
, X
, Y
, II
);
2143 Instruction
*FNeg
= UnaryOperator::CreateFNeg(NewCall
);
2144 FNeg
->copyIRFlags(II
);
2148 // m(m(X, C2), C1) -> m(X, C)
2149 const APFloat
*C1
, *C2
;
2150 if (auto *M
= dyn_cast
<IntrinsicInst
>(Arg0
)) {
2151 if (M
->getIntrinsicID() == IID
&& match(Arg1
, m_APFloat(C1
)) &&
2152 ((match(M
->getArgOperand(0), m_Value(X
)) &&
2153 match(M
->getArgOperand(1), m_APFloat(C2
))) ||
2154 (match(M
->getArgOperand(1), m_Value(X
)) &&
2155 match(M
->getArgOperand(0), m_APFloat(C2
))))) {
2158 case Intrinsic::maxnum
:
2159 Res
= maxnum(*C1
, *C2
);
2161 case Intrinsic::minnum
:
2162 Res
= minnum(*C1
, *C2
);
2164 case Intrinsic::maximum
:
2165 Res
= maximum(*C1
, *C2
);
2167 case Intrinsic::minimum
:
2168 Res
= minimum(*C1
, *C2
);
2171 llvm_unreachable("unexpected intrinsic ID");
2173 Instruction
*NewCall
= Builder
.CreateBinaryIntrinsic(
2174 IID
, X
, ConstantFP::get(Arg0
->getType(), Res
), II
);
2175 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2176 // was a simplification (so Arg0 and its original flags could
2178 NewCall
->andIRFlags(M
);
2179 return replaceInstUsesWith(*II
, NewCall
);
2183 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2184 if (match(Arg0
, m_OneUse(m_FPExt(m_Value(X
)))) &&
2185 match(Arg1
, m_OneUse(m_FPExt(m_Value(Y
)))) &&
2186 X
->getType() == Y
->getType()) {
2188 Builder
.CreateBinaryIntrinsic(IID
, X
, Y
, II
, II
->getName());
2189 return new FPExtInst(NewCall
, II
->getType());
2192 // max X, -X --> fabs X
2193 // min X, -X --> -(fabs X)
2194 // TODO: Remove one-use limitation? That is obviously better for max.
2195 // It would be an extra instruction for min (fnabs), but that is
2196 // still likely better for analysis and codegen.
2197 if ((match(Arg0
, m_OneUse(m_FNeg(m_Value(X
)))) && Arg1
== X
) ||
2198 (match(Arg1
, m_OneUse(m_FNeg(m_Value(X
)))) && Arg0
== X
)) {
2199 Value
*R
= Builder
.CreateUnaryIntrinsic(Intrinsic::fabs
, X
, II
);
2200 if (IID
== Intrinsic::minimum
|| IID
== Intrinsic::minnum
)
2201 R
= Builder
.CreateFNegFMF(R
, II
);
2202 return replaceInstUsesWith(*II
, R
);
2207 case Intrinsic::matrix_multiply
: {
2208 // Optimize negation in matrix multiplication.
2212 if (match(II
->getArgOperand(0), m_FNeg(m_Value(A
))) &&
2213 match(II
->getArgOperand(1), m_FNeg(m_Value(B
)))) {
2214 replaceOperand(*II
, 0, A
);
2215 replaceOperand(*II
, 1, B
);
2219 Value
*Op0
= II
->getOperand(0);
2220 Value
*Op1
= II
->getOperand(1);
2221 Value
*OpNotNeg
, *NegatedOp
;
2222 unsigned NegatedOpArg
, OtherOpArg
;
2223 if (match(Op0
, m_FNeg(m_Value(OpNotNeg
)))) {
2227 } else if (match(Op1
, m_FNeg(m_Value(OpNotNeg
)))) {
2232 // Multiplication doesn't have a negated operand.
2235 // Only optimize if the negated operand has only one use.
2236 if (!NegatedOp
->hasOneUse())
2239 Value
*OtherOp
= II
->getOperand(OtherOpArg
);
2240 VectorType
*RetTy
= cast
<VectorType
>(II
->getType());
2241 VectorType
*NegatedOpTy
= cast
<VectorType
>(NegatedOp
->getType());
2242 VectorType
*OtherOpTy
= cast
<VectorType
>(OtherOp
->getType());
2243 ElementCount NegatedCount
= NegatedOpTy
->getElementCount();
2244 ElementCount OtherCount
= OtherOpTy
->getElementCount();
2245 ElementCount RetCount
= RetTy
->getElementCount();
2246 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2247 if (ElementCount::isKnownGT(NegatedCount
, OtherCount
) &&
2248 ElementCount::isKnownLT(OtherCount
, RetCount
)) {
2249 Value
*InverseOtherOp
= Builder
.CreateFNeg(OtherOp
);
2250 replaceOperand(*II
, NegatedOpArg
, OpNotNeg
);
2251 replaceOperand(*II
, OtherOpArg
, InverseOtherOp
);
2254 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2255 if (ElementCount::isKnownGT(NegatedCount
, RetCount
)) {
2256 SmallVector
<Value
*, 5> NewArgs(II
->args());
2257 NewArgs
[NegatedOpArg
] = OpNotNeg
;
2258 Instruction
*NewMul
=
2259 Builder
.CreateIntrinsic(II
->getType(), IID
, NewArgs
, II
);
2260 return replaceInstUsesWith(*II
, Builder
.CreateFNegFMF(NewMul
, II
));
2264 case Intrinsic::fmuladd
: {
2265 // Canonicalize fast fmuladd to the separate fmul + fadd.
2267 BuilderTy::FastMathFlagGuard
Guard(Builder
);
2268 Builder
.setFastMathFlags(II
->getFastMathFlags());
2269 Value
*Mul
= Builder
.CreateFMul(II
->getArgOperand(0),
2270 II
->getArgOperand(1));
2271 Value
*Add
= Builder
.CreateFAdd(Mul
, II
->getArgOperand(2));
2273 return replaceInstUsesWith(*II
, Add
);
2276 // Try to simplify the underlying FMul.
2277 if (Value
*V
= simplifyFMulInst(II
->getArgOperand(0), II
->getArgOperand(1),
2278 II
->getFastMathFlags(),
2279 SQ
.getWithInstruction(II
))) {
2280 auto *FAdd
= BinaryOperator::CreateFAdd(V
, II
->getArgOperand(2));
2281 FAdd
->copyFastMathFlags(II
);
2287 case Intrinsic::fma
: {
2288 // fma fneg(x), fneg(y), z -> fma x, y, z
2289 Value
*Src0
= II
->getArgOperand(0);
2290 Value
*Src1
= II
->getArgOperand(1);
2292 if (match(Src0
, m_FNeg(m_Value(X
))) && match(Src1
, m_FNeg(m_Value(Y
)))) {
2293 replaceOperand(*II
, 0, X
);
2294 replaceOperand(*II
, 1, Y
);
2298 // fma fabs(x), fabs(x), z -> fma x, x, z
2299 if (match(Src0
, m_FAbs(m_Value(X
))) &&
2300 match(Src1
, m_FAbs(m_Specific(X
)))) {
2301 replaceOperand(*II
, 0, X
);
2302 replaceOperand(*II
, 1, X
);
2306 // Try to simplify the underlying FMul. We can only apply simplifications
2307 // that do not require rounding.
2308 if (Value
*V
= simplifyFMAFMul(II
->getArgOperand(0), II
->getArgOperand(1),
2309 II
->getFastMathFlags(),
2310 SQ
.getWithInstruction(II
))) {
2311 auto *FAdd
= BinaryOperator::CreateFAdd(V
, II
->getArgOperand(2));
2312 FAdd
->copyFastMathFlags(II
);
2316 // fma x, y, 0 -> fmul x, y
2317 // This is always valid for -0.0, but requires nsz for +0.0 as
2318 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
2319 if (match(II
->getArgOperand(2), m_NegZeroFP()) ||
2320 (match(II
->getArgOperand(2), m_PosZeroFP()) &&
2321 II
->getFastMathFlags().noSignedZeros()))
2322 return BinaryOperator::CreateFMulFMF(Src0
, Src1
, II
);
2326 case Intrinsic::copysign
: {
2327 Value
*Mag
= II
->getArgOperand(0), *Sign
= II
->getArgOperand(1);
2328 if (SignBitMustBeZero(Sign
, DL
, &TLI
)) {
2329 // If we know that the sign argument is positive, reduce to FABS:
2330 // copysign Mag, +Sign --> fabs Mag
2331 Value
*Fabs
= Builder
.CreateUnaryIntrinsic(Intrinsic::fabs
, Mag
, II
);
2332 return replaceInstUsesWith(*II
, Fabs
);
2334 // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
2336 if (match(Sign
, m_APFloat(C
)) && C
->isNegative()) {
2337 // If we know that the sign argument is negative, reduce to FNABS:
2338 // copysign Mag, -Sign --> fneg (fabs Mag)
2339 Value
*Fabs
= Builder
.CreateUnaryIntrinsic(Intrinsic::fabs
, Mag
, II
);
2340 return replaceInstUsesWith(*II
, Builder
.CreateFNegFMF(Fabs
, II
));
2343 // Propagate sign argument through nested calls:
2344 // copysign Mag, (copysign ?, X) --> copysign Mag, X
2346 if (match(Sign
, m_Intrinsic
<Intrinsic::copysign
>(m_Value(), m_Value(X
))))
2347 return replaceOperand(*II
, 1, X
);
2349 // Peek through changes of magnitude's sign-bit. This call rewrites those:
2350 // copysign (fabs X), Sign --> copysign X, Sign
2351 // copysign (fneg X), Sign --> copysign X, Sign
2352 if (match(Mag
, m_FAbs(m_Value(X
))) || match(Mag
, m_FNeg(m_Value(X
))))
2353 return replaceOperand(*II
, 0, X
);
2357 case Intrinsic::fabs
: {
2358 Value
*Cond
, *TVal
, *FVal
;
2359 if (match(II
->getArgOperand(0),
2360 m_Select(m_Value(Cond
), m_Value(TVal
), m_Value(FVal
)))) {
2361 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
2362 if (isa
<Constant
>(TVal
) && isa
<Constant
>(FVal
)) {
2363 CallInst
*AbsT
= Builder
.CreateCall(II
->getCalledFunction(), {TVal
});
2364 CallInst
*AbsF
= Builder
.CreateCall(II
->getCalledFunction(), {FVal
});
2365 return SelectInst::Create(Cond
, AbsT
, AbsF
);
2367 // fabs (select Cond, -FVal, FVal) --> fabs FVal
2368 if (match(TVal
, m_FNeg(m_Specific(FVal
))))
2369 return replaceOperand(*II
, 0, FVal
);
2370 // fabs (select Cond, TVal, -TVal) --> fabs TVal
2371 if (match(FVal
, m_FNeg(m_Specific(TVal
))))
2372 return replaceOperand(*II
, 0, TVal
);
2375 Value
*Magnitude
, *Sign
;
2376 if (match(II
->getArgOperand(0),
2377 m_CopySign(m_Value(Magnitude
), m_Value(Sign
)))) {
2378 // fabs (copysign x, y) -> (fabs x)
2380 Builder
.CreateCall(II
->getCalledFunction(), {Magnitude
});
2381 AbsSign
->copyFastMathFlags(II
);
2382 return replaceInstUsesWith(*II
, AbsSign
);
2387 case Intrinsic::ceil
:
2388 case Intrinsic::floor
:
2389 case Intrinsic::round
:
2390 case Intrinsic::roundeven
:
2391 case Intrinsic::nearbyint
:
2392 case Intrinsic::rint
:
2393 case Intrinsic::trunc
: {
2395 if (match(II
->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc
))))) {
2396 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2397 Value
*NarrowII
= Builder
.CreateUnaryIntrinsic(IID
, ExtSrc
, II
);
2398 return new FPExtInst(NarrowII
, II
->getType());
2402 case Intrinsic::cos
:
2403 case Intrinsic::amdgcn_cos
: {
2405 Value
*Src
= II
->getArgOperand(0);
2406 if (match(Src
, m_FNeg(m_Value(X
))) || match(Src
, m_FAbs(m_Value(X
)))) {
2407 // cos(-x) -> cos(x)
2408 // cos(fabs(x)) -> cos(x)
2409 return replaceOperand(*II
, 0, X
);
2413 case Intrinsic::sin
: {
2415 if (match(II
->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X
))))) {
2416 // sin(-x) --> -sin(x)
2417 Value
*NewSin
= Builder
.CreateUnaryIntrinsic(Intrinsic::sin
, X
, II
);
2418 Instruction
*FNeg
= UnaryOperator::CreateFNeg(NewSin
);
2419 FNeg
->copyFastMathFlags(II
);
2424 case Intrinsic::ldexp
: {
2425 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
2427 // The danger is if the first ldexp would overflow to infinity or underflow
2428 // to zero, but the combined exponent avoids it. We ignore this with
2431 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
2432 // it would just double down on the overflow/underflow which would occur
2435 // TODO: Could do better if we had range tracking for the input value
2436 // exponent. Also could broaden sign check to cover == 0 case.
2437 Value
*Src
= II
->getArgOperand(0);
2438 Value
*Exp
= II
->getArgOperand(1);
2441 if (match(Src
, m_OneUse(m_Intrinsic
<Intrinsic::ldexp
>(
2442 m_Value(InnerSrc
), m_Value(InnerExp
)))) &&
2443 Exp
->getType() == InnerExp
->getType()) {
2444 FastMathFlags FMF
= II
->getFastMathFlags();
2445 FastMathFlags InnerFlags
= cast
<FPMathOperator
>(Src
)->getFastMathFlags();
2447 if ((FMF
.allowReassoc() && InnerFlags
.allowReassoc()) ||
2448 signBitMustBeTheSame(Exp
, InnerExp
, II
, DL
, &AC
, &DT
)) {
2449 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
2451 Value
*NewExp
= Builder
.CreateAdd(InnerExp
, Exp
);
2452 II
->setArgOperand(1, NewExp
);
2453 II
->setFastMathFlags(InnerFlags
); // Or the inner flags.
2454 return replaceOperand(*II
, 0, InnerSrc
);
2460 case Intrinsic::ptrauth_auth
:
2461 case Intrinsic::ptrauth_resign
: {
2462 // (sign|resign) + (auth|resign) can be folded by omitting the middle
2463 // sign+auth component if the key and discriminator match.
2464 bool NeedSign
= II
->getIntrinsicID() == Intrinsic::ptrauth_resign
;
2465 Value
*Key
= II
->getArgOperand(1);
2466 Value
*Disc
= II
->getArgOperand(2);
2468 // AuthKey will be the key we need to end up authenticating against in
2469 // whatever we replace this sequence with.
2470 Value
*AuthKey
= nullptr, *AuthDisc
= nullptr, *BasePtr
;
2471 if (auto CI
= dyn_cast
<CallBase
>(II
->getArgOperand(0))) {
2472 BasePtr
= CI
->getArgOperand(0);
2473 if (CI
->getIntrinsicID() == Intrinsic::ptrauth_sign
) {
2474 if (CI
->getArgOperand(1) != Key
|| CI
->getArgOperand(2) != Disc
)
2476 } else if (CI
->getIntrinsicID() == Intrinsic::ptrauth_resign
) {
2477 if (CI
->getArgOperand(3) != Key
|| CI
->getArgOperand(4) != Disc
)
2479 AuthKey
= CI
->getArgOperand(1);
2480 AuthDisc
= CI
->getArgOperand(2);
2487 if (AuthKey
&& NeedSign
) {
2488 // resign(0,1) + resign(1,2) = resign(0, 2)
2489 NewIntrin
= Intrinsic::ptrauth_resign
;
2490 } else if (AuthKey
) {
2491 // resign(0,1) + auth(1) = auth(0)
2492 NewIntrin
= Intrinsic::ptrauth_auth
;
2493 } else if (NeedSign
) {
2494 // sign(0) + resign(0, 1) = sign(1)
2495 NewIntrin
= Intrinsic::ptrauth_sign
;
2497 // sign(0) + auth(0) = nop
2498 replaceInstUsesWith(*II
, BasePtr
);
2499 eraseInstFromFunction(*II
);
2503 SmallVector
<Value
*, 4> CallArgs
;
2504 CallArgs
.push_back(BasePtr
);
2506 CallArgs
.push_back(AuthKey
);
2507 CallArgs
.push_back(AuthDisc
);
2511 CallArgs
.push_back(II
->getArgOperand(3));
2512 CallArgs
.push_back(II
->getArgOperand(4));
2515 Function
*NewFn
= Intrinsic::getDeclaration(II
->getModule(), NewIntrin
);
2516 return CallInst::Create(NewFn
, CallArgs
);
2518 case Intrinsic::arm_neon_vtbl1
:
2519 case Intrinsic::aarch64_neon_tbl1
:
2520 if (Value
*V
= simplifyNeonTbl1(*II
, Builder
))
2521 return replaceInstUsesWith(*II
, V
);
2524 case Intrinsic::arm_neon_vmulls
:
2525 case Intrinsic::arm_neon_vmullu
:
2526 case Intrinsic::aarch64_neon_smull
:
2527 case Intrinsic::aarch64_neon_umull
: {
2528 Value
*Arg0
= II
->getArgOperand(0);
2529 Value
*Arg1
= II
->getArgOperand(1);
2531 // Handle mul by zero first:
2532 if (isa
<ConstantAggregateZero
>(Arg0
) || isa
<ConstantAggregateZero
>(Arg1
)) {
2533 return replaceInstUsesWith(CI
, ConstantAggregateZero::get(II
->getType()));
2536 // Check for constant LHS & RHS - in this case we just simplify.
2537 bool Zext
= (IID
== Intrinsic::arm_neon_vmullu
||
2538 IID
== Intrinsic::aarch64_neon_umull
);
2539 VectorType
*NewVT
= cast
<VectorType
>(II
->getType());
2540 if (Constant
*CV0
= dyn_cast
<Constant
>(Arg0
)) {
2541 if (Constant
*CV1
= dyn_cast
<Constant
>(Arg1
)) {
2542 Value
*V0
= Builder
.CreateIntCast(CV0
, NewVT
, /*isSigned=*/!Zext
);
2543 Value
*V1
= Builder
.CreateIntCast(CV1
, NewVT
, /*isSigned=*/!Zext
);
2544 return replaceInstUsesWith(CI
, Builder
.CreateMul(V0
, V1
));
2547 // Couldn't simplify - canonicalize constant to the RHS.
2548 std::swap(Arg0
, Arg1
);
2551 // Handle mul by one:
2552 if (Constant
*CV1
= dyn_cast
<Constant
>(Arg1
))
2553 if (ConstantInt
*Splat
=
2554 dyn_cast_or_null
<ConstantInt
>(CV1
->getSplatValue()))
2556 return CastInst::CreateIntegerCast(Arg0
, II
->getType(),
2557 /*isSigned=*/!Zext
);
2561 case Intrinsic::arm_neon_aesd
:
2562 case Intrinsic::arm_neon_aese
:
2563 case Intrinsic::aarch64_crypto_aesd
:
2564 case Intrinsic::aarch64_crypto_aese
: {
2565 Value
*DataArg
= II
->getArgOperand(0);
2566 Value
*KeyArg
= II
->getArgOperand(1);
2568 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
2570 if (match(KeyArg
, m_ZeroInt()) &&
2571 match(DataArg
, m_Xor(m_Value(Data
), m_Value(Key
)))) {
2572 replaceOperand(*II
, 0, Data
);
2573 replaceOperand(*II
, 1, Key
);
2578 case Intrinsic::hexagon_V6_vandvrt
:
2579 case Intrinsic::hexagon_V6_vandvrt_128B
: {
2580 // Simplify Q -> V -> Q conversion.
2581 if (auto Op0
= dyn_cast
<IntrinsicInst
>(II
->getArgOperand(0))) {
2582 Intrinsic::ID ID0
= Op0
->getIntrinsicID();
2583 if (ID0
!= Intrinsic::hexagon_V6_vandqrt
&&
2584 ID0
!= Intrinsic::hexagon_V6_vandqrt_128B
)
2586 Value
*Bytes
= Op0
->getArgOperand(1), *Mask
= II
->getArgOperand(1);
2587 uint64_t Bytes1
= computeKnownBits(Bytes
, 0, Op0
).One
.getZExtValue();
2588 uint64_t Mask1
= computeKnownBits(Mask
, 0, II
).One
.getZExtValue();
2589 // Check if every byte has common bits in Bytes and Mask.
2590 uint64_t C
= Bytes1
& Mask1
;
2591 if ((C
& 0xFF) && (C
& 0xFF00) && (C
& 0xFF0000) && (C
& 0xFF000000))
2592 return replaceInstUsesWith(*II
, Op0
->getArgOperand(0));
2596 case Intrinsic::stackrestore
: {
2597 enum class ClassifyResult
{
2601 CallWithSideEffects
,
2603 auto Classify
= [](const Instruction
*I
) {
2604 if (isa
<AllocaInst
>(I
))
2605 return ClassifyResult::Alloca
;
2607 if (auto *CI
= dyn_cast
<CallInst
>(I
)) {
2608 if (auto *II
= dyn_cast
<IntrinsicInst
>(CI
)) {
2609 if (II
->getIntrinsicID() == Intrinsic::stackrestore
)
2610 return ClassifyResult::StackRestore
;
2612 if (II
->mayHaveSideEffects())
2613 return ClassifyResult::CallWithSideEffects
;
2615 // Consider all non-intrinsic calls to be side effects
2616 return ClassifyResult::CallWithSideEffects
;
2620 return ClassifyResult::None
;
2623 // If the stacksave and the stackrestore are in the same BB, and there is
2624 // no intervening call, alloca, or stackrestore of a different stacksave,
2625 // remove the restore. This can happen when variable allocas are DCE'd.
2626 if (IntrinsicInst
*SS
= dyn_cast
<IntrinsicInst
>(II
->getArgOperand(0))) {
2627 if (SS
->getIntrinsicID() == Intrinsic::stacksave
&&
2628 SS
->getParent() == II
->getParent()) {
2629 BasicBlock::iterator
BI(SS
);
2630 bool CannotRemove
= false;
2631 for (++BI
; &*BI
!= II
; ++BI
) {
2632 switch (Classify(&*BI
)) {
2633 case ClassifyResult::None
:
2634 // So far so good, look at next instructions.
2637 case ClassifyResult::StackRestore
:
2638 // If we found an intervening stackrestore for a different
2639 // stacksave, we can't remove the stackrestore. Otherwise, continue.
2640 if (cast
<IntrinsicInst
>(*BI
).getArgOperand(0) != SS
)
2641 CannotRemove
= true;
2644 case ClassifyResult::Alloca
:
2645 case ClassifyResult::CallWithSideEffects
:
2646 // If we found an alloca, a non-intrinsic call, or an intrinsic
2647 // call with side effects, we can't remove the stackrestore.
2648 CannotRemove
= true;
2656 return eraseInstFromFunction(CI
);
2660 // Scan down this block to see if there is another stack restore in the
2661 // same block without an intervening call/alloca.
2662 BasicBlock::iterator
BI(II
);
2663 Instruction
*TI
= II
->getParent()->getTerminator();
2664 bool CannotRemove
= false;
2665 for (++BI
; &*BI
!= TI
; ++BI
) {
2666 switch (Classify(&*BI
)) {
2667 case ClassifyResult::None
:
2668 // So far so good, look at next instructions.
2671 case ClassifyResult::StackRestore
:
2672 // If there is a stackrestore below this one, remove this one.
2673 return eraseInstFromFunction(CI
);
2675 case ClassifyResult::Alloca
:
2676 case ClassifyResult::CallWithSideEffects
:
2677 // If we found an alloca, a non-intrinsic call, or an intrinsic call
2678 // with side effects (such as llvm.stacksave and llvm.read_register),
2679 // we can't remove the stack restore.
2680 CannotRemove
= true;
2687 // If the stack restore is in a return, resume, or unwind block and if there
2688 // are no allocas or calls between the restore and the return, nuke the
2690 if (!CannotRemove
&& (isa
<ReturnInst
>(TI
) || isa
<ResumeInst
>(TI
)))
2691 return eraseInstFromFunction(CI
);
  case Intrinsic::lifetime_end:
    // Asan needs to poison memory to detect invalid access which is possible
    // even for empty lifetime range.
    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
      break;

    if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
          return I.getIntrinsicID() == Intrinsic::lifetime_start;
        }))
      return nullptr;
    break;
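  // Illustrative example (not from the original source) for the assume
  // handling below: given %v = load ptr, ptr %a, %cmp = icmp ne ptr %v, null,
  // and call void @llvm.assume(i1 %cmp), the load can be tagged with !nonnull
  // and !noundef metadata and the condition dropped from the assume.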
2707 case Intrinsic::assume
: {
2708 Value
*IIOperand
= II
->getArgOperand(0);
2709 SmallVector
<OperandBundleDef
, 4> OpBundles
;
2710 II
->getOperandBundlesAsDefs(OpBundles
);
2712 /// This will remove the boolean Condition from the assume given as
2713 /// argument and remove the assume if it becomes useless.
2714 /// always returns nullptr for use as a return values.
2715 auto RemoveConditionFromAssume
= [&](Instruction
*Assume
) -> Instruction
* {
2716 assert(isa
<AssumeInst
>(Assume
));
2717 if (isAssumeWithEmptyBundle(*cast
<AssumeInst
>(II
)))
2718 return eraseInstFromFunction(CI
);
2719 replaceUse(II
->getOperandUse(0), ConstantInt::getTrue(II
->getContext()));
2722 // Remove an assume if it is followed by an identical assume.
2723 // TODO: Do we need this? Unless there are conflicting assumptions, the
2724 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
2725 Instruction
*Next
= II
->getNextNonDebugInstruction();
2726 if (match(Next
, m_Intrinsic
<Intrinsic::assume
>(m_Specific(IIOperand
))))
2727 return RemoveConditionFromAssume(Next
);
2729 // Canonicalize assume(a && b) -> assume(a); assume(b);
2730 // Note: New assumption intrinsics created here are registered by
2731 // the InstCombineIRInserter object.
2732 FunctionType
*AssumeIntrinsicTy
= II
->getFunctionType();
2733 Value
*AssumeIntrinsic
= II
->getCalledOperand();
2735 if (match(IIOperand
, m_LogicalAnd(m_Value(A
), m_Value(B
)))) {
2736 Builder
.CreateCall(AssumeIntrinsicTy
, AssumeIntrinsic
, A
, OpBundles
,
2738 Builder
.CreateCall(AssumeIntrinsicTy
, AssumeIntrinsic
, B
, II
->getName());
2739 return eraseInstFromFunction(*II
);
2741 // assume(!(a || b)) -> assume(!a); assume(!b);
2742 if (match(IIOperand
, m_Not(m_LogicalOr(m_Value(A
), m_Value(B
))))) {
2743 Builder
.CreateCall(AssumeIntrinsicTy
, AssumeIntrinsic
,
2744 Builder
.CreateNot(A
), OpBundles
, II
->getName());
2745 Builder
.CreateCall(AssumeIntrinsicTy
, AssumeIntrinsic
,
2746 Builder
.CreateNot(B
), II
->getName());
2747 return eraseInstFromFunction(*II
);
2750 // assume( (load addr) != null ) -> add 'nonnull' metadata to load
2751 // (if assume is valid at the load)
2752 CmpInst::Predicate Pred
;
2754 if (match(IIOperand
, m_ICmp(Pred
, m_Instruction(LHS
), m_Zero())) &&
2755 Pred
== ICmpInst::ICMP_NE
&& LHS
->getOpcode() == Instruction::Load
&&
2756 LHS
->getType()->isPointerTy() &&
2757 isValidAssumeForContext(II
, LHS
, &DT
)) {
2758 MDNode
*MD
= MDNode::get(II
->getContext(), std::nullopt
);
2759 LHS
->setMetadata(LLVMContext::MD_nonnull
, MD
);
2760 LHS
->setMetadata(LLVMContext::MD_noundef
, MD
);
2761 return RemoveConditionFromAssume(II
);
2763 // TODO: apply nonnull return attributes to calls and invokes
2764 // TODO: apply range metadata for range check patterns?
2767 // Separate storage assumptions apply to the underlying allocations, not any
2768 // particular pointer within them. When evaluating the hints for AA purposes
2769 // we getUnderlyingObject them; by precomputing the answers here we can
2770 // avoid having to do so repeatedly there.
2771 for (unsigned Idx
= 0; Idx
< II
->getNumOperandBundles(); Idx
++) {
2772 OperandBundleUse OBU
= II
->getOperandBundleAt(Idx
);
2773 if (OBU
.getTagName() == "separate_storage") {
2774 assert(OBU
.Inputs
.size() == 2);
2775 auto MaybeSimplifyHint
= [&](const Use
&U
) {
2776 Value
*Hint
= U
.get();
2777 // Not having a limit is safe because InstCombine removes unreachable
2779 Value
*UnderlyingObject
= getUnderlyingObject(Hint
, /*MaxLookup*/ 0);
2780 if (Hint
!= UnderlyingObject
)
2781 replaceUse(const_cast<Use
&>(U
), UnderlyingObject
);
2783 MaybeSimplifyHint(OBU
.Inputs
[0]);
2784 MaybeSimplifyHint(OBU
.Inputs
[1]);
2788 // Convert nonnull assume like:
2789 // %A = icmp ne i32* %PTR, null
2790 // call void @llvm.assume(i1 %A)
2792 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
2793 if (EnableKnowledgeRetention
&&
2794 match(IIOperand
, m_Cmp(Pred
, m_Value(A
), m_Zero())) &&
2795 Pred
== CmpInst::ICMP_NE
&& A
->getType()->isPointerTy()) {
2796 if (auto *Replacement
= buildAssumeFromKnowledge(
2797 {RetainedKnowledge
{Attribute::NonNull
, 0, A
}}, Next
, &AC
, &DT
)) {
2799 Replacement
->insertBefore(Next
);
2800 AC
.registerAssumption(Replacement
);
2801 return RemoveConditionFromAssume(II
);
2805 // Convert alignment assume like:
2806 // %B = ptrtoint i32* %A to i64
2807 // %C = and i64 %B, Constant
2808 // %D = icmp eq i64 %C, 0
2809 // call void @llvm.assume(i1 %D)
2811 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
2813 if (EnableKnowledgeRetention
&&
2815 m_Cmp(Pred
, m_And(m_Value(A
), m_ConstantInt(AlignMask
)),
2817 Pred
== CmpInst::ICMP_EQ
) {
2818 if (isPowerOf2_64(AlignMask
+ 1)) {
2819 uint64_t Offset
= 0;
2820 match(A
, m_Add(m_Value(A
), m_ConstantInt(Offset
)));
2821 if (match(A
, m_PtrToInt(m_Value(A
)))) {
2822 /// Note: this doesn't preserve the offset information but merges
2823 /// offset and alignment.
2824 /// TODO: we can generate a GEP instead of merging the alignment with
2826 RetainedKnowledge RK
{Attribute::Alignment
,
2827 (unsigned)MinAlign(Offset
, AlignMask
+ 1), A
};
2828 if (auto *Replacement
=
2829 buildAssumeFromKnowledge(RK
, Next
, &AC
, &DT
)) {
2831 Replacement
->insertAfter(II
);
2832 AC
.registerAssumption(Replacement
);
2834 return RemoveConditionFromAssume(II
);
2839 /// Canonicalize Knowledge in operand bundles.
2840 if (EnableKnowledgeRetention
&& II
->hasOperandBundles()) {
2841 for (unsigned Idx
= 0; Idx
< II
->getNumOperandBundles(); Idx
++) {
2842 auto &BOI
= II
->bundle_op_info_begin()[Idx
];
2843 RetainedKnowledge RK
=
2844 llvm::getKnowledgeFromBundle(cast
<AssumeInst
>(*II
), BOI
);
2845 if (BOI
.End
- BOI
.Begin
> 2)
2846 continue; // Prevent reducing knowledge in an align with offset since
2847 // extracting a RetainedKnowledge from them looses offset
2849 RetainedKnowledge CanonRK
=
2850 llvm::simplifyRetainedKnowledge(cast
<AssumeInst
>(II
), RK
,
2851 &getAssumptionCache(),
2852 &getDominatorTree());
2856 if (BOI
.End
- BOI
.Begin
> 0) {
2857 Worklist
.pushValue(II
->op_begin()[BOI
.Begin
]);
2858 Value::dropDroppableUse(II
->op_begin()[BOI
.Begin
]);
2862 assert(RK
.AttrKind
== CanonRK
.AttrKind
);
2863 if (BOI
.End
- BOI
.Begin
> 0)
2864 II
->op_begin()[BOI
.Begin
].set(CanonRK
.WasOn
);
2865 if (BOI
.End
- BOI
.Begin
> 1)
2866 II
->op_begin()[BOI
.Begin
+ 1].set(ConstantInt::get(
2867 Type::getInt64Ty(II
->getContext()), CanonRK
.ArgValue
));
2869 Worklist
.pushValue(RK
.WasOn
);
2874 // If there is a dominating assume with the same condition as this one,
2875 // then this one is redundant, and should be removed.
2877 computeKnownBits(IIOperand
, Known
, 0, II
);
2878 if (Known
.isAllOnes() && isAssumeWithEmptyBundle(cast
<AssumeInst
>(*II
)))
2879 return eraseInstFromFunction(*II
);
2881 // assume(false) is unreachable.
2882 if (match(IIOperand
, m_CombineOr(m_Zero(), m_Undef()))) {
2883 CreateNonTerminatorUnreachable(II
);
2884 return eraseInstFromFunction(*II
);
2887 // Update the cache of affected values for this assumption (we might be
2888 // here because we just simplified the condition).
2889 AC
.updateAffectedValues(cast
<AssumeInst
>(II
));
2892 case Intrinsic::experimental_guard
: {
2893 // Is this guard followed by another guard? We scan forward over a small
2894 // fixed window of instructions to handle common cases with conditions
2895 // computed between guards.
2896 Instruction
*NextInst
= II
->getNextNonDebugInstruction();
2897 for (unsigned i
= 0; i
< GuardWideningWindow
; i
++) {
2898 // Note: Using context-free form to avoid compile time blow up
2899 if (!isSafeToSpeculativelyExecute(NextInst
))
2901 NextInst
= NextInst
->getNextNonDebugInstruction();
2903 Value
*NextCond
= nullptr;
2905 m_Intrinsic
<Intrinsic::experimental_guard
>(m_Value(NextCond
)))) {
2906 Value
*CurrCond
= II
->getArgOperand(0);
2908 // Remove a guard that it is immediately preceded by an identical guard.
2909 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
2910 if (CurrCond
!= NextCond
) {
2911 Instruction
*MoveI
= II
->getNextNonDebugInstruction();
2912 while (MoveI
!= NextInst
) {
2914 MoveI
= MoveI
->getNextNonDebugInstruction();
2915 Temp
->moveBefore(II
);
2917 replaceOperand(*II
, 0, Builder
.CreateAnd(CurrCond
, NextCond
));
2919 eraseInstFromFunction(*NextInst
);
2924 case Intrinsic::vector_insert
: {
2925 Value
*Vec
= II
->getArgOperand(0);
2926 Value
*SubVec
= II
->getArgOperand(1);
2927 Value
*Idx
= II
->getArgOperand(2);
2928 auto *DstTy
= dyn_cast
<FixedVectorType
>(II
->getType());
2929 auto *VecTy
= dyn_cast
<FixedVectorType
>(Vec
->getType());
2930 auto *SubVecTy
= dyn_cast
<FixedVectorType
>(SubVec
->getType());
2932 // Only canonicalize if the destination vector, Vec, and SubVec are all
2934 if (DstTy
&& VecTy
&& SubVecTy
) {
2935 unsigned DstNumElts
= DstTy
->getNumElements();
2936 unsigned VecNumElts
= VecTy
->getNumElements();
2937 unsigned SubVecNumElts
= SubVecTy
->getNumElements();
2938 unsigned IdxN
= cast
<ConstantInt
>(Idx
)->getZExtValue();
2940 // An insert that entirely overwrites Vec with SubVec is a nop.
2941 if (VecNumElts
== SubVecNumElts
)
2942 return replaceInstUsesWith(CI
, SubVec
);
2944 // Widen SubVec into a vector of the same width as Vec, since
2945 // shufflevector requires the two input vectors to be the same width.
2946 // Elements beyond the bounds of SubVec within the widened vector are
2948 SmallVector
<int, 8> WidenMask
;
2950 for (i
= 0; i
!= SubVecNumElts
; ++i
)
2951 WidenMask
.push_back(i
);
2952 for (; i
!= VecNumElts
; ++i
)
2953 WidenMask
.push_back(PoisonMaskElem
);
2955 Value
*WidenShuffle
= Builder
.CreateShuffleVector(SubVec
, WidenMask
);
2957 SmallVector
<int, 8> Mask
;
2958 for (unsigned i
= 0; i
!= IdxN
; ++i
)
2960 for (unsigned i
= DstNumElts
; i
!= DstNumElts
+ SubVecNumElts
; ++i
)
2962 for (unsigned i
= IdxN
+ SubVecNumElts
; i
!= DstNumElts
; ++i
)
2965 Value
*Shuffle
= Builder
.CreateShuffleVector(Vec
, WidenShuffle
, Mask
);
2966 return replaceInstUsesWith(CI
, Shuffle
);
2970 case Intrinsic::vector_extract
: {
2971 Value
*Vec
= II
->getArgOperand(0);
2972 Value
*Idx
= II
->getArgOperand(1);
2974 Type
*ReturnType
= II
->getType();
2975 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
2977 unsigned ExtractIdx
= cast
<ConstantInt
>(Idx
)->getZExtValue();
2978 Value
*InsertTuple
, *InsertIdx
, *InsertValue
;
2979 if (match(Vec
, m_Intrinsic
<Intrinsic::vector_insert
>(m_Value(InsertTuple
),
2980 m_Value(InsertValue
),
2981 m_Value(InsertIdx
))) &&
2982 InsertValue
->getType() == ReturnType
) {
2983 unsigned Index
= cast
<ConstantInt
>(InsertIdx
)->getZExtValue();
2984 // Case where we get the same index right after setting it.
2985 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
2987 if (ExtractIdx
== Index
)
2988 return replaceInstUsesWith(CI
, InsertValue
);
2989 // If we are getting a different index than what was set in the
2990 // insert.vector intrinsic. We can just set the input tuple to the one up
2991 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
2992 // InsertIndex), ExtractIndex)
2993 // --> extract.vector(InsertTuple, ExtractIndex)
2995 return replaceOperand(CI
, 0, InsertTuple
);
2998 auto *DstTy
= dyn_cast
<VectorType
>(ReturnType
);
2999 auto *VecTy
= dyn_cast
<VectorType
>(Vec
->getType());
3001 if (DstTy
&& VecTy
) {
3002 auto DstEltCnt
= DstTy
->getElementCount();
3003 auto VecEltCnt
= VecTy
->getElementCount();
3004 unsigned IdxN
= cast
<ConstantInt
>(Idx
)->getZExtValue();
3006 // Extracting the entirety of Vec is a nop.
3007 if (DstEltCnt
== VecTy
->getElementCount()) {
3008 replaceInstUsesWith(CI
, Vec
);
3009 return eraseInstFromFunction(CI
);
3012 // Only canonicalize to shufflevector if the destination vector and
3013 // Vec are fixed vectors.
3014 if (VecEltCnt
.isScalable() || DstEltCnt
.isScalable())
3017 SmallVector
<int, 8> Mask
;
3018 for (unsigned i
= 0; i
!= DstEltCnt
.getKnownMinValue(); ++i
)
3019 Mask
.push_back(IdxN
+ i
);
3021 Value
*Shuffle
= Builder
.CreateShuffleVector(Vec
, Mask
);
3022 return replaceInstUsesWith(CI
, Shuffle
);
3026 case Intrinsic::experimental_vector_reverse
: {
3027 Value
*BO0
, *BO1
, *X
, *Y
;
3028 Value
*Vec
= II
->getArgOperand(0);
3029 if (match(Vec
, m_OneUse(m_BinOp(m_Value(BO0
), m_Value(BO1
))))) {
3030 auto *OldBinOp
= cast
<BinaryOperator
>(Vec
);
3031 if (match(BO0
, m_VecReverse(m_Value(X
)))) {
3032 // rev(binop rev(X), rev(Y)) --> binop X, Y
3033 if (match(BO1
, m_VecReverse(m_Value(Y
))))
3034 return replaceInstUsesWith(CI
,
3035 BinaryOperator::CreateWithCopiedFlags(
3036 OldBinOp
->getOpcode(), X
, Y
, OldBinOp
,
3037 OldBinOp
->getName(), II
));
3038 // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
3039 if (isSplatValue(BO1
))
3040 return replaceInstUsesWith(CI
,
3041 BinaryOperator::CreateWithCopiedFlags(
3042 OldBinOp
->getOpcode(), X
, BO1
,
3043 OldBinOp
, OldBinOp
->getName(), II
));
3045 // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
3046 if (match(BO1
, m_VecReverse(m_Value(Y
))) && isSplatValue(BO0
))
3047 return replaceInstUsesWith(CI
, BinaryOperator::CreateWithCopiedFlags(
3048 OldBinOp
->getOpcode(), BO0
, Y
,
3049 OldBinOp
, OldBinOp
->getName(), II
));
3051 // rev(unop rev(X)) --> unop X
3052 if (match(Vec
, m_OneUse(m_UnOp(m_VecReverse(m_Value(X
)))))) {
3053 auto *OldUnOp
= cast
<UnaryOperator
>(Vec
);
3054 auto *NewUnOp
= UnaryOperator::CreateWithCopiedFlags(
3055 OldUnOp
->getOpcode(), X
, OldUnOp
, OldUnOp
->getName(), II
);
3056 return replaceInstUsesWith(CI
, NewUnOp
);
3060 case Intrinsic::vector_reduce_or
:
3061 case Intrinsic::vector_reduce_and
: {
3062 // Canonicalize logical or/and reductions:
3063 // Or reduction for i1 is represented as:
3064 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3065 // %res = cmp ne iReduxWidth %val, 0
3066 // And reduction for i1 is represented as:
3067 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3068 // %res = cmp eq iReduxWidth %val, 11111
3069 Value
*Arg
= II
->getArgOperand(0);
3071 if (match(Arg
, m_ZExtOrSExtOrSelf(m_Value(Vect
)))) {
3072 if (auto *FTy
= dyn_cast
<FixedVectorType
>(Vect
->getType()))
3073 if (FTy
->getElementType() == Builder
.getInt1Ty()) {
3074 Value
*Res
= Builder
.CreateBitCast(
3075 Vect
, Builder
.getIntNTy(FTy
->getNumElements()));
3076 if (IID
== Intrinsic::vector_reduce_and
) {
3077 Res
= Builder
.CreateICmpEQ(
3078 Res
, ConstantInt::getAllOnesValue(Res
->getType()));
3080 assert(IID
== Intrinsic::vector_reduce_or
&&
3081 "Expected or reduction.");
3082 Res
= Builder
.CreateIsNotNull(Res
);
3085 Res
= Builder
.CreateCast(cast
<CastInst
>(Arg
)->getOpcode(), Res
,
3087 return replaceInstUsesWith(CI
, Res
);
3092 case Intrinsic::vector_reduce_add
: {
3093 if (IID
== Intrinsic::vector_reduce_add
) {
3094 // Convert vector_reduce_add(ZExt(<n x i1>)) to
3095 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3096 // Convert vector_reduce_add(SExt(<n x i1>)) to
3097 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3098 // Convert vector_reduce_add(<n x i1>) to
3099 // Trunc(ctpop(bitcast <n x i1> to in)).
3100 Value
*Arg
= II
->getArgOperand(0);
3102 if (match(Arg
, m_ZExtOrSExtOrSelf(m_Value(Vect
)))) {
3103 if (auto *FTy
= dyn_cast
<FixedVectorType
>(Vect
->getType()))
3104 if (FTy
->getElementType() == Builder
.getInt1Ty()) {
3105 Value
*V
= Builder
.CreateBitCast(
3106 Vect
, Builder
.getIntNTy(FTy
->getNumElements()));
3107 Value
*Res
= Builder
.CreateUnaryIntrinsic(Intrinsic::ctpop
, V
);
3108 if (Res
->getType() != II
->getType())
3109 Res
= Builder
.CreateZExtOrTrunc(Res
, II
->getType());
3111 cast
<Instruction
>(Arg
)->getOpcode() == Instruction::SExt
)
3112 Res
= Builder
.CreateNeg(Res
);
3113 return replaceInstUsesWith(CI
, Res
);
3119 case Intrinsic::vector_reduce_xor
: {
3120 if (IID
== Intrinsic::vector_reduce_xor
) {
3121 // Exclusive disjunction reduction over the vector with
3122 // (potentially-extended) i1 element type is actually a
3123 // (potentially-extended) arithmetic `add` reduction over the original
3124 // non-extended value:
3125 // vector_reduce_xor(?ext(<n x i1>))
3127 // ?ext(vector_reduce_add(<n x i1>))
3128 Value
*Arg
= II
->getArgOperand(0);
3130 if (match(Arg
, m_ZExtOrSExtOrSelf(m_Value(Vect
)))) {
3131 if (auto *FTy
= dyn_cast
<FixedVectorType
>(Vect
->getType()))
3132 if (FTy
->getElementType() == Builder
.getInt1Ty()) {
3133 Value
*Res
= Builder
.CreateAddReduce(Vect
);
3135 Res
= Builder
.CreateCast(cast
<CastInst
>(Arg
)->getOpcode(), Res
,
3137 return replaceInstUsesWith(CI
, Res
);
  case Intrinsic::vector_reduce_mul: {
    if (IID == Intrinsic::vector_reduce_mul) {
      // Multiplicative reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially zero-extended)
      // logical `and` reduction over the original non-extended value:
      //   vector_reduce_mul(?ext(<n x i1>))
      //     -->
      //   zext(vector_reduce_and(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = Builder.CreateAndReduce(Vect);
            if (Res->getType() != II->getType())
              Res = Builder.CreateZExt(Res, II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
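  // For illustration:
  //   %e = zext <4 x i1> %v to <4 x i8>
  //   %r = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %e)
  // becomes roughly
  //   %a = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
  //   %r = zext i1 %a to i8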
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax: {
    if (IID == Intrinsic::vector_reduce_umin ||
        IID == Intrinsic::vector_reduce_umax) {
      // UMin/UMax reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially-extended)
      // logical `and`/`or` reduction over the original non-extended value:
      //   vector_reduce_u{min,max}(?ext(<n x i1>))
      //     -->
      //   ?ext(vector_reduce_{and,or}(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = IID == Intrinsic::vector_reduce_umin
                             ? Builder.CreateAndReduce(Vect)
                             : Builder.CreateOrReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
                                       II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
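  // For illustration:
  //   %e = zext <4 x i1> %v to <4 x i32>
  //   %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %e)
  // becomes roughly
  //   %a = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
  //   %r = zext i1 %a to i32
  // while umax maps to an 'or' reduction instead.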
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax: {
    if (IID == Intrinsic::vector_reduce_smin ||
        IID == Intrinsic::vector_reduce_smax) {
      // SMin/SMax reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially-extended)
      // logical `and`/`or` reduction over the original non-extended value:
      //   vector_reduce_s{min,max}(<n x i1>)
      //     -->
      //   vector_reduce_{or,and}(<n x i1>)
      // and
      //   vector_reduce_s{min,max}(sext(<n x i1>))
      //     -->
      //   sext(vector_reduce_{or,and}(<n x i1>))
      // and
      //   vector_reduce_s{min,max}(zext(<n x i1>))
      //     -->
      //   zext(vector_reduce_{and,or}(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
            if (Arg != Vect)
              ExtOpc = cast<CastInst>(Arg)->getOpcode();
            Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
                          (ExtOpc == Instruction::CastOps::ZExt))
                             ? Builder.CreateAndReduce(Vect)
                             : Builder.CreateOrReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(ExtOpc, Res, II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
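  // For illustration:
  //   %e = sext <4 x i1> %v to <4 x i32>
  //   %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %e)
  // becomes roughly
  //   %a = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v)
  //   %r = sext i1 %a to i32
  // since the sign-extended lanes can only be 0 or -1.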
  case Intrinsic::vector_reduce_fmax:
  case Intrinsic::vector_reduce_fmin:
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul: {
    bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
                              IID != Intrinsic::vector_reduce_fmul) ||
                             II->hasAllowReassoc();
    const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
                             IID == Intrinsic::vector_reduce_fmul)
                                ? 1
                                : 0;
    Value *Arg = II->getArgOperand(ArgIdx);
    Value *V;
    ArrayRef<int> Mask;
    if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
        !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
        !cast<ShuffleVectorInst>(Arg)->isSingleSource())
      break;
    int Sz = Mask.size();
    SmallBitVector UsedIndices(Sz);
    for (int Idx : Mask) {
      if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
        break;
      UsedIndices.set(Idx);
    }
    // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
    // other changes.
    if (UsedIndices.all()) {
      replaceUse(II->getOperandUse(ArgIdx), V);
      return nullptr;
    }
    break;
  }
  case Intrinsic::is_fpclass: {
    if (Instruction *I = foldIntrinsicIsFPClass(*II))
      return I;
    break;
  }
  default: {
    // Handle target specific intrinsics
    std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
    if (V)
      return *V;
    break;
  }
  }

  // Try to fold intrinsic into select operands. This is legal if:
  //  * The intrinsic is speculatable.
  //  * The select condition is not a vector, or the intrinsic does not
  //    perform cross-lane operations.
  switch (IID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::ctpop:
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::usub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::sadd_sat:
    for (Value *Op : II->args())
      if (auto *Sel = dyn_cast<SelectInst>(Op))
        if (Instruction *R = FoldOpIntoSelect(*II, Sel))
          return R;
    break;
  default:
    break;
  }

  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
    return Shuf;

  // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
  // context, so it is handled in visitCallBase and we should trigger it.
  return visitCallBase(*II);
}
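// For illustration of the select folding above:
//   %s = select i1 %c, i32 %x, i32 7
//   %p = call i32 @llvm.ctpop.i32(i32 %s)
// can be rewritten as
//   %t = call i32 @llvm.ctpop.i32(i32 %x)
//   %p = select i1 %c, i32 %t, i32 3
// because ctpop of the constant arm folds to a constant.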
// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
  auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
  // This check is solely here to handle arbitrary target-dependent syncscopes.
  // TODO: Can remove if it does not matter in practice.
  if (NFI && FI.isIdenticalTo(NFI))
    return eraseInstFromFunction(FI);

  // Returns true if FI1 is an identical or stronger fence than FI2.
  auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
    auto FI1SyncScope = FI1->getSyncScopeID();
    // Consider same scope, where scope is global or single-thread.
    if (FI1SyncScope != FI2->getSyncScopeID() ||
        (FI1SyncScope != SyncScope::System &&
         FI1SyncScope != SyncScope::SingleThread))
      return false;

    return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
  };
  if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
    return eraseInstFromFunction(FI);

  if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
    if (isIdenticalOrStrongerFence(PFI, &FI))
      return eraseInstFromFunction(FI);
  return nullptr;
}
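// For illustration:
//   fence seq_cst
//   fence acquire
// The second fence is removed because the preceding seq_cst fence in the same
// (system) scope is at least as strong.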
// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
  return visitCallBase(II);
}

// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
  return visitCallBase(CBI);
}
Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
  if (!CI->getCalledFunction()) return nullptr;

  // Skip optimizing notail and musttail calls so
  // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
  // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
  if (CI->isMustTailCall() || CI->isNoTailCall())
    return nullptr;

  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
    replaceInstUsesWith(*From, With);
  };
  auto InstCombineErase = [this](Instruction *I) {
    eraseInstFromFunction(*I);
  };
  LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
                               InstCombineErase);
  if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
  }

  return nullptr;
}
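// For illustration, LibCallSimplifier can rewrite a call such as
//   %n = call i32 @printf(ptr @.fmt)        ; where @.fmt is "hello\n"
// into
//   %n = call i32 @puts(ptr @.fmt.trimmed)  ; "hello" (names are illustrative)
// when the format string contains no conversions and ends with a newline.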
static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca.  This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value.  Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}
static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find a
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
                            E = AdjustTramp->getParent()->begin();
       I != E;) {
    Instruction *Inst = &*--I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}
// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
static IntrinsicInst *findInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}
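// For illustration:
//   %tramp = alloca [64 x i8], align 16
//   call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nest_val)
//   %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
//   call void %fp(i32 1)
// The indirect call through %fp can be turned into a direct call to @f with
// %nest_val passed via @f's 'nest' parameter (see
// transformCallThroughTrampoline below).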
bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
                                            const TargetLibraryInfo *TLI) {
  // Note: We only handle cases which can't be driven from generic attributes
  // here.  So, for example, nonnull and noalias (which are common properties
  // of some allocation functions) are expected to be handled via annotation
  // of the respective allocator declaration with generic attributes.
  bool Changed = false;

  if (!Call.getType()->isPointerTy())
    return Changed;

  std::optional<APInt> Size = getAllocSize(&Call, TLI);
  if (Size && *Size != 0) {
    // TODO: We really should just emit deref_or_null here and then
    // let the generic inference code combine that with nonnull.
    if (Call.hasRetAttr(Attribute::NonNull)) {
      Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
      Call.addRetAttr(Attribute::getWithDereferenceableBytes(
          Call.getContext(), Size->getLimitedValue()));
    } else {
      Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
      Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
          Call.getContext(), Size->getLimitedValue()));
    }
  }

  // Add alignment attribute if alignment is a power of two constant.
  Value *Alignment = getAllocAlignment(&Call, TLI);
  if (!Alignment)
    return Changed;

  ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
  if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
    uint64_t AlignmentVal = AlignOpC->getZExtValue();
    if (llvm::isPowerOf2_64(AlignmentVal)) {
      Align ExistingAlign = Call.getRetAlign().valueOrOne();
      Align NewAlign = Align(AlignmentVal);
      if (NewAlign > ExistingAlign) {
        Call.addRetAttr(
            Attribute::getWithAlignment(Call.getContext(), NewAlign));
        Changed = true;
      }
    }
  }
  return Changed;
}
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
  bool Changed = annotateAnyAllocSite(Call, &TLI);

  // Mark any parameters that are known to be non-null with the nonnull
  // attribute.  This is helpful for inlining calls to functions with null
  // checks on their arguments.
  SmallVector<unsigned, 4> ArgNos;
  unsigned ArgNo = 0;

  for (Value *V : Call.args()) {
    if (V->getType()->isPointerTy() &&
        !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
        isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
      ArgNos.push_back(ArgNo);
    ArgNo++;
  }

  assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");

  if (!ArgNos.empty()) {
    AttributeList AS = Call.getAttributes();
    LLVMContext &Ctx = Call.getContext();
    AS = AS.addParamAttribute(Ctx, ArgNos,
                              Attribute::get(Ctx, Attribute::NonNull));
    Call.setAttributes(AS);
    Changed = true;
  }
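  // For illustration, passing the address of an alloca:
  //   %a = alloca i32
  //   call void @use(ptr %a)
  // becomes
  //   call void @use(ptr nonnull %a)
  // because an alloca in the default address space is known to be non-null.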
  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/callbr/invoke.
  Value *Callee = Call.getCalledOperand();
  Function *CalleeF = dyn_cast<Function>(Callee);
  if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
      transformConstExprCastCall(Call))
    return nullptr;

  if (CalleeF) {
    // Remove the convergent attr on calls when the callee is not convergent.
    if (Call.isConvergent() && !CalleeF->isConvergent() &&
        !CalleeF->isIntrinsic()) {
      LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
                        << "\n");
      Call.setNotConvergent();
      return &Call;
    }

    // If the call and callee calling conventions don't match, and neither one
    // of the calling conventions is compatible with the C calling convention,
    // this call must be unreachable, as the call is undefined.
    if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
         !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
         !(Call.getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = &Call;
      CreateNonTerminatorUnreachable(OldCall);
      // If OldCall does not return void then replaceInstUsesWith poison.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return eraseInstFromFunction(*OldCall);

      // We cannot remove an invoke or a callbr, because it would change the
      // CFG, just change the callee to a null pointer.
      cast<CallBase>(OldCall)->setCalledFunction(
          CalleeF->getFunctionType(),
          Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }
  }
  // Calling a null function pointer is undefined if a null address isn't
  // dereferenceable.
  if ((isa<ConstantPointerNull>(Callee) &&
       !NullPointerIsDefined(Call.getFunction())) ||
      isa<UndefValue>(Callee)) {
    // If Call does not return void then replaceInstUsesWith poison.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!Call.getType()->isVoidTy())
      replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));

    if (Call.isTerminator()) {
      // Can't remove an invoke or callbr because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable, just remove it.
    CreateNonTerminatorUnreachable(&Call);
    return eraseInstFromFunction(Call);
  }

  if (IntrinsicInst *II = findInitTrampoline(Callee))
    return transformCallThroughTrampoline(Call, *II);

  if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
    InlineAsm *IA = cast<InlineAsm>(Callee);
    if (!IA->canThrow()) {
      // Normal inline asm calls cannot throw - mark them
      // 'nounwind'.
      Call.setDoesNotThrow();
      Changed = true;
    }
  }
  // Try to optimize the call if possible; we require DataLayout for most of
  // this.  None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
    Instruction *I = tryOptimizeCall(CI);
    // If we changed something, return the result; otherwise let the
    // fallthrough checks below run.
    if (I) return eraseInstFromFunction(*I);
  }

  if (!Call.use_empty() && !Call.isMustTailCall())
    if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
      Type *CallTy = Call.getType();
      Type *RetArgTy = ReturnedArg->getType();
      if (RetArgTy->canLosslesslyBitCastTo(CallTy))
        return replaceInstUsesWith(
            Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
    }
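  // For illustration, with a callee declared as
  //   declare ptr @memcpy(ptr returned, ptr, i64)
  // the uses of "%r = call ptr @memcpy(ptr %d, ptr %s, i64 %n)" can be
  // replaced directly by %d.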
  // Drop unnecessary kcfi operand bundles from calls that were converted
  // into direct calls.
  auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
  if (Bundle && !Call.isIndirectCall()) {
    DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
      if (CalleeF) {
        ConstantInt *FunctionType = nullptr;
        ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);

        if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
          FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));

        if (FunctionType &&
            FunctionType->getZExtValue() != ExpectedType->getZExtValue())
          dbgs() << Call.getModule()->getName()
                 << ": warning: kcfi: " << Call.getCaller()->getName()
                 << ": call to " << CalleeF->getName()
                 << " using a mismatching function pointer type\n";
      }
    });

    return CallBase::removeOperandBundle(&Call, LLVMContext::OB_kcfi);
  }

  if (isRemovableAlloc(&Call, &TLI))
    return visitAllocSite(Call);
  // Handle intrinsics which can be used in both call and invoke context.
  switch (Call.getIntrinsicID()) {
  case Intrinsic::experimental_gc_statepoint: {
    GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
    SmallPtrSet<Value *, 32> LiveGcValues;
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);

      // Remove the relocation if unused.
      if (GCR.use_empty()) {
        eraseInstFromFunction(GCR);
        continue;
      }

      Value *DerivedPtr = GCR.getDerivedPtr();
      Value *BasePtr = GCR.getBasePtr();

      // Undef is undef, even after relocation.
      if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
        replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
        eraseInstFromFunction(GCR);
        continue;
      }

      if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
        // The relocation of null will be null for most any collector.
        // TODO: provide a hook for this in GCStrategy.  There might be some
        // weird collector this property does not hold for.
        if (isa<ConstantPointerNull>(DerivedPtr)) {
          // Use null-pointer of gc_relocate's type to replace it.
          replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
          eraseInstFromFunction(GCR);
          continue;
        }

        // isKnownNonNull -> nonnull attribute
        if (!GCR.hasRetAttr(Attribute::NonNull) &&
            isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
          GCR.addRetAttr(Attribute::NonNull);
          // We discovered a new fact, re-check users.
          Worklist.pushUsersToWorkList(GCR);
        }
      }

      // If we have two copies of the same pointer in the statepoint argument
      // list, canonicalize to one.  This may let us common gc.relocates.
      if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
          GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
        auto *OpIntTy = GCR.getOperand(2)->getType();
        GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
      }

      // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
      // Canonicalize on the type from the uses to the defs

      // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
      LiveGcValues.insert(BasePtr);
      LiveGcValues.insert(DerivedPtr);
    }
    std::optional<OperandBundleUse> Bundle =
        GCSP.getOperandBundle(LLVMContext::OB_gc_live);
    unsigned NumOfGCLives = LiveGcValues.size();
    if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
      break;
    // We can reduce the size of the gc live bundle.
    DenseMap<Value *, unsigned> Val2Idx;
    std::vector<Value *> NewLiveGc;
    for (Value *V : Bundle->Inputs) {
      if (Val2Idx.count(V))
        continue;
      if (LiveGcValues.count(V)) {
        Val2Idx[V] = NewLiveGc.size();
        NewLiveGc.push_back(V);
      } else
        Val2Idx[V] = NumOfGCLives;
    }
    // Update all gc.relocates
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
      Value *BasePtr = GCR.getBasePtr();
      assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
             "Missed live gc for base pointer");
      auto *OpIntTy1 = GCR.getOperand(1)->getType();
      GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
      Value *DerivedPtr = GCR.getDerivedPtr();
      assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
             "Missed live gc for derived pointer");
      auto *OpIntTy2 = GCR.getOperand(2)->getType();
      GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
    }
    // Create new statepoint instruction.
    OperandBundleDef NewBundle("gc-live", NewLiveGc);
    return CallBase::Create(&Call, NewBundle);
  }
  default: { break; }
  }

  return Changed ? &Call : nullptr;
}
/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/invoke.
/// CallBrInst is not supported.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
  auto *Callee =
      dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
  if (!Callee)
    return false;

  assert(!isa<CallBrInst>(Call) &&
         "CallBr's don't have a single point after a def to insert at");

  // If this is a call to a thunk function, don't remove the cast. Thunks are
  // used to transparently forward all incoming parameters and outgoing return
  // values, so it's important to leave the cast in place.
  if (Callee->hasFnAttribute("thunk"))
    return false;

  // If this is a musttail call, the callee's prototype must match the caller's
  // prototype with the exception of pointee types. The code below doesn't
  // implement that, so we can't do this transform.
  // TODO: Do the transform if it only requires adding pointer casts.
  if (Call.isMustTailCall())
    return false;

  Instruction *Caller = &Call;
  const AttributeList &CallerPAL = Call.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
      if (Callee->isDeclaration())
        return false;   // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false;   // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
      if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callbase is an invoke instruction, and the return value is
    // used by a PHI node in a successor, we cannot change the return type of
    // the call because there is no place to put the cast instruction (without
    // breaking the critical edge).  Bail out in this case.
    if (!Caller->use_empty()) {
      BasicBlock *PhisNotSupportedBlock = nullptr;
      if (auto *II = dyn_cast<InvokeInst>(Caller))
        PhisNotSupportedBlock = II->getNormalDest();
      if (PhisNotSupportedBlock)
        for (User *U : Caller->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == PhisNotSupportedBlock)
              return false;
    }
  }

  unsigned NumActualArgs = Call.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
  // Prevent us turning:
  // declare void @takes_i32_inalloca(i32* inalloca)
  //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  //
  // into:
  //  call void @takes_i32_inalloca(i32* null)
  //
  // Similarly, avoid folding away bitcasts of byval calls.
  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    return false;

  auto AI = Call.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
      return false;   // Cannot transform this parameter value.

    // Check if there are any incompatible attributes we cannot drop safely.
    if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
            .overlaps(AttributeFuncs::typeIncompatible(
                ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
      return false;   // Attribute not compatible with transformed value.

    if (Call.isInAllocaArgument(i) ||
        CallerPAL.hasParamAttr(i, Attribute::Preallocated))
      return false;   // Cannot transform to and from inalloca/preallocated.

    if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
      return false;

    if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
        Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
      return false; // Cannot transform to or from byval.
  }
  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters are the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
        FT->getNumParams() != Call.getFunctionType()->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty()) {
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    unsigned SRetIdx;
    if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
        SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
      return false;
  }
  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  SmallVector<Value *, 8> Args;
  SmallVector<AttributeSet, 8> ArgAttrs;
  Args.reserve(NumActualArgs);
  ArgAttrs.reserve(NumActualArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));

  LLVMContext &Ctx = Call.getContext();
  AI = Call.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    Value *NewArg = *AI;
    if ((*AI)->getType() != ParamTy)
      NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
    Args.push_back(NewArg);

    // Add any parameter attributes except the ones incompatible with the new
    // type. Note that we made sure all incompatible ones are safe to drop.
    AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
        ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
    ArgAttrs.push_back(
        CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
  }
  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
    ArgAttrs.push_back(AttributeSet());
  }

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        Value *NewArg = *AI;
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
              CastInst::getCastOpcode(*AI, false, PTy, false);
          NewArg = Builder.CreateCast(opcode, *AI, PTy);
        }
        Args.push_back(NewArg);

        // Add any parameter attributes.
        ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttrs();

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
         "missing argument attributes");
  AttributeList NewCallerPAL = AttributeList::get(
      Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
  SmallVector<OperandBundleDef, 1> OpBundles;
  Call.getOperandBundlesAsDefs(OpBundles);

  CallBase *NewCall;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
                                   II->getUnwindDest(), Args, OpBundles);
  } else {
    NewCall = Builder.CreateCall(Callee, Args, OpBundles);
    cast<CallInst>(NewCall)->setTailCallKind(
        cast<CallInst>(Caller)->getTailCallKind());
  }
  NewCall->takeName(Caller);
  NewCall->setCallingConv(Call.getCallingConv());
  NewCall->setAttributes(NewCallerPAL);

  // Preserve prof metadata if any.
  NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});

  // Insert a cast of the return type as necessary.
  Instruction *NC = NewCall;
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      Instruction *InsertPt = NewCall->getInsertionPointAfterDef();
      assert(InsertPt && "No place to insert cast");
      InsertNewInstBefore(NC, InsertPt->getIterator());
      Worklist.pushUsersToWorkList(*Caller);
    } else {
      NV = PoisonValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    replaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle()) {
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Caller, NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(Caller);
  }

  eraseInstFromFunction(*Caller);
  return true;
}
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
                                                 IntrinsicInst &Tramp) {
  FunctionType *FTy = Call.getFunctionType();
  AttributeList Attrs = Call.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
  FunctionType *NestFTy = NestF->getFunctionType();

  AttributeList NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestArgNo = 0;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
                                      E = NestFTy->param_end();
         I != E; ++NestArgNo, ++I) {
      AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
      if (AS.hasAttribute(Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = AS;
        break;
      }
    }

    if (NestTy) {
      std::vector<Value *> NewArgs;
      std::vector<AttributeSet> NewArgAttrs;
      NewArgs.reserve(Call.arg_size() + 1);
      NewArgAttrs.reserve(Call.arg_size());

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.
      {
        unsigned ArgNo = 0;
        auto I = Call.arg_begin(), E = Call.arg_end();
        do {
          if (ArgNo == NestArgNo) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp.getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewArgAttrs.push_back(NestAttr);
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));

          ++ArgNo;
          ++I;
        } while (true);
      }
      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type *> NewTypes;
      NewTypes.reserve(FTy->getNumParams() + 1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned ArgNo = 0;
        FunctionType::param_iterator I = FTy->param_begin(),
                                     E = FTy->param_end();

        do {
          if (ArgNo == NestArgNo)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++ArgNo;
          ++I;
        } while (true);
      }
      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy =
          FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
      AttributeList NewPAL =
          AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
                             Attrs.getRetAttrs(), NewArgAttrs);

      SmallVector<OperandBundleDef, 1> OpBundles;
      Call.getOperandBundlesAsDefs(OpBundles);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
        NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
                                       II->getUnwindDest(), NewArgs, OpBundles);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
        NewCaller =
            CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
                               CBI->getIndirectDests(), NewArgs, OpBundles);
        cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
        cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
        cast<CallInst>(NewCaller)->setTailCallKind(
            cast<CallInst>(Call).getTailCallKind());
        cast<CallInst>(NewCaller)->setCallingConv(
            cast<CallInst>(Call).getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }
      NewCaller->setDebugLoc(Call.getDebugLoc());

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Call.setCalledFunction(FTy, NestF);
  return &Call;
}