//===- InstructionCombining.cpp - Combine multiple instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions. This pass does not modify the CFG. This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// every instruction:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
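// A couple of concrete illustrations of the canonicalizations above (added
// here as examples, not part of the original header): rule 1 turns
// "add i32 1, %x" into "add i32 %x, 1", and rule 6 turns "mul i32 %x, 8"
// into "shl i32 %x, 3".
//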
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumWorklistIterations,
          "Number of instruction combining iterations performed");
STATISTIC(NumOneIteration, "Number of functions with one iteration");
STATISTIC(NumTwoIterations, "Number of functions with two iterations");
STATISTIC(NumThreeIterations, "Number of functions with three iterations");
STATISTIC(NumFourOrMoreIterations,
          "Number of functions with four or more iterations");

STATISTIC(NumCombined, "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst, "Number of dead inst eliminated");
STATISTIC(NumSunkInst, "Number of instructions sunk");
STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor, "Number of factorizations");
STATISTIC(NumReassoc, "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
              "Controls which instructions are visited");
static cl::opt<bool>
    EnableCodeSinking("instcombine-code-sinking",
                      cl::desc("Enable code sinking"), cl::init(true));

static cl::opt<unsigned> MaxSinkNumUsers(
    "instcombine-max-sink-users", cl::init(32),
    cl::desc("Maximum number of undroppable users for instruction sinking"));

static cl::opt<unsigned>
    MaxArraySize("instcombine-maxarray-size", cl::init(1024),
                 cl::desc("Maximum array size considered when doing a combine"));

// FIXME: Remove this flag when it is no longer necessary to convert
// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
// increases variable availability at the cost of accuracy. Variables that
// cannot be promoted by mem2reg or SROA will be described as living in memory
// for their entire lifetime. However, passes like DSE and instcombine can
// delete stores to the alloca, leading to misleading and inaccurate debug
// information. This flag can be removed when those passes are fixed.
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                               cl::Hidden, cl::init(true));
std::optional<Instruction *>
InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.instCombineIntrinsic(*this, II);
  }
  return std::nullopt;
}

std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
    IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
                                                KnownBitsComputed);
  }
  return std::nullopt;
}

std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
    IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
    APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.simplifyDemandedVectorEltsIntrinsic(
        *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  return std::nullopt;
}

bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
  return TTI.isValidAddrSpaceCast(FromAS, ToAS);
}

Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
  return llvm::emitGEPOffset(&Builder, DL, GEP);
}
/// Legal integers and common types are considered desirable. This is used to
/// avoid creating instructions with types that may not be supported well by
/// the backend.
/// NOTE: This treats i8, i16 and i32 specially because they are common
///       types in frontend languages.
bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
  switch (BitWidth) {
  case 8:
  case 16:
  case 32:
    return true;
  default:
    return DL.isLegalInteger(BitWidth);
  }
}
/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal or desirable type (like i8) to an
/// illegal type or from a smaller to a larger illegal type. A width of '1'
/// is always treated as a desirable type because i1 is a fundamental type in
/// IR, and there are many specialized optimizations for i1 types.
/// Common/desirable widths are equally treated as legal to convert to, in
/// order to open up more combining opportunities.
bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
                                        unsigned ToWidth) const {
  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);

  // Convert to desirable widths even if they are not legal types.
  // Only shrink types, to prevent infinite loops.
  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
    return true;

  // If this is a legal or desirable integer from type, and the result would be
  // an illegal type, don't do the transformation.
  if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}
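// Illustrative example (added comment, assuming a typical 64-bit DataLayout
// with legal integer widths n8:16:32:64): shouldChangeType(33, 32) is true
// because we shrink to a desirable width, while shouldChangeType(8, 17) is
// false because i8 is desirable and i17 is illegal.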
/// Return true if it is desirable to convert a computation from 'From' to 'To'.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. i1 is always treated as a legal type because it is
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
  // TODO: This could be extended to allow vectors. Datalayout changes might be
  // needed to properly support that.
  if (!From->isIntegerTy() || !To->isIntegerTy())
    return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  return shouldChangeType(FromWidth, ToWidth);
}
// Return true, if No Signed Wrap should be maintained for I.
// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
// where both B and C should be ConstantInts, results in a constant that does
// not overflow. This function only handles the Add and Sub opcodes. For
// all other opcodes, the function conservatively returns false.
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  if (!OBO || !OBO->hasNoSignedWrap())
    return false;

  // We reason about Add and Sub Only.
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
    return false;

  const APInt *BVal, *CVal;
  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
    return false;

  bool Overflow = false;
  if (Opcode == Instruction::Add)
    (void)BVal->sadd_ov(*CVal, Overflow);
  else
    (void)BVal->ssub_ov(*CVal, Overflow);

  return !Overflow;
}
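// Illustrative example (added comment): when reassociating "(X +nsw 10) +nsw
// 20", the folded constant 10 + 20 = 30 does not overflow, so nsw can be
// kept; for "(X +nsw INT_MAX) +nsw 1" the folded constant overflows and nsw
// must be dropped.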
static bool hasNoUnsignedWrap(BinaryOperator &I) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  return OBO && OBO->hasNoUnsignedWrap();
}

static bool hasNoSignedWrap(BinaryOperator &I) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  return OBO && OBO->hasNoSignedWrap();
}
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable as they can be
/// preserved.
static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
  if (!FPMO) {
    I.clearSubclassOptionalData();
    return;
  }

  FastMathFlags FMF = I.getFastMathFlags();
  I.clearSubclassOptionalData();
  I.setFastMathFlags(FMF);
}
/// Combine constant operands of associative operations either before or after
/// a cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
                                   InstCombinerImpl &IC) {
  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
  if (!Cast || !Cast->hasOneUse())
    return false;

  // TODO: Enhance logic for other casts and remove this check.
  auto CastOpcode = Cast->getOpcode();
  if (CastOpcode != Instruction::ZExt)
    return false;

  // TODO: Enhance logic for other BinOps and remove this check.
  if (!BinOp1->isBitwiseLogicOp())
    return false;

  auto AssocOpcode = BinOp1->getOpcode();
  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
    return false;

  Constant *C1, *C2;
  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
      !match(BinOp2->getOperand(1), m_Constant(C2)))
    return false;

  // TODO: This assumes a zext cast.
  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
  // to the destination type might lose bits.

  // Fold the constants together in the destination type:
  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
  const DataLayout &DL = IC.getDataLayout();
  Type *DestTy = C1->getType();
  Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
  if (!CastC2)
    return false;
  Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
  if (!FoldedC)
    return false;

  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
  IC.replaceOperand(*BinOp1, 1, FoldedC);
  return true;
}
// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
// inttoptr ( ptrtoint (x) ) --> x
Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
  if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
                      DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
    auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
    Type *CastTy = IntToPtr->getDestTy();
    if (PtrToInt &&
        CastTy->getPointerAddressSpace() ==
            PtrToInt->getSrcTy()->getPointerAddressSpace() &&
        DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
            DL.getTypeSizeInBits(PtrToInt->getDestTy()))
      return PtrToInt->getOperand(0);
  }
  return nullptr;
}
/// This performs a few simplifications for operators that are associative or
/// commutative:
///
///  Commutative operators:
///
///  1. Order operands such that they are listed from right (least complex) to
///     left (most complex). This puts constants before unary operators before
///     binary operators.
///
///  Associative operators:
///
///  2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
///  3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
///
///  Associative and commutative operators:
///
///  4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
///  5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
///  6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
///     if C1 and C2 are constants.
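///
/// For example (illustrative, not part of the original comment), rule 2 turns
/// "(X ^ Y) ^ Y" into "X ^ (Y ^ Y)" because "Y ^ Y" simplifies to 0, leaving
/// "X ^ 0" which then folds to X.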
bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  bool Changed = false;

  do {
    // Order operands such that they are listed from right (least complex) to
    // left (most complex). This puts constants before unary operators before
    // binary operators.
    if (I.isCommutative() && getComplexity(I.getOperand(0)) <
                                 getComplexity(I.getOperand(1)))
      Changed = !I.swapOperands();

    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));

    if (I.isAssociative()) {
      // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "B op C" simplify?
        if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "A op V".
          replaceOperand(I, 0, A);
          replaceOperand(I, 1, V);
          bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
          bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);

          // Conservatively clear all optional flags since they may not be
          // preserved by the reassociation. Reset nsw/nuw based on the above
          // analysis.
          ClearSubclassDataAfterReassociation(I);

          // Note: this is only valid because SimplifyBinOp doesn't look at
          // the operands to Op0.
          if (IsNUW)
            I.setHasNoUnsignedWrap(true);
          if (IsNSW)
            I.setHasNoSignedWrap(true);

          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "A op B" simplify?
        if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op C".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, C);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }
    }

    if (I.isAssociative() && I.isCommutative()) {
      if (simplifyAssocCastAssoc(&I, *this)) {
        Changed = true;
        ++NumReassoc;
        continue;
      }

      // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op B".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, B);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "B op V".
          replaceOperand(I, 0, B);
          replaceOperand(I, 1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }
    }

    // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
    // if C1 and C2 are constants.
    Value *A, *B;
    Constant *C1, *C2, *CRes;
    if (Op0 && Op1 &&
        Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
        match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
        match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
        (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
      bool IsNUW = hasNoUnsignedWrap(I) &&
                   hasNoUnsignedWrap(*Op0) &&
                   hasNoUnsignedWrap(*Op1);
      BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
                                  BinaryOperator::CreateNUW(Opcode, A, B) :
                                  BinaryOperator::Create(Opcode, A, B);

      if (isa<FPMathOperator>(NewBO)) {
        FastMathFlags Flags = I.getFastMathFlags() &
                              Op0->getFastMathFlags() &
                              Op1->getFastMathFlags();
        NewBO->setFastMathFlags(Flags);
      }
      InsertNewInstWith(NewBO, I.getIterator());
      NewBO->takeName(Op1);
      replaceOperand(I, 0, NewBO);
      replaceOperand(I, 1, CRes);
      // Conservatively clear the optional flags, since they may not be
      // preserved by the reassociation.
      ClearSubclassDataAfterReassociation(I);
      if (IsNUW)
        I.setHasNoUnsignedWrap(true);

      Changed = true;
      continue;
    }

    // No further simplifications.
    return Changed;
  } while (true);
}
/// Return whether "X LOp (Y ROp Z)" is always equal to
/// "(X LOp Y) ROp (X LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  // X & (Y | Z) <--> (X & Y) | (X & Z)
  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
  if (LOp == Instruction::And)
    return ROp == Instruction::Or || ROp == Instruction::Xor;

  // X | (Y & Z) <--> (X | Y) & (X | Z)
  if (LOp == Instruction::Or)
    return ROp == Instruction::And;

  // X * (Y + Z) <--> (X * Y) + (X * Z)
  // X * (Y - Z) <--> (X * Y) - (X * Z)
  if (LOp == Instruction::Mul)
    return ROp == Instruction::Add || ROp == Instruction::Sub;

  return false;
}

/// Return whether "(X LOp Y) ROp Z" is always equal to
/// "(X ROp Z) LOp (Y ROp Z)".
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  if (Instruction::isCommutative(ROp))
    return leftDistributesOverRight(ROp, LOp);

  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);

  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
  // but this requires knowing that the addition does not overflow and other
  // such subtleties.
}
/// This function returns identity value for given opcode, which can be used to
/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
  if (isa<Constant>(V))
    return nullptr;

  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
}
/// This function predicates factorization using distributive laws. By default,
/// it just returns the 'Op' inputs. But for special-cases like
/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
/// allow more factorization opportunities.
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
                          Value *&LHS, Value *&RHS) {
  assert(Op && "Expected a binary operator");
  LHS = Op->getOperand(0);
  RHS = Op->getOperand(1);
  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
    Constant *C;
    if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
      // X << C --> X * (1 << C)
      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
      return Instruction::Mul;
    }
    // TODO: We can add other conversions e.g. shr => div etc.
  }
  return Op->getOpcode();
}
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
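/// For example (illustrative), in IR:
///   %t0 = mul i32 %a, %b
///   %t1 = mul i32 %a, %c
///   %r  = add i32 %t0, %t1
/// becomes
///   %s  = add i32 %b, %c
///   %r  = mul i32 %a, %s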
static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
                               InstCombiner::BuilderTy &Builder,
                               Instruction::BinaryOps InnerOpcode, Value *A,
                               Value *B, Value *C, Value *D) {
  assert(A && B && C && D && "All values must be provided");

  Value *V = nullptr;
  Value *RetVal = nullptr;
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Does "X op' Y" always equal "Y op' X"?
  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);

  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
    // commutative case, "(A op' B) op (C op' A)"?
    if (A == C || (InnerCommutative && A == D)) {
      if (A != C)
        std::swap(C, D);
      // Consider forming "A op' (B op D)".
      // If "B op D" simplifies then it can be formed with no cost.
      V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));

      // If "B op D" doesn't simplify then only go on if one of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
        V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
      if (V)
        RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
    }
  }

  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
  if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
    // commutative case, "(A op' B) op (B op' D)"?
    if (B == D || (InnerCommutative && B == C)) {
      if (B != D)
        std::swap(C, D);
      // Consider forming "(A op C) op' B".
      // If "A op C" simplifies then it can be formed with no cost.
      V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));

      // If "A op C" doesn't simplify then only go on if one of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
        V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
      if (V)
        RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
    }
  }

  if (!RetVal)
    return nullptr;

  ++NumFactor;
  RetVal->takeName(&I);

  // Try to add no-overflow flags to the final value.
  if (isa<OverflowingBinaryOperator>(RetVal)) {
    bool HasNSW = false;
    bool HasNUW = false;
    if (isa<OverflowingBinaryOperator>(&I)) {
      HasNSW = I.hasNoSignedWrap();
      HasNUW = I.hasNoUnsignedWrap();
    }
    if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
      HasNSW &= LOBO->hasNoSignedWrap();
      HasNUW &= LOBO->hasNoUnsignedWrap();
    }

    if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
      HasNSW &= ROBO->hasNoSignedWrap();
      HasNUW &= ROBO->hasNoUnsignedWrap();
    }

    if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
      // We can propagate 'nsw' if we know that
      //  %Y = mul nsw i16 %X, C
      //  %Z = add nsw i16 %Y, %X
      // =>
      //  %Z = mul nsw i16 %X, C+1
      //
      // iff C+1 isn't INT_MIN
      const APInt *CInt;
      if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
        cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);

      // nuw can be propagated with any constant or nuw value.
      cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
    }
  }
  return RetVal;
}
// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
//   IFF
//    1) the logic_shifts match
//    2) either both binops are binops and one is `and` or
//       (logic_shift (inv_logic_shift C1, C), C) == C1 or
//
//    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
//
// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
//   IFF
//    1) the logic_shifts match
//    2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
//
//    -> (BinOp (logic_shift (BinOp X, Y)), Mask)
//
// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
//   IFF
//   1) Binop1 is bitwise logical operator `and`, `or` or `xor`
//   2) Binop2 is `not`
//
//   -> (arithmetic_shift Binop1((not X), Y), Amt)
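//
// Illustrative example of the second pattern (added comment):
//   ((X << 3) | Mask) | (Y << 3)  -->  ((X | Y) << 3) | Mask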
Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
  auto IsValidBinOpc = [](unsigned Opc) {
    switch (Opc) {
    default:
      return false;
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::Add:
      // Skip Sub as we only match constant masks which will canonicalize to use
      // add.
      return true;
    }
  };

  // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
  // constraints.
  auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
                                      unsigned ShOpc) {
    assert(ShOpc != Instruction::AShr);
    return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
           ShOpc == Instruction::Shl;
  };

  auto GetInvShift = [](unsigned ShOpc) {
    assert(ShOpc != Instruction::AShr);
    return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
  };

  auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
                                 unsigned ShOpc, Constant *CMask,
                                 Constant *CShift) {
    // If the BinOp1 is `and` we don't need to check the mask.
    if (BinOpc1 == Instruction::And)
      return true;

    // For all other possible transfers we need complete distributable
    // binop/shift (anything but `add` + `lshr`).
    if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
      return false;

    // If BinOp2 is `and`, any mask works (this only really helps for non-splat
    // vecs, otherwise the mask will be simplified and the following check will
    // handle it).
    if (BinOpc2 == Instruction::And)
      return true;

    // Otherwise, need mask that meets the below requirement.
    // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
    return ConstantExpr::get(
               ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift),
               CShift) == CMask;
  };

  auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
    Constant *CMask, *CShift;
    Value *X, *Y, *ShiftedX, *Mask, *Shift;
    if (!match(I.getOperand(ShOpnum),
               m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
      return nullptr;
    if (!match(I.getOperand(1 - ShOpnum),
               m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
      return nullptr;

    if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
      return nullptr;

    // Make sure we are matching instruction shifts and not ConstantExpr
    auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
    auto *IX = dyn_cast<Instruction>(ShiftedX);
    if (!IY || !IX)
      return nullptr;

    // LHS and RHS need same shift opcode
    unsigned ShOpc = IY->getOpcode();
    if (ShOpc != IX->getOpcode())
      return nullptr;

    // Make sure binop is real instruction and not ConstantExpr
    auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
    if (!BO2)
      return nullptr;

    unsigned BinOpc = BO2->getOpcode();
    // Make sure we have valid binops.
    if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
      return nullptr;

    if (ShOpc == Instruction::AShr) {
      if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
          BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
        Value *NotX = Builder.CreateNot(X);
        Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
        return BinaryOperator::Create(
            static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
      }

      return nullptr;
    }

    // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
    // distribute to drop the shift irrelevant of constants.
    if (BinOpc == I.getOpcode() &&
        IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
      Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
      Value *NewBinOp1 = Builder.CreateBinOp(
          static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
      return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
    }

    // Otherwise we can only distribute by constant shifting the mask, so
    // ensure we have constants.
    if (!match(Shift, m_ImmConstant(CShift)))
      return nullptr;
    if (!match(Mask, m_ImmConstant(CMask)))
      return nullptr;

    // Check if we can distribute the binops.
    if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
      return nullptr;

    Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift);
    Value *NewBinOp2 = Builder.CreateBinOp(
        static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
    Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
    return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
                                  NewBinOp1, CShift);
  };

  if (Instruction *R = MatchBinOp(0))
    return R;
  return MatchBinOp(1);
}
// (Binop (zext C), (select C, T, F))
//    -> (select C, (binop 1, T), (binop 0, F))
//
// (Binop (sext C), (select C, T, F))
//    -> (select C, (binop -1, T), (binop 0, F))
//
// Attempt to simplify binary operations into a select with folded args, when
// one operand of the binop is a select instruction and the other operand is a
// zext/sext extension, whose value is the select condition.
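//
// Illustrative example (added comment):
//   add (zext i1 %c), (select i1 %c, i32 5, i32 9)
//     -> select i1 %c, i32 6, i32 9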
Instruction *
InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
  // TODO: this simplification may be extended to any speculatable instruction,
  // not just binops, and would possibly be handled better in FoldOpIntoSelect.
  Instruction::BinaryOps Opc = I.getOpcode();
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *A, *CondVal, *TrueVal, *FalseVal;
  Value *CastOp;

  auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
    return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
           A->getType()->getScalarSizeInBits() == 1 &&
           match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
                                    m_Value(FalseVal)));
  };

  // Make sure one side of the binop is a select instruction, and the other is a
  // zero/sign extension operating on a i1.
  if (MatchSelectAndCast(LHS, RHS))
    CastOp = LHS;
  else if (MatchSelectAndCast(RHS, LHS))
    CastOp = RHS;
  else
    return nullptr;

  auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
    bool IsCastOpRHS = (CastOp == RHS);
    bool IsZExt = isa<ZExtOperator>(CastOp);
    Constant *C;

    if (IsTrueArm) {
      C = Constant::getNullValue(V->getType());
    } else if (IsZExt) {
      unsigned BitWidth = V->getType()->getScalarSizeInBits();
      C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
    } else {
      C = Constant::getAllOnesValue(V->getType());
    }

    return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
                       : Builder.CreateBinOp(Opc, C, V);
  };

  // If the value used in the zext/sext is the select condition, or the negated
  // of the select condition, the binop can be simplified.
  if (CondVal == A) {
    Value *NewTrueVal = NewFoldedConst(false, TrueVal);
    return SelectInst::Create(CondVal, NewTrueVal,
                              NewFoldedConst(true, FalseVal));
  }

  if (match(A, m_Not(m_Specific(CondVal)))) {
    Value *NewTrueVal = NewFoldedConst(true, TrueVal);
    return SelectInst::Create(CondVal, NewTrueVal,
                              NewFoldedConst(false, FalseVal));
  }

  return nullptr;
}

Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
  Value *A, *B, *C, *D;
  Instruction::BinaryOps LHSOpcode, RHSOpcode;

  if (Op0)
    LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
  if (Op1)
    RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);

  // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
  // the common term.
  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
    if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
      return V;

  // The instruction has the form "(A op' B) op (C)".  Try to factorize common
  // term.
  if (Op0)
    if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
      if (Value *V =
              tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
        return V;

  // The instruction has the form "(B) op (C op' D)".  Try to factorize common
  // term.
  if (Op1)
    if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
      if (Value *V =
              tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
        return V;

  return nullptr;
}
/// This tries to simplify binary operations which some other binary operation
/// distributes over either by factorizing out common terms
/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
/// Returns the simplified value, or null if it didn't simplify.
Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Factorization.
  if (Value *R = tryFactorizationFolds(I))
    return R;

  // Expansion.
  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
    // The instruction has the form "(A op' B) op C".  See if expanding it out
    // to "(A op C) op' (B op C)" results in simplifications.
    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
    Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
    Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);

    // Do "A op C" and "B op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      C = Builder.CreateBinOp(InnerOpcode, L, R);
      C->takeName(&I);
      return C;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // They do! Return "B op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, B, C);
      C->takeName(&I);
      return C;
    }

    // Does "B op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, A, C);
      C->takeName(&I);
      return C;
    }
  }

  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
    // The instruction has the form "A op (B op' C)".  See if expanding it out
    // to "(A op B) op' (A op C)" results in simplifications.
    Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
    Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
    Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
    Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);

    // Do "A op B" and "A op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      A = Builder.CreateBinOp(InnerOpcode, L, R);
      A->takeName(&I);
      return A;
    }

    // Does "A op B" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, C);
      A->takeName(&I);
      return A;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // They do! Return "A op B".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, B);
      A->takeName(&I);
      return A;
    }
  }

  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
                                                        Value *LHS,
                                                        Value *RHS) {
  Value *A, *B, *C, *D, *E, *F;
  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
  if (!LHSIsSelect && !RHSIsSelect)
    return nullptr;

  FastMathFlags FMF;
  BuilderTy::FastMathFlagGuard Guard(Builder);
  if (isa<FPMathOperator>(&I)) {
    FMF = I.getFastMathFlags();
    Builder.setFastMathFlags(FMF);
  }

  Instruction::BinaryOps Opcode = I.getOpcode();
  SimplifyQuery Q = SQ.getWithInstruction(&I);

  Value *Cond, *True = nullptr, *False = nullptr;

  // Special-case for add/negate combination. Replace the zero in the negation
  // with the trailing add operand:
  // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
  // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
  auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
    // We need an 'add' and exactly 1 arm of the select to have been simplified.
    if (Opcode != Instruction::Add || (!True && !False) || (True && False))
      return nullptr;

    Value *N;
    if (True && match(FVal, m_Neg(m_Value(N)))) {
      Value *Sub = Builder.CreateSub(Z, N);
      return Builder.CreateSelect(Cond, True, Sub, I.getName());
    }
    if (False && match(TVal, m_Neg(m_Value(N)))) {
      Value *Sub = Builder.CreateSub(Z, N);
      return Builder.CreateSelect(Cond, Sub, False, I.getName());
    }
    return nullptr;
  };

  if (LHSIsSelect && RHSIsSelect && A == D) {
    // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
    Cond = A;
    True = simplifyBinOp(Opcode, B, E, FMF, Q);
    False = simplifyBinOp(Opcode, C, F, FMF, Q);

    if (LHS->hasOneUse() && RHS->hasOneUse()) {
      if (False && !True)
        True = Builder.CreateBinOp(Opcode, B, E);
      else if (True && !False)
        False = Builder.CreateBinOp(Opcode, C, F);
    }
  } else if (LHSIsSelect && LHS->hasOneUse()) {
    // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
    Cond = A;
    True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
    False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
    if (Value *NewSel = foldAddNegate(B, C, RHS))
      return NewSel;
  } else if (RHSIsSelect && RHS->hasOneUse()) {
    // X op (D ? E : F) -> D ? (X op E) : (X op F)
    Cond = D;
    True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
    False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
    if (Value *NewSel = foldAddNegate(E, F, LHS))
      return NewSel;
  }

  if (!True || !False)
    return nullptr;

  Value *SI = Builder.CreateSelect(Cond, True, False);
  SI->takeName(&I);
  return SI;
}
/// Freely adapt every user of V as-if V was changed to !V.
/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
  assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
  for (User *U : make_early_inc_range(I->users())) {
    if (U == IgnoredUser)
      continue; // Don't consider this user.
    switch (cast<Instruction>(U)->getOpcode()) {
    case Instruction::Select: {
      auto *SI = cast<SelectInst>(U);
      SI->swapValues();
      SI->swapProfMetadata();
      break;
    }
    case Instruction::Br:
      cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
      break;
    case Instruction::Xor:
      replaceInstUsesWith(cast<Instruction>(*U), I);
      break;
    default:
      llvm_unreachable("Got unexpected user - out of sync with "
                       "canFreelyInvertAllUsersOf() ?");
    }
  }
}
/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
/// constant zero (which is the 'negate' form).
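/// For example (illustrative), for "sub i32 0, %x" this returns %x, and for a
/// constant such as i32 42 it returns the folded negation i32 -42.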
Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
  Value *NegV;
  if (match(V, m_Neg(m_Value(NegV))))
    return NegV;

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantExpr::getNeg(C);

  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
    if (C->getType()->getElementType()->isIntegerTy())
      return ConstantExpr::getNeg(C);

  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      Constant *Elt = CV->getAggregateElement(i);
      if (!Elt)
        return nullptr;

      if (isa<UndefValue>(Elt))
        continue;

      if (!isa<ConstantInt>(Elt))
        return nullptr;
    }
    return ConstantExpr::getNeg(CV);
  }

  // Negate integer vector splats.
  if (auto *CV = dyn_cast<Constant>(V))
    if (CV->getType()->isVectorTy() &&
        CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
      return ConstantExpr::getNeg(CV);

  return nullptr;
}
/// A binop with a constant operand and a sign-extended boolean operand may be
/// converted into a select of constants by applying the binary operation to
/// the constant with the two possible values of the extended boolean (0 or -1).
Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
  // TODO: Handle non-commutative binop (constant is operand 0).
  // TODO: Handle zext.
  // TODO: Peek through 'not' of cast.
  Value *BO0 = BO.getOperand(0);
  Value *BO1 = BO.getOperand(1);
  Value *X;
  Constant *C;
  if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
      !X->getType()->isIntOrIntVectorTy(1))
    return nullptr;

  // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
  Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
  Constant *Zero = ConstantInt::getNullValue(BO.getType());
  Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
  Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
  return SelectInst::Create(X, TVal, FVal);
}
static Constant *constantFoldOperationIntoSelectOperand(Instruction &I,
                                                        SelectInst *SI,
                                                        bool IsTrueArm) {
  SmallVector<Constant *> ConstOps;
  for (Value *Op : I.operands()) {
    CmpInst::Predicate Pred;
    Constant *C = nullptr;
    if (Op == SI) {
      C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
                                       : SI->getFalseValue());
    } else if (match(SI->getCondition(),
                     m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
               Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
               isGuaranteedNotToBeUndefOrPoison(C)) {
      // Pass
    } else {
      C = dyn_cast<Constant>(Op);
    }
    if (!C)
      return nullptr;

    ConstOps.push_back(C);
  }

  return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
}
static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
                                             Value *NewOp, InstCombiner &IC) {
  Instruction *Clone = I.clone();
  Clone->replaceUsesOfWith(SI, NewOp);
  IC.InsertNewInstBefore(Clone, SI->getIterator());
  return Clone;
}
Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
                                                bool FoldWithMultiUse) {
  // Don't modify shared select instructions unless set FoldWithMultiUse
  if (!SI->hasOneUse() && !FoldWithMultiUse)
    return nullptr;

  Value *TV = SI->getTrueValue();
  Value *FV = SI->getFalseValue();
  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
    return nullptr;

  // Bool selects with constant operands can be folded to logical ops.
  if (SI->getType()->isIntOrIntVectorTy(1))
    return nullptr;

  // If it's a bitcast involving vectors, make sure it has the same number of
  // elements on both sides.
  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
    VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
    VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());

    // Verify that either both or neither are vectors.
    if ((SrcTy == nullptr) != (DestTy == nullptr))
      return nullptr;

    // If vectors, verify that they have the same number of elements.
    if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
      return nullptr;
  }

  // Test if a FCmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms. And in this case, at
  // least one of the comparison operands has at least one user besides
  // the compare (the select), which would often largely negate the
  // benefit of folding anyway.
  if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
    if (CI->hasOneUse()) {
      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
        return nullptr;
    }
  }

  // Make sure that one of the select arms constant folds successfully.
  Value *NewTV =
      constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
  Value *NewFV =
      constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
  if (!NewTV && !NewFV)
    return nullptr;

  // Create an instruction for the arm that did not fold.
  if (!NewTV)
    NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
  if (!NewFV)
    NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
  return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}
static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
                                         Value *InValue, BasicBlock *InBB,
                                         const DataLayout &DL,
                                         const SimplifyQuery SQ) {
  // NB: It is a precondition of this transform that the operands be
  // phi translatable! This is usually trivially satisfied by limiting it
  // to constant ops, and for selects we do a more sophisticated check.
  SmallVector<Value *> Ops;
  for (Value *Op : I.operands()) {
    if (Op == PN)
      Ops.push_back(InValue);
    else
      Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
  }

  // Don't consider the simplification successful if we get back a constant
  // expression. That's just an instruction in hiding.
  // Also reject the case where we simplify back to the phi node. We wouldn't
  // be able to remove it in that case.
  Value *NewVal = simplifyInstructionWithOperands(
      &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
  if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
    return NewVal;

  // Check if incoming PHI value can be replaced with constant
  // based on implied condition.
  BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
  const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
  if (TerminatorBI && TerminatorBI->isConditional() &&
      TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
    bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
    std::optional<bool> ImpliedCond =
        isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
                           Ops[0], Ops[1], DL, LHSIsTrue);
    if (ImpliedCond)
      return ConstantInt::getBool(I.getType(), ImpliedCond.value());
  }

  return nullptr;
}
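// Illustrative example for the fold below (added comment): given
//   %p = phi i32 [ 1, %bb0 ], [ 2, %bb1 ]
//   %r = add i32 %p, 1
// foldOpIntoPhi can rewrite this as
//   %r = phi i32 [ 2, %bb0 ], [ 3, %bb1 ]
// because the operation simplifies with every incoming value.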
Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0)
    return nullptr;

  // We normally only transform phis with a single use.  However, if a PHI has
  // multiple uses and they are all the same operation, we can fold *all* of the
  // uses into the PHI.
  if (!PN->hasOneUse()) {
    // Walk the use list for the instruction, comparing them to I.
    for (User *U : PN->users()) {
      Instruction *UI = cast<Instruction>(U);
      if (UI != &I && !I.isIdenticalTo(UI))
        return nullptr;
    }
    // Otherwise, we can replace *all* users with the new PHI we form.
  }

  // Check to see whether the instruction can be folded into each phi operand.
  // If there is one operand that does not fold, remember the BB it is in.
  // If there is more than one or if *it* is a PHI, bail out.
  SmallVector<Value *> NewPhiValues;
  BasicBlock *NonSimplifiedBB = nullptr;
  Value *NonSimplifiedInVal = nullptr;
  for (unsigned i = 0; i != NumPHIValues; ++i) {
    Value *InVal = PN->getIncomingValue(i);
    BasicBlock *InBB = PN->getIncomingBlock(i);

    if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
      NewPhiValues.push_back(NewVal);
      continue;
    }

    if (NonSimplifiedBB) return nullptr;  // More than one non-simplified value.

    NonSimplifiedBB = InBB;
    NonSimplifiedInVal = InVal;
    NewPhiValues.push_back(nullptr);

    // If the InVal is an invoke at the end of the pred block, then we can't
    // insert a computation after it without breaking the edge.
    if (isa<InvokeInst>(InVal))
      if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
        return nullptr;

    // If the incoming non-constant value is reachable from the phis block,
    // we'll push the operation across a loop backedge. This could result in
    // an infinite combine loop, and is generally non-profitable (especially
    // if the operation was originally outside the loop).
    if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
                               LI))
      return nullptr;
  }

  // If there is exactly one non-simplified value, we can insert a copy of the
  // operation in that block.  However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop).  Only
  // do this if the pred block is unconditionally branching into the phi block.
  // Also, make sure that the pred block is not dead code.
  if (NonSimplifiedBB != nullptr) {
    BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
    if (!BI || !BI->isUnconditional() ||
        !DT.isReachableFromEntry(NonSimplifiedBB))
      return nullptr;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
  InsertNewInstBefore(NewPN, PN->getIterator());
  NewPN->takeName(PN);
  NewPN->setDebugLoc(PN->getDebugLoc());

  // If we are going to have to insert a new computation, do so right before the
  // predecessor's terminator.
  Instruction *Clone = nullptr;
  if (NonSimplifiedBB) {
    Clone = I.clone();
    for (Use &U : Clone->operands()) {
      if (U == PN)
        U = NonSimplifiedInVal;
      else
        U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
    }
    InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
  }

  for (unsigned i = 0; i != NumPHIValues; ++i) {
    if (NewPhiValues[i])
      NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
    else
      NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
  }

  for (User *U : make_early_inc_range(PN->users())) {
    Instruction *User = cast<Instruction>(U);
    if (User == &I) continue;
    replaceInstUsesWith(*User, NewPN);
    eraseInstFromFunction(*User);
  }

  replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
                        const_cast<PHINode &>(*NewPN),
                        const_cast<PHINode &>(*PN), DT);
  return replaceInstUsesWith(I, NewPN);
}
*InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator
&BO
) {
1496 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1497 // we are guarding against replicating the binop in >1 predecessor.
1498 // This could miss matching a phi with 2 constant incoming values.
1499 auto *Phi0
= dyn_cast
<PHINode
>(BO
.getOperand(0));
1500 auto *Phi1
= dyn_cast
<PHINode
>(BO
.getOperand(1));
1501 if (!Phi0
|| !Phi1
|| !Phi0
->hasOneUse() || !Phi1
->hasOneUse() ||
1502 Phi0
->getNumOperands() != Phi1
->getNumOperands())
1505 // TODO: Remove the restriction for binop being in the same block as the phis.
1506 if (BO
.getParent() != Phi0
->getParent() ||
1507 BO
.getParent() != Phi1
->getParent())
1510 // Fold if there is at least one specific constant value in phi0 or phi1's
1511 // incoming values that comes from the same block and this specific constant
1512 // value can be used to do optimization for specific binary operator.
1514 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1515 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1516 // %add = add i32 %phi0, %phi1
1518 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1519 Constant
*C
= ConstantExpr::getBinOpIdentity(BO
.getOpcode(), BO
.getType(),
1520 /*AllowRHSConstant*/ false);
1522 SmallVector
<Value
*, 4> NewIncomingValues
;
1523 auto CanFoldIncomingValuePair
= [&](std::tuple
<Use
&, Use
&> T
) {
1524 auto &Phi0Use
= std::get
<0>(T
);
1525 auto &Phi1Use
= std::get
<1>(T
);
1526 if (Phi0
->getIncomingBlock(Phi0Use
) != Phi1
->getIncomingBlock(Phi1Use
))
1528 Value
*Phi0UseV
= Phi0Use
.get();
1529 Value
*Phi1UseV
= Phi1Use
.get();
1531 NewIncomingValues
.push_back(Phi1UseV
);
1532 else if (Phi1UseV
== C
)
1533 NewIncomingValues
.push_back(Phi0UseV
);
1539 if (all_of(zip(Phi0
->operands(), Phi1
->operands()),
1540 CanFoldIncomingValuePair
)) {
1542 PHINode::Create(Phi0
->getType(), Phi0
->getNumOperands());
1543 assert(NewIncomingValues
.size() == Phi0
->getNumOperands() &&
1544 "The number of collected incoming values should equal the number "
1545 "of the original PHINode operands!");
1546 for (unsigned I
= 0; I
< Phi0
->getNumOperands(); I
++)
1547 NewPhi
->addIncoming(NewIncomingValues
[I
], Phi0
->getIncomingBlock(I
));
1552 if (Phi0
->getNumOperands() != 2 || Phi1
->getNumOperands() != 2)
1555 // Match a pair of incoming constants for one of the predecessor blocks.
1556 BasicBlock
*ConstBB
, *OtherBB
;
1558 if (match(Phi0
->getIncomingValue(0), m_ImmConstant(C0
))) {
1559 ConstBB
= Phi0
->getIncomingBlock(0);
1560 OtherBB
= Phi0
->getIncomingBlock(1);
1561 } else if (match(Phi0
->getIncomingValue(1), m_ImmConstant(C0
))) {
1562 ConstBB
= Phi0
->getIncomingBlock(1);
1563 OtherBB
= Phi0
->getIncomingBlock(0);
1567 if (!match(Phi1
->getIncomingValueForBlock(ConstBB
), m_ImmConstant(C1
)))
1570 // The block that we are hoisting to must reach here unconditionally.
1571 // Otherwise, we could be speculatively executing an expensive or
1572 // non-speculative op.
1573 auto *PredBlockBranch
= dyn_cast
<BranchInst
>(OtherBB
->getTerminator());
1574 if (!PredBlockBranch
|| PredBlockBranch
->isConditional() ||
1575 !DT
.isReachableFromEntry(OtherBB
))
1578 // TODO: This check could be tightened to only apply to binops (div/rem) that
1579 // are not safe to speculatively execute. But that could allow hoisting
1580 // potentially expensive instructions (fdiv for example).
1581 for (auto BBIter
= BO
.getParent()->begin(); &*BBIter
!= &BO
; ++BBIter
)
1582 if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter
))
1585 // Fold constants for the predecessor block with constant incoming values.
1586 Constant
*NewC
= ConstantFoldBinaryOpOperands(BO
.getOpcode(), C0
, C1
, DL
);
1590 // Make a new binop in the predecessor block with the non-constant incoming
1592 Builder
.SetInsertPoint(PredBlockBranch
);
1593 Value
*NewBO
= Builder
.CreateBinOp(BO
.getOpcode(),
1594 Phi0
->getIncomingValueForBlock(OtherBB
),
1595 Phi1
->getIncomingValueForBlock(OtherBB
));
1596 if (auto *NotFoldedNewBO
= dyn_cast
<BinaryOperator
>(NewBO
))
1597 NotFoldedNewBO
->copyIRFlags(&BO
);
1599 // Replace the binop with a phi of the new values. The old phis are dead.
1600 PHINode
*NewPhi
= PHINode::Create(BO
.getType(), 2);
1601 NewPhi
->addIncoming(NewBO
, OtherBB
);
1602 NewPhi
->addIncoming(NewC
, ConstBB
);
Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
  if (!isa<Constant>(I.getOperand(1)))
    return nullptr;

  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
    if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
      return NewSel;
  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
    if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
      return NewPhi;
  }
  return nullptr;
}
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
  // If this GEP has only 0 indices, it is the same pointer as
  // Src. If Src is not a trivial GEP too, don't combine
  // the indices.
  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
      !Src.hasOneUse())
    return false;
  return true;
}
*InstCombinerImpl::foldVectorBinop(BinaryOperator
&Inst
) {
1631 if (!isa
<VectorType
>(Inst
.getType()))
1634 BinaryOperator::BinaryOps Opcode
= Inst
.getOpcode();
1635 Value
*LHS
= Inst
.getOperand(0), *RHS
= Inst
.getOperand(1);
1636 assert(cast
<VectorType
>(LHS
->getType())->getElementCount() ==
1637 cast
<VectorType
>(Inst
.getType())->getElementCount());
1638 assert(cast
<VectorType
>(RHS
->getType())->getElementCount() ==
1639 cast
<VectorType
>(Inst
.getType())->getElementCount());
1641 // If both operands of the binop are vector concatenations, then perform the
1642 // narrow binop on each pair of the source operands followed by concatenation
1644 Value
*L0
, *L1
, *R0
, *R1
;
1646 if (match(LHS
, m_Shuffle(m_Value(L0
), m_Value(L1
), m_Mask(Mask
))) &&
1647 match(RHS
, m_Shuffle(m_Value(R0
), m_Value(R1
), m_SpecificMask(Mask
))) &&
1648 LHS
->hasOneUse() && RHS
->hasOneUse() &&
1649 cast
<ShuffleVectorInst
>(LHS
)->isConcat() &&
1650 cast
<ShuffleVectorInst
>(RHS
)->isConcat()) {
1651 // This transform does not have the speculative execution constraint as
1652 // below because the shuffle is a concatenation. The new binops are
1653 // operating on exactly the same elements as the existing binop.
1654 // TODO: We could ease the mask requirement to allow different undef lanes,
1655 // but that requires an analysis of the binop-with-undef output value.
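    //
    // Illustrative sketch (not from the original source), for a <4 x i32>
    // binop whose operands both concatenate two <2 x i32> halves:
    //   %lhs = shufflevector <2 x i32> %l0, <2 x i32> %l1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    //   %rhs = shufflevector <2 x i32> %r0, <2 x i32> %r1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    //   %r   = add <4 x i32> %lhs, %rhs
    // becomes two narrow adds followed by one concatenating shuffle:
    //   %lo = add <2 x i32> %l0, %r0
    //   %hi = add <2 x i32> %l1, %r1
    //   %r  = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>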
    Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
    if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
      BO->copyIRFlags(&Inst);
    Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
    if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
      BO->copyIRFlags(&Inst);
    return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
  }
  auto createBinOpReverse = [&](Value *X, Value *Y) {
    Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
    if (auto *BO = dyn_cast<BinaryOperator>(V))
      BO->copyIRFlags(&Inst);
    Module *M = Inst.getModule();
    Function *F = Intrinsic::getDeclaration(
        M, Intrinsic::experimental_vector_reverse, V->getType());
    return CallInst::Create(F, V);
  };

  // NOTE: Reverse shuffles don't require the speculative execution protection
  // below because they don't affect which lanes take part in the computation.
  Value *V1, *V2;
  if (match(LHS, m_VecReverse(m_Value(V1)))) {
    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
    if (match(RHS, m_VecReverse(m_Value(V2))) &&
        (LHS->hasOneUse() || RHS->hasOneUse() ||
         (LHS == RHS && LHS->hasNUses(2))))
      return createBinOpReverse(V1, V2);

    // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
    if (LHS->hasOneUse() && isSplatValue(RHS))
      return createBinOpReverse(V1, RHS);
  }
  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
    return createBinOpReverse(LHS, V2);

  // It may not be safe to reorder shuffles and things like div, urem, etc.
  // because we may trap when executing those ops on unknown vector elements.
  if (!isSafeToSpeculativelyExecute(&Inst))
    return nullptr;

  auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
    Value *XY = Builder.CreateBinOp(Opcode, X, Y);
    if (auto *BO = dyn_cast<BinaryOperator>(XY))
      BO->copyIRFlags(&Inst);
    return new ShuffleVectorInst(XY, M);
  };

  // If both arguments of the binary operation are shuffles that use the same
  // mask and shuffle within a single vector, move the shuffle after the binop.
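  //
  // Illustrative sketch (not from the original source):
  //   %a = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  //   %b = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  //   %r = add <4 x i32> %a, %b
  // becomes one add followed by one shuffle:
  //   %s = add <4 x i32> %x, %y
  //   %r = shufflevector <4 x i32> %s, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>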
  if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
      match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
      V1->getType() == V2->getType() &&
      (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
    // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
    return createBinOpShuffle(V1, V2, Mask);
  }

  // If both arguments of a commutative binop are select-shuffles that use the
  // same mask with commuted operands, the shuffles are unnecessary.
  if (Inst.isCommutative() &&
      match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
      match(RHS,
            m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
    auto *LShuf = cast<ShuffleVectorInst>(LHS);
    auto *RShuf = cast<ShuffleVectorInst>(RHS);
    // TODO: Allow shuffles that contain undefs in the mask?
    //       That is legal, but it reduces undef knowledge.
    // TODO: Allow arbitrary shuffles by shuffling after binop?
    //       That might be legal, but we have to deal with poison.
    if (LShuf->isSelect() &&
        !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
        RShuf->isSelect() &&
        !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
      // Example:
      // LHS = shuffle V1, V2, <0, 5, 6, 3>
      // RHS = shuffle V2, V1, <0, 5, 6, 3>
      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
      Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
      NewBO->copyIRFlags(&Inst);
      return NewBO;
    }
  }
  // If one argument is a shuffle within one vector and the other is a constant,
  // try moving the shuffle after the binary operation. This canonicalization
  // intends to move shuffles closer to other shuffles and binops closer to
  // other binops, so they can be folded. It may also enable demanded elements
  // transforms.
  Constant *C;
  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
  if (InstVTy &&
      match(&Inst,
            m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
                      m_ImmConstant(C))) &&
      cast<FixedVectorType>(V1->getType())->getNumElements() <=
          InstVTy->getNumElements()) {
    assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
           "Shuffle should not change scalar type");

    // Find constant NewC that has property:
    //   shuffle(NewC, ShMask) = C
    // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
    // reorder is not possible. A 1-to-1 mapping is not required. Example:
    // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
    bool ConstOp1 = isa<Constant>(RHS);
    ArrayRef<int> ShMask = Mask;
    unsigned SrcVecNumElts =
        cast<FixedVectorType>(V1->getType())->getNumElements();
    UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
    SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
    bool MayChange = true;
    unsigned NumElts = InstVTy->getNumElements();
    for (unsigned I = 0; I < NumElts; ++I) {
      Constant *CElt = C->getAggregateElement(I);
      if (ShMask[I] >= 0) {
        assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
        Constant *NewCElt = NewVecC[ShMask[I]];
        // Bail out if:
        // 1. The constant vector contains a constant expression.
        // 2. The shuffle needs an element of the constant vector that can't
        //    be mapped to a new constant vector.
        // 3. This is a widening shuffle that copies elements of V1 into the
        //    extended elements (extending with undef is allowed).
        if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
            I >= SrcVecNumElts) {
          MayChange = false;
          break;
        }
        NewVecC[ShMask[I]] = CElt;
      }
      // If this is a widening shuffle, we must be able to extend with undef
      // elements. If the original binop does not produce an undef in the high
      // lanes, then this transform is not safe.
      // Similarly for undef lanes due to the shuffle mask, we can only
      // transform binops that preserve undef.
      // TODO: We could shuffle those non-undef constant values into the
      //       result by using a constant vector (rather than an undef vector)
      //       as operand 1 of the new binop, but that might be too aggressive
      //       for target-independent shuffle creation.
      if (I >= SrcVecNumElts || ShMask[I] < 0) {
        Constant *MaybeUndef =
            ConstOp1
                ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
                : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
        if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
          MayChange = false;
          break;
        }
      }
    }
    if (MayChange) {
      Constant *NewC = ConstantVector::get(NewVecC);
      // It may not be safe to execute a binop on a vector with undef elements
      // because the entire instruction can be folded to undef or create poison
      // that did not exist in the original code.
      if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
        NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);

      // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
      // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
      Value *NewLHS = ConstOp1 ? V1 : NewC;
      Value *NewRHS = ConstOp1 ? NewC : V1;
      return createBinOpShuffle(NewLHS, NewRHS, Mask);
    }
  }
  // Try to reassociate to sink a splat shuffle after a binary operation.
  if (Inst.isAssociative() && Inst.isCommutative()) {
    // Canonicalize shuffle operand as LHS.
    if (isa<ShuffleVectorInst>(RHS))
      std::swap(LHS, RHS);

    Value *X;
    ArrayRef<int> MaskC;
    int SplatIndex;
    Value *Y, *OtherOp;
    if (!match(LHS,
               m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
        !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
        X->getType() != Inst.getType() ||
        !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
      return nullptr;

    // FIXME: This may not be safe if the analysis allows undef elements. By
    //        moving 'Y' before the splat shuffle, we are implicitly assuming
    //        that it is not undef/poison at the splat index.
    if (isSplatValue(OtherOp, SplatIndex)) {
      std::swap(Y, OtherOp);
    } else if (!isSplatValue(Y, SplatIndex)) {
      return nullptr;
    }

    // X and Y are splatted values, so perform the binary operation on those
    // values followed by a splat followed by the 2nd binary operation:
    // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
    Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
    SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
    Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
    Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);

    // Intersect FMF on both new binops. Other (poison-generating) flags are
    // dropped to be safe.
    if (isa<FPMathOperator>(R)) {
      R->copyFastMathFlags(&Inst);
      R->andIRFlags(RHS);
    }
    if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
      NewInstBO->copyIRFlags(R);
    return R;
  }

  return nullptr;
}
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
/// sure the narrow op does not overflow.
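///
/// Illustrative sketch (not from the original source), assuming the narrow
/// add is proven by the known-bits check not to overflow:
///   %w = zext i8 %x to i32
///   %r = add i32 %w, 42
/// -->
///   %n = add i8 %x, 42
///   %r = zext i8 %n to i32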
Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
  // We need at least one extended operand.
  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);

  // If this is a sub, we swap the operands since we always want an extension
  // on the RHS. The LHS can be an extension or a constant.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(Op0, Op1);

  Value *X;
  bool IsSext = match(Op0, m_SExt(m_Value(X)));
  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
    return nullptr;

  // If both operands are the same extension from the same source type and we
  // can eliminate at least one (hasOneUse), this might work.
  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
  Value *Y;
  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
        cast<Operator>(Op1)->getOpcode() == CastOpc &&
        (Op0->hasOneUse() || Op1->hasOneUse()))) {
    // If that did not match, see if we have a suitable constant operand.
    // Truncating and extending must produce the same constant.
    Constant *WideC;
    if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
      return nullptr;
    Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
    if (!NarrowC)
      return nullptr;
    Y = NarrowC;
  }

  // Swap back now that we found our operands.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(X, Y);

  // Both operands have narrow versions. Last step: the math must not overflow
  // in the narrow width.
  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
    return nullptr;

  // bo (ext X), (ext Y) --> ext (bo X, Y)
  // bo (ext X), C       --> ext (bo X, C')
  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
    if (IsSext)
      NewBinOp->setHasNoSignedWrap();
    else
      NewBinOp->setHasNoUnsignedWrap();
  }
  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
}
static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
  // At least one GEP must be inbounds.
  if (!GEP1.isInBounds() && !GEP2.isInBounds())
    return false;

  return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
         (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
}
/// Thread a GEP operation with constant indices through the constant true/false
/// arms of a select.
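///
/// Illustrative sketch (not from the original source), with constant select
/// arms @a and @b:
///   %sel = select i1 %cond, ptr @a, ptr @b
///   %gep = getelementptr inbounds i32, ptr %sel, i64 4
/// -->
///   %gep = select i1 %cond, ptr getelementptr inbounds (i32, ptr @a, i64 4),
///                           ptr getelementptr inbounds (i32, ptr @b, i64 4)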
static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
                                  InstCombiner::BuilderTy &Builder) {
  if (!GEP.hasAllConstantIndices())
    return nullptr;

  Instruction *Sel;
  Value *Cond;
  Constant *TrueC, *FalseC;
  if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
      !match(Sel,
             m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
    return nullptr;

  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
  // Propagate 'inbounds' and metadata from existing instructions.
  // Note: using IRBuilder to create the constants for efficiency.
  SmallVector<Value *, 4> IndexC(GEP.indices());
  bool IsInBounds = GEP.isInBounds();
  Type *Ty = GEP.getSourceElementType();
  Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
  Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
  return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
}
Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
                                             GEPOperator *Src) {
  // Combine Indices - If the source pointer to this getelementptr instruction
  // is a getelementptr instruction with matching element type, combine the
  // indices of the two getelementptr instructions into a single instruction.
  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
    return nullptr;

  // For constant GEPs, use a more general offset-based folding approach.
  Type *PtrTy = Src->getType()->getScalarType();
  if (GEP.hasAllConstantIndices() &&
      (Src->hasOneUse() || Src->hasAllConstantIndices())) {
    // Split Src into a variable part and a constant suffix.
    gep_type_iterator GTI = gep_type_begin(*Src);
    Type *BaseType = GTI.getIndexedType();
    bool IsFirstType = true;
    unsigned NumVarIndices = 0;
    for (auto Pair : enumerate(Src->indices())) {
      if (!isa<ConstantInt>(Pair.value())) {
        BaseType = GTI.getIndexedType();
        IsFirstType = false;
        NumVarIndices = Pair.index() + 1;
      }
      ++GTI;
    }

    // Determine the offset for the constant suffix of Src.
    APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
    if (NumVarIndices != Src->getNumIndices()) {
      // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
      if (BaseType->isScalableTy())
        return nullptr;

      SmallVector<Value *> ConstantIndices;
      if (!IsFirstType)
        ConstantIndices.push_back(
            Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
      append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
      Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
    }

    // Add the offset for GEP (which is fully constant).
    if (!GEP.accumulateConstantOffset(DL, Offset))
      return nullptr;

    APInt OffsetOld = Offset;
    // Convert the total offset back into indices.
    SmallVector<APInt> ConstIndices =
        DL.getGEPIndicesForOffset(BaseType, Offset);
    if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
      // If both GEP are constant-indexed, and cannot be merged in either way,
      // convert them to a GEP of i8.
      if (Src->hasAllConstantIndices())
        return replaceInstUsesWith(
            GEP, Builder.CreateGEP(
                     Builder.getInt8Ty(), Src->getOperand(0),
                     Builder.getInt(OffsetOld), "",
                     isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
      return nullptr;
    }
    bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
    SmallVector<Value *> Indices;
    append_range(Indices, drop_end(Src->indices(),
                                   Src->getNumIndices() - NumVarIndices));
    for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
      Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
      // Even if the total offset is inbounds, we may end up representing it
      // by first performing a larger negative offset, and then a smaller
      // positive one. The large negative offset might go out of bounds. Only
      // preserve inbounds if all signs are the same.
      IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
    }

    return replaceInstUsesWith(
        GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
                               Indices, "", IsInBounds));
  }

  if (Src->getResultElementType() != GEP.getSourceElementType())
    return nullptr;

  SmallVector<Value *, 8> Indices;

  // Find out whether the last index in the source GEP is a sequential idx.
  bool EndsWithSequential = false;
  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
       I != E; ++I)
    EndsWithSequential = I.isSequential();

  // Can we combine the two pointer arithmetics offsets?
  if (EndsWithSequential) {
    // Replace: gep (gep %P, long B), long A, ...
    // With:    T = long A+B; gep %P, T, ...
    Value *SO1 = Src->getOperand(Src->getNumOperands() - 1);
    Value *GO1 = GEP.getOperand(1);

    // If they aren't the same type, then the input hasn't been processed
    // by the loop above yet (which canonicalizes sequential index types to
    // intptr_t). Just avoid transforming this until the input has been
    // normalized.
    if (SO1->getType() != GO1->getType())
      return nullptr;

    Value *Sum =
        simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
    // Only do the combine when we are sure the cost after the
    // merge is never more than that before the merge.
    if (Sum == nullptr)
      return nullptr;

    // Update the GEP in place if possible.
    if (Src->getNumOperands() == 2) {
      GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
      replaceOperand(GEP, 0, Src->getOperand(0));
      replaceOperand(GEP, 1, Sum);
      return &GEP;
    }
    Indices.append(Src->op_begin() + 1, Src->op_end() - 1);
    Indices.push_back(Sum);
    Indices.append(GEP.op_begin() + 2, GEP.op_end());
  } else if (isa<Constant>(*GEP.idx_begin()) &&
             cast<Constant>(*GEP.idx_begin())->isNullValue() &&
             Src->getNumOperands() != 1) {
    // Otherwise we can do the fold if the first index of the GEP is a zero
    Indices.append(Src->op_begin() + 1, Src->op_end());
    Indices.append(GEP.idx_begin() + 1, GEP.idx_end());
  }

  if (!Indices.empty())
    return replaceInstUsesWith(
        GEP, Builder.CreateGEP(
                 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
                 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));

  return nullptr;
}
Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  Value *PtrOp = GEP.getOperand(0);
  SmallVector<Value *, 8> Indices(GEP.indices());
  Type *GEPType = GEP.getType();
  Type *GEPEltType = GEP.getSourceElementType();
  bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
  if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
                                 SQ.getWithInstruction(&GEP)))
    return replaceInstUsesWith(GEP, V);

  // For vector geps, use the generic demanded vector support.
  // Skip if GEP return type is scalable. The number of elements is unknown at
  // compile-time.
  if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
    auto VWidth = GEPFVTy->getNumElements();
    APInt UndefElts(VWidth, 0);
    APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
                                              UndefElts)) {
      if (V != &GEP)
        return replaceInstUsesWith(GEP, V);
      return &GEP;
    }

    // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
    // possible (decide on canonical form for pointer broadcast), 3) exploit
    // undef elements to decrease demanded bits
  }

  // Eliminate unneeded casts for indices, and replace indices which displace
  // by multiples of a zero size type with zero.
  bool MadeChange = false;

  // Index width may not be the same width as pointer width.
  // Data layout chooses the right type based on supported integer types.
  Type *NewScalarIndexTy =
      DL.getIndexType(GEP.getPointerOperandType()->getScalarType());

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
       ++I, ++GTI) {
    // Skip indices into struct types.
    if (GTI.isStruct())
      continue;

    Type *IndexTy = (*I)->getType();
    Type *NewIndexType =
        IndexTy->isVectorTy()
            ? VectorType::get(NewScalarIndexTy,
                              cast<VectorType>(IndexTy)->getElementCount())
            : NewScalarIndexTy;

    // If the element type has zero size then any index over it is equivalent
    // to an index of zero, so replace it with zero if it is not zero already.
    Type *EltTy = GTI.getIndexedType();
    if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
      if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
        *I = Constant::getNullValue(NewIndexType);
        MadeChange = true;
      }

    if (IndexTy != NewIndexType) {
      // If we are using a wider index than needed for this platform, shrink
      // it to what we need. If narrower, sign-extend it to what we need.
      // This explicit cast can make subsequent optimizations more obvious.
      *I = Builder.CreateIntCast(*I, NewIndexType, true);
      MadeChange = true;
    }
  }
  if (MadeChange)
    return &GEP;
  // Check to see if the inputs to the PHI node are getelementptr instructions.
  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
    auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
    if (!Op1)
      return nullptr;

    // Don't fold a GEP into itself through a PHI node. This can only happen
    // through the back-edge of a loop. Folding a GEP into itself means that
    // the value of the previous iteration needs to be stored in the meantime,
    // thus requiring an additional register variable to be live, but not
    // actually achieving anything (the GEP still needs to be executed once per
    // loop iteration).
    if (Op1 == &GEP)
      return nullptr;

    int DI = -1;

    for (auto I = PN->op_begin() + 1, E = PN->op_end(); I != E; ++I) {
      auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
      if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
          Op1->getSourceElementType() != Op2->getSourceElementType())
        return nullptr;

      // As for Op1 above, don't try to fold a GEP into itself.
      if (Op2 == &GEP)
        return nullptr;

      // Keep track of the type as we walk the GEP.
      Type *CurTy = nullptr;

      for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
        if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
          return nullptr;

        if (Op1->getOperand(J) != Op2->getOperand(J)) {
          if (DI == -1) {
            // We have not seen any differences yet in the GEPs feeding the
            // PHI, so we record this one if it is allowed to be a variable.

            // The first two arguments can vary for any GEP, the rest have to be
            // static for struct slots
            if (J > 1) {
              assert(CurTy && "No current type?");
              if (CurTy->isStructTy())
                return nullptr;
            }

            DI = J;
          } else {
            // The GEP is different by more than one input. While this could be
            // extended to support GEPs that vary by more than one variable it
            // doesn't make sense since it greatly increases the complexity and
            // would result in an R+R+R addressing mode which no backend
            // directly supports and would need to be broken into several
            // simpler instructions anyway.
            return nullptr;
          }
        }

        // Sink down a layer of the type for the next iteration.
        if (J > 0) {
          if (J == 1) {
            CurTy = Op1->getSourceElementType();
          } else {
            CurTy =
                GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
          }
        }
      }
    }

    // If not all GEPs are identical we'll have to create a new PHI node.
    // Check that the old PHI node has only one use so that it will get
    // removed.
    if (DI != -1 && !PN->hasOneUse())
      return nullptr;

    auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
    if (DI == -1) {
      // All the GEPs feeding the PHI are identical. Clone one down into our
      // BB so that it can be merged with the current GEP.
    } else {
      // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
      // into the current block so it can be merged, and create a new PHI to
      // set that index.
      PHINode *NewPN;
      {
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(PN);
        NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
                                  PN->getNumOperands());
      }

      for (auto &I : PN->operands())
        NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
                           PN->getIncomingBlock(I));

      NewGEP->setOperand(DI, NewPN);
    }

    NewGEP->insertBefore(*GEP.getParent(),
                         GEP.getParent()->getFirstInsertionPt());
    return replaceOperand(GEP, 0, NewGEP);
  }
  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
    if (Instruction *I = visitGEPOfGEP(GEP, Src))
      return I;

  // Skip if GEP source element type is scalable. The type alloc size is unknown
  // at compile-time.
  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
    unsigned AS = GEP.getPointerAddressSpace();
    if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
        DL.getIndexSizeInBits(AS)) {
      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();

      bool Matched = false;
      uint64_t C;
      Value *V = nullptr;
      if (TyAllocSize == 1) {
        V = GEP.getOperand(1);
        Matched = true;
      } else if (match(GEP.getOperand(1),
                       m_AShr(m_Value(V), m_ConstantInt(C)))) {
        if (TyAllocSize == 1ULL << C)
          Matched = true;
      } else if (match(GEP.getOperand(1),
                       m_SDiv(m_Value(V), m_ConstantInt(C)))) {
        if (TyAllocSize == C)
          Matched = true;
      }

      // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
      // only if both point to the same underlying object (otherwise provenance
      // is not necessarily retained).
      Value *Y;
      Value *X = GEP.getOperand(0);
      if (Matched &&
          match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
          getUnderlyingObject(X) == getUnderlyingObject(Y))
        return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
    }
  }

  // We do not handle pointer-vector geps here.
  if (GEPType->isVectorTy())
    return nullptr;

  if (GEP.getNumIndices() == 1) {
    // Try to replace ADD + GEP with GEP + GEP.
    Value *Idx1, *Idx2;
    if (match(GEP.getOperand(1),
              m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
      // %idx = add i64 %idx1, %idx2
      // %gep = getelementptr i32, i32* %ptr, i64 %idx
      // -->
      // %newptr = getelementptr i32, i32* %ptr, i64 %idx1
      // %newgep = getelementptr i32, i32* %newptr, i64 %idx2
      auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
                                       GEP.getPointerOperand(), Idx1);
      return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
                                       Idx2);
    }
  }

  if (!GEP.isInBounds()) {
    unsigned IdxWidth =
        DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
    APInt BasePtrOffset(IdxWidth, 0);
    Value *UnderlyingPtrOp =
        PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
                                                         BasePtrOffset);
    bool CanBeNull, CanBeFreed;
    uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
        DL, CanBeNull, CanBeFreed);
    if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
      if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
          BasePtrOffset.isNonNegative()) {
        APInt AllocSize(IdxWidth, DerefBytes);
        if (BasePtrOffset.ule(AllocSize)) {
          return GetElementPtrInst::CreateInBounds(
              GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
        }
      }
    }
  }

  if (Instruction *R = foldSelectGEP(GEP, Builder))
    return R;

  return nullptr;
}
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
                                         Instruction *AI) {
  if (isa<ConstantPointerNull>(V))
    return true;
  if (auto *LI = dyn_cast<LoadInst>(V))
    return isa<GlobalVariable>(LI->getPointerOperand());
  // Two distinct allocations will never be equal.
  return isAllocLikeFn(V, &TLI) && V != AI;
}
/// Given a call CB which uses an address UsedV, return true if we can prove the
/// call's only possible effect is storing to V.
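///
/// Illustrative sketch (not from the original source); @write_only is a
/// hypothetical stand-in for a call that will return, does not throw, and
/// whose only memory effect is the store to its destination argument:
///   %p = call ptr @malloc(i64 16)
///   call void @write_only(ptr %p)   ; result unused
/// Such a write into an otherwise dead allocation is removable.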
static bool isRemovableWrite(CallBase &CB, Value *UsedV,
                             const TargetLibraryInfo &TLI) {
  if (!CB.use_empty())
    // TODO: add recursion if returned attribute is present
    return false;

  if (CB.isTerminator())
    // TODO: remove implementation restriction
    return false;

  if (!CB.willReturn() || !CB.doesNotThrow())
    return false;

  // If the only possible side effect of the call is writing to the alloca,
  // and the result isn't used, we can safely remove any reads implied by the
  // call including those which might read the alloca itself.
  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
  return Dest && Dest->Ptr == UsedV;
}
static bool isAllocSiteRemovable(Instruction *AI,
                                 SmallVectorImpl<WeakTrackingVH> &Users,
                                 const TargetLibraryInfo &TLI) {
  SmallVector<Instruction *, 4> Worklist;
  const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
  Worklist.push_back(AI);

  do {
    Instruction *PI = Worklist.pop_back_val();
    for (User *U : PI->users()) {
      Instruction *I = cast<Instruction>(U);
      switch (I->getOpcode()) {
      default:
        // Give up the moment we see something we can't handle.
        return false;

      case Instruction::AddrSpaceCast:
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
        Users.emplace_back(I);
        Worklist.push_back(I);
        continue;

      case Instruction::ICmp: {
        ICmpInst *ICI = cast<ICmpInst>(I);
        // We can fold eq/ne comparisons with null to false/true, respectively.
        // We also fold comparisons in some conditions provided the alloc has
        // not escaped (see isNeverEqualToUnescapedAlloc).
        if (!ICI->isEquality())
          return false;
        unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
        if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
          return false;

        // Do not fold compares to aligned_alloc calls, as they may have to
        // return null in case the required alignment cannot be satisfied,
        // unless we can prove that both alignment and size are valid.
        auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
          // Check if alignment and size of a call to aligned_alloc is valid,
          // that is alignment is a power-of-2 and the size is a multiple of the
          // alignment.
          const APInt *Alignment;
          const APInt *Size;
          return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
                 match(CB->getArgOperand(1), m_APInt(Size)) &&
                 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
        };
        auto *CB = dyn_cast<CallBase>(AI);
        LibFunc TheLibFunc;
        if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
            TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
            !AlignmentAndSizeKnownValid(CB))
          return false;
        Users.emplace_back(I);
        continue;
      }
      case Instruction::Call:
        // Ignore no-op and store intrinsics.
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
          switch (II->getIntrinsicID()) {
          default:
            return false;

          case Intrinsic::memmove:
          case Intrinsic::memcpy:
          case Intrinsic::memset: {
            MemIntrinsic *MI = cast<MemIntrinsic>(II);
            if (MI->isVolatile() || MI->getRawDest() != PI)
              return false;
            [[fallthrough]];
          }
          case Intrinsic::assume:
          case Intrinsic::invariant_start:
          case Intrinsic::invariant_end:
          case Intrinsic::lifetime_start:
          case Intrinsic::lifetime_end:
          case Intrinsic::objectsize:
            Users.emplace_back(I);
            continue;
          case Intrinsic::launder_invariant_group:
          case Intrinsic::strip_invariant_group:
            Users.emplace_back(I);
            Worklist.push_back(I);
            continue;
          }
        }

        if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
          Users.emplace_back(I);
          continue;
        }

        if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
            getAllocationFamily(I, &TLI) == Family) {
          Users.emplace_back(I);
          continue;
        }

        if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
            getAllocationFamily(I, &TLI) == Family) {
          Users.emplace_back(I);
          Worklist.push_back(I);
          continue;
        }

        return false;

      case Instruction::Store: {
        StoreInst *SI = cast<StoreInst>(I);
        if (SI->isVolatile() || SI->getPointerOperand() != PI)
          return false;
        Users.emplace_back(I);
        continue;
      }
      }
      llvm_unreachable("missing a return?");
    }
  } while (!Worklist.empty());
  return true;
}
Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
  assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));

  // If we have a malloc call which is only used in any amount of comparisons to
  // null and free calls, delete the calls and replace the comparisons with true
  // or false as appropriate.

  // This is based on the principle that we can substitute our own allocation
  // function (which will never return null) rather than knowledge of the
  // specific function being called. In some sense this can change the permitted
  // outputs of a program (when we convert a malloc to an alloca, the fact that
  // the allocation is now on the stack is potentially visible, for example),
  // but we believe it does so in a permissible manner.
  SmallVector<WeakTrackingVH, 64> Users;

  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
  // before each store.
  SmallVector<DbgVariableIntrinsic *, 8> DVIs;
  std::unique_ptr<DIBuilder> DIB;
  if (isa<AllocaInst>(MI)) {
    findDbgUsers(DVIs, &MI);
    DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
  }

  if (isAllocSiteRemovable(&MI, Users, TLI)) {
    for (unsigned i = 0, e = Users.size(); i != e; ++i) {
      // Lowering all @llvm.objectsize calls first because they may
      // use a bitcast/GEP of the alloca we are removing.
      if (!Users[i])
        continue;

      Instruction *I = cast<Instruction>(&*Users[i]);

      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::objectsize) {
          SmallVector<Instruction *> InsertedInstructions;
          Value *Result = lowerObjectSizeCall(
              II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
          for (Instruction *Inserted : InsertedInstructions)
            Worklist.add(Inserted);
          replaceInstUsesWith(*I, Result);
          eraseInstFromFunction(*I);
          Users[i] = nullptr; // Skip examining in the next loop.
        }
      }
    }
    for (unsigned i = 0, e = Users.size(); i != e; ++i) {
      if (!Users[i])
        continue;

      Instruction *I = cast<Instruction>(&*Users[i]);

      if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
        replaceInstUsesWith(*C,
                            ConstantInt::get(Type::getInt1Ty(C->getContext()),
                                             C->isFalseWhenEqual()));
      } else if (auto *SI = dyn_cast<StoreInst>(I)) {
        for (auto *DVI : DVIs)
          if (DVI->isAddressOfVariable())
            ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
      } else {
        // Casts, GEP, or anything else: we're about to delete this instruction,
        // so it can not have any valid uses.
        replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
      }
      eraseInstFromFunction(*I);
    }

    if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
      // Replace invoke with a NOP intrinsic to maintain the original CFG
      Module *M = II->getModule();
      Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
      InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
                         std::nullopt, "", II->getParent());
    }

    // Remove debug intrinsics which describe the value contained within the
    // alloca. In addition to removing dbg.{declare,addr} which simply point to
    // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
    //
    //   define void @foo(i32 %0) {
    //     %a = alloca i32                              ; Deleted.
    //     store i32 %0, i32* %a
    //     dbg.value(i32 %0, "arg0")                    ; Not deleted.
    //     dbg.value(i32* %a, "arg0", DW_OP_deref)      ; Deleted.
    //     call void @trivially_inlinable_no_op(i32* %a)
    //     ret void
    //   }
    //
    // This may not be required if we stop describing the contents of allocas
    // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
    // the LowerDbgDeclare utility.
    //
    // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
    // "arg0" dbg.value may be stale after the call. However, failing to remove
    // the DW_OP_deref dbg.value causes large gaps in location coverage.
    for (auto *DVI : DVIs)
      if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
        DVI->eraseFromParent();

    return eraseInstFromFunction(MI);
  }
  return nullptr;
}
/// Move the call to free before a NULL test.
///
/// Check if this free is accessed after its argument has been tested
/// against NULL (property 0).
/// If yes, it is legal to move this call in its predecessor block.
///
/// The move is performed only if the block containing the call to free
/// will be removed, i.e.:
/// 1. it has only one predecessor P, and P has two successors
/// 2. it contains the call, noops, and an unconditional branch
/// 3. its successor is the same as its predecessor's successor
///
/// Profitability is not a concern here; this function should be called only
/// if the caller knows this transformation would be profitable (e.g., for
/// code size).
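///
/// Illustrative sketch (not from the original source):
///   pred:
///     %isnull = icmp eq ptr %p, null
///     br i1 %isnull, label %succ, label %freebb
///   freebb:
///     call void @free(ptr %p)
///     br label %succ
/// The free is hoisted into %pred before the null test (free(null) is a
/// no-op), after which the now-empty %freebb can be removed by SimplifyCFG.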
static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
                                                const DataLayout &DL) {
  Value *Op = FI.getArgOperand(0);
  BasicBlock *FreeInstrBB = FI.getParent();
  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();

  // Validate part of constraint #1: Only one predecessor
  // FIXME: We can extend the number of predecessors, but in that case, we
  //        would duplicate the call to free in each predecessor and it may
  //        not be profitable even for code size.
  if (!PredBB)
    return nullptr;

  // Validate constraint #2: Does this block contain only the call to
  //                         free, noops, and an unconditional branch?
  BasicBlock *SuccBB;
  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
    return nullptr;

  // If there are only 2 instructions in the block, at this point,
  // this is the call to free and unconditional.
  // If there are more than 2 instructions, check that they are noops
  // i.e., they won't hurt the performance of the generated code.
  if (FreeInstrBB->size() != 2) {
    for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
      if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
        continue;
      auto *Cast = dyn_cast<CastInst>(&Inst);
      if (!Cast || !Cast->isNoopCast(DL))
        return nullptr;
    }
  }
  // Validate the rest of constraint #1 by matching on the pred branch.
  Instruction *TI = PredBB->getTerminator();
  BasicBlock *TrueBB, *FalseBB;
  ICmpInst::Predicate Pred;
  if (!match(TI, m_Br(m_ICmp(Pred,
                             m_CombineOr(m_Specific(Op),
                                         m_Specific(Op->stripPointerCasts())),
                             m_Zero()),
                      TrueBB, FalseBB)))
    return nullptr;
  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
    return nullptr;

  // Validate constraint #3: Ensure the null case just falls through.
  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
    return nullptr;
  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
         "Broken CFG: missing edge from predecessor to successor");
  // At this point, we know that everything in FreeInstrBB can be moved
  // before TI.
  for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
    if (&Instr == FreeInstrBBTerminator)
      break;
    Instr.moveBeforePreserving(TI);
  }
  assert(FreeInstrBB->size() == 1 &&
         "Only the branch instruction should remain");

  // Now that we've moved the call to free before the NULL check, we have to
  // remove any attributes on its parameter that imply it's non-null, because
  // those attributes might have only been valid because of the NULL check, and
  // we can get miscompiles if we keep them. This is conservative if non-null is
  // also implied by something other than the NULL check, but it's guaranteed to
  // be correct, and the conservativeness won't matter in practice, since the
  // attributes are irrelevant for the call to free itself and the pointer
  // shouldn't be used after the call.
  AttributeList Attrs = FI.getAttributes();
  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
  if (Dereferenceable.isValid()) {
    uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
    Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
                                       Attribute::Dereferenceable);
    Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
  }
  FI.setAttributes(Attrs);

  return &FI;
}
Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
  // free undef -> unreachable.
  if (isa<UndefValue>(Op)) {
    // Leave a marker since we can't modify the CFG here.
    CreateNonTerminatorUnreachable(&FI);
    return eraseInstFromFunction(FI);
  }

  // If we have 'free null' delete the instruction. This can happen in stl code
  // when lots of inlining happens.
  if (isa<ConstantPointerNull>(Op))
    return eraseInstFromFunction(FI);

  // If we had free(realloc(...)) with no intervening uses, then eliminate the
  // realloc() entirely.
  CallInst *CI = dyn_cast<CallInst>(Op);
  if (CI && CI->hasOneUse())
    if (Value *ReallocatedOp = getReallocatedOperand(CI))
      return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));

  // If we optimize for code size, try to move the call to free before the null
  // test so that simplify cfg can remove the empty block and dead code
  // elimination can remove the branch. I.e., helps to turn something like:
  //   if (foo) free(foo);
  // into
  //   free(foo);
  //
  // Note that we can only do this for 'free' and not for any flavor of
  // 'operator delete'; there is no 'operator delete' symbol for which we are
  // permitted to invent a call, even if we're passing in a null pointer.
  if (MinimizeSize) {
    LibFunc Func;
    if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
      if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
        return I;
  }

  return nullptr;
}
Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
  // Try to remove the previous instruction if it must lead to unreachable.
  // This includes instructions like stores and "llvm.assume" that may not get
  // removed by simple dead code elimination.
  bool Changed = false;
  while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
    // While we theoretically can erase EH, that would result in a block that
    // used to start with an EH no longer starting with EH, which is invalid.
    // To make it valid, we'd need to fixup predecessors to no longer refer to
    // this block, but that changes CFG, which is not allowed in InstCombine.
    if (Prev->isEHPad())
      break; // Can not drop any more instructions. We're done here.

    if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // A value may still have uses before we process it here (for example, in
    // another unreachable block), so convert those to poison.
    replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
    eraseInstFromFunction(*Prev);
    Changed = true;
  }
  return Changed;
}
Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
  removeInstructionsBeforeUnreachable(I);
  return nullptr;
}

Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
  assert(BI.isUnconditional() && "Only for unconditional branches.");

  // If this store is the second-to-last instruction in the basic block
  // (excluding debug info and bitcasts of pointers) and if the block ends with
  // an unconditional branch, try to move the store to the successor block.
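  //
  // Illustrative sketch (not from the original source):
  //   bb:
  //     store i32 %v, ptr %p
  //     br label %succ
  // If the store can be merged with a store to %p in %succ's other
  // predecessor, mergeStoreIntoSuccessor() sinks it into %succ, typically
  // introducing a phi of the stored values.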
  auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
    auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
      return BBI->isDebugOrPseudoInst() ||
             (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
    };

    BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
    do {
      if (BBI != FirstInstr)
        --BBI;
    } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));

    return dyn_cast<StoreInst>(BBI);
  };

  if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
    if (mergeStoreIntoSuccessor(*SI))
      return &BI;

  return nullptr;
}
void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
                                   SmallVectorImpl<BasicBlock *> &Worklist) {
  if (!DeadEdges.insert({From, To}).second)
    return;

  // Replace phi node operands in successor with poison.
  for (PHINode &PN : To->phis())
    for (Use &U : PN.incoming_values())
      if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
        replaceUse(U, PoisonValue::get(PN.getType()));
        MadeIRChange = true;
      }

  Worklist.push_back(To);
}
// Under the assumption that I is unreachable, remove it and following
// instructions. Changes are reported directly to MadeIRChange.
void InstCombinerImpl::handleUnreachableFrom(
    Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
  BasicBlock *BB = I->getParent();
  for (Instruction &Inst : make_early_inc_range(
           make_range(std::next(BB->getTerminator()->getReverseIterator()),
                      std::next(I->getReverseIterator())))) {
    if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
      replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
      MadeIRChange = true;
    }
    if (Inst.isEHPad() || Inst.getType()->isTokenTy())
      continue;
    eraseInstFromFunction(Inst);
    MadeIRChange = true;
  }

  // Handle potentially dead successors.
  for (BasicBlock *Succ : successors(BB))
    addDeadEdge(BB, Succ, Worklist);
}
void InstCombinerImpl::handlePotentiallyDeadBlocks(
    SmallVectorImpl<BasicBlock *> &Worklist) {
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
          return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
        }))
      continue;

    handleUnreachableFrom(&BB->front(), Worklist);
  }
}

void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
                                                       BasicBlock *LiveSucc) {
  SmallVector<BasicBlock *> Worklist;
  for (BasicBlock *Succ : successors(BB)) {
    // The live successor isn't dead.
    if (Succ == LiveSucc)
      continue;

    addDeadEdge(BB, Succ, Worklist);
  }

  handlePotentiallyDeadBlocks(Worklist);
}
Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
  if (BI.isUnconditional())
    return visitUnconditionalBranchInst(BI);

  // Change br (not X), label True, label False to: br X, label False, True
  Value *Cond = BI.getCondition();
  Value *X;
  if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
    // Swap Destinations and condition...
    BI.swapSuccessors();
    return replaceOperand(BI, 0, X);
  }

  // Canonicalize logical-and-with-invert as logical-or-with-invert.
  // This is done by inverting the condition and swapping successors:
  // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
  Value *Y;
  if (isa<SelectInst>(Cond) &&
      match(Cond,
            m_OneUse(m_LogicalAnd(m_Value(X), m_OneUse(m_Not(m_Value(Y))))))) {
    Value *NotX = Builder.CreateNot(X, "not." + X->getName());
    Value *Or = Builder.CreateLogicalOr(NotX, Y);
    BI.swapSuccessors();
    return replaceOperand(BI, 0, Or);
  }

  // If the condition is irrelevant, remove the use so that other
  // transforms on the condition become more effective.
  if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
    return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));

  // Canonicalize, for example, fcmp_one -> fcmp_oeq.
  CmpInst::Predicate Pred;
  if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
      !isCanonicalPredicate(Pred)) {
    // Swap destinations and condition.
    auto *Cmp = cast<CmpInst>(Cond);
    Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
    BI.swapSuccessors();
    Worklist.push(Cmp);
    return &BI;
  }

  if (isa<UndefValue>(Cond)) {
    handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
    return nullptr;
  }
  if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
    handlePotentiallyDeadSuccessors(BI.getParent(),
                                    BI.getSuccessor(!CI->getZExtValue()));
    return nullptr;
  }

  return nullptr;
}
Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
  Value *Cond = SI.getCondition();
  Value *Op0;
  ConstantInt *AddRHS;
  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
    // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
    for (auto Case : SI.cases()) {
      Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
      assert(isa<ConstantInt>(NewCase) &&
             "Result of expression should be constant");
      Case.setValue(cast<ConstantInt>(NewCase));
    }
    return replaceOperand(SI, 0, Op0);
  }

  KnownBits Known = computeKnownBits(Cond, 0, &SI);
  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();

  // Compute the number of leading bits we can ignore.
  // TODO: A better way to determine this would use ComputeNumSignBits().
  for (const auto &C : SI.cases()) {
    LeadingKnownZeros =
        std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
    LeadingKnownOnes =
        std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
  }

  unsigned NewWidth = Known.getBitWidth() -
                      std::max(LeadingKnownZeros, LeadingKnownOnes);

  // Shrink the condition operand if the new type is smaller than the old type.
  // But do not shrink to a non-standard type, because backend can't generate
  // good code for that yet.
  // TODO: We can make it aggressive again after fixing PR39569.
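  //
  // Illustrative sketch (not from the original source): with the upper 16 bits
  // of an i32 condition known zero and all case values fitting in 16 bits:
  //   switch i32 %x, label %def [ i32 10, label %a
  //                               i32 20, label %b ]
  // -->
  //   %t = trunc i32 %x to i16
  //   switch i16 %t, label %def [ i16 10, label %a
  //                               i16 20, label %b ]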
  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
      shouldChangeType(Known.getBitWidth(), NewWidth)) {
    IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
    Builder.SetInsertPoint(&SI);
    Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");

    for (auto Case : SI.cases()) {
      APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
      Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
    }
    return replaceOperand(SI, 0, NewCond);
  }

  if (isa<UndefValue>(Cond)) {
    handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
    return nullptr;
  }
  if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
    handlePotentiallyDeadSuccessors(SI.getParent(),
                                    SI.findCaseValue(CI)->getCaseSuccessor());
    return nullptr;
  }

  return nullptr;
}
&EV
) {
3022 auto *WO
= dyn_cast
<WithOverflowInst
>(EV
.getAggregateOperand());
3026 Intrinsic::ID OvID
= WO
->getIntrinsicID();
3027 const APInt
*C
= nullptr;
3028 if (match(WO
->getRHS(), m_APIntAllowUndef(C
))) {
3029 if (*EV
.idx_begin() == 0 && (OvID
== Intrinsic::smul_with_overflow
||
3030 OvID
== Intrinsic::umul_with_overflow
)) {
3031 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3033 return BinaryOperator::CreateNeg(WO
->getLHS());
3034 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3035 if (C
->isPowerOf2()) {
3036 return BinaryOperator::CreateShl(
3038 ConstantInt::get(WO
->getLHS()->getType(), C
->logBase2()));
3043 // We're extracting from an overflow intrinsic. See if we're the only user.
3044 // That allows us to simplify multiple result intrinsics to simpler things
3045 // that just get one value.
3046 if (!WO
->hasOneUse())
3049 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3050 // and replace it with a traditional binary instruction.
3051 if (*EV
.idx_begin() == 0) {
3052 Instruction::BinaryOps BinOp
= WO
->getBinaryOp();
3053 Value
*LHS
= WO
->getLHS(), *RHS
= WO
->getRHS();
3054 // Replace the old instruction's uses with poison.
3055 replaceInstUsesWith(*WO
, PoisonValue::get(WO
->getType()));
3056 eraseInstFromFunction(*WO
);
3057 return BinaryOperator::Create(BinOp
, LHS
, RHS
);
3060 assert(*EV
.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3062 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3063 if (OvID
== Intrinsic::usub_with_overflow
)
3064 return new ICmpInst(ICmpInst::ICMP_ULT
, WO
->getLHS(), WO
->getRHS());
3066 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3067 // +1 is not possible because we assume signed values.
3068 if (OvID
== Intrinsic::smul_with_overflow
&&
3069 WO
->getLHS()->getType()->isIntOrIntVectorTy(1))
3070 return BinaryOperator::CreateAnd(WO
->getLHS(), WO
->getRHS());
3072 // If only the overflow result is used, and the right hand side is a
3073 // constant (or constant splat), we can remove the intrinsic by directly
3074 // checking for overflow.
3076 // Compute the no-wrap range for LHS given RHS=C, then construct an
3077 // equivalent icmp, potentially using an offset.
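    //
    // Illustrative sketch (not from the original source):
    //   %s  = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 100)
    //   %ov = extractvalue { i8, i1 } %s, 1
    // The no-wrap region for "add i8 %x, 100" is [-128, 28), so this becomes:
    //   %ov = icmp sgt i8 %x, 27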
    ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
        WO->getBinaryOp(), *C, WO->getNoWrapKind());

    CmpInst::Predicate Pred;
    APInt NewRHSC, Offset;
    NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
    auto *OpTy = WO->getRHS()->getType();
    auto *NewLHS = WO->getLHS();
    if (Offset != 0)
      NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
    return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
                        ConstantInt::get(OpTy, NewRHSC));
  }

  return nullptr;
}
Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
  Value *Agg = EV.getAggregateOperand();

  if (!EV.hasIndices())
    return replaceInstUsesWith(EV, Agg);

  if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
                                          SQ.getWithInstruction(&EV)))
    return replaceInstUsesWith(EV, V);

  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
    // We're extracting from an insertvalue instruction, compare the indices
    const unsigned *exti, *exte, *insi, *inse;
    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
         exte = EV.idx_end(), inse = IV->idx_end();
         exti != exte && insi != inse;
         ++exti, ++insi) {
      if (*insi != *exti)
        // The insert and extract both reference distinctly different elements.
        // This means the extract is not influenced by the insert, and we can
        // replace the aggregate operand of the extract with the aggregate
        // operand of the insert. i.e., replace
        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
        // %E = extractvalue { i32, { i32 } } %I, 0
        // with
        // %E = extractvalue { i32, { i32 } } %A, 0
        return ExtractValueInst::Create(IV->getAggregateOperand(),
                                        EV.getIndices());
    }
    if (exti == exte && insi == inse)
      // Both iterators are at the end: Index lists are identical. Replace
      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %C = extractvalue { i32, { i32 } } %B, 1, 0
      // with "i32 42"
      return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
    if (exti == exte) {
      // The extract list is a prefix of the insert list. i.e. replace
      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %E = extractvalue { i32, { i32 } } %I, 1
      // with
      // %X = extractvalue { i32, { i32 } } %A, 1
      // %E = insertvalue { i32 } %X, i32 42, 0
      // by switching the order of the insert and extract (though the
      // insertvalue should be left in, since it may have other uses).
      Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
                                                EV.getIndices());
      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
                                     ArrayRef(insi, inse));
    }
    if (insi == inse)
      // The insert list is a prefix of the extract list.
      // We can simply remove the common indices from the extract and make it
      // operate on the inserted value instead of the insertvalue result.
      // i.e., replace
      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
      // %E = extractvalue { i32, { i32 } } %I, 1, 0
      // with
      // %E = extractvalue { i32 } { i32 42 }, 0
      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
                                      ArrayRef(exti, exte));
  }

  if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
    return R;

  if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
    // Bail out if the aggregate contains scalable vector type
    if (auto *STy = dyn_cast<StructType>(Agg->getType());
        STy && STy->containsScalableVectorType())
      return nullptr;

    // If the (non-volatile) load only has one use, we can rewrite this to a
    // load from a GEP. This reduces the size of the load. If a load is used
    // only by extractvalue instructions then this either must have been
    // optimized before, or it is a struct with padding, in which case we
    // don't want to do the transformation as it loses padding knowledge.
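    //
    // Illustrative sketch (not from the original source):
    //   %L = load { i32, i32 }, ptr %p
    //   %E = extractvalue { i32, i32 } %L, 1
    // becomes (with the new instructions inserted at %L's position):
    //   %G = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
    //   %E = load i32, ptr %G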
3171 if (L
->isSimple() && L
->hasOneUse()) {
3172 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3173 SmallVector
<Value
*, 4> Indices
;
3174 // Prefix an i32 0 since we need the first element.
3175 Indices
.push_back(Builder
.getInt32(0));
3176 for (unsigned Idx
: EV
.indices())
3177 Indices
.push_back(Builder
.getInt32(Idx
));
3179 // We need to insert these at the location of the old load, not at that of
3180 // the extractvalue.
3181 Builder
.SetInsertPoint(L
);
3182 Value
*GEP
= Builder
.CreateInBoundsGEP(L
->getType(),
3183 L
->getPointerOperand(), Indices
);
3184 Instruction
*NL
= Builder
.CreateLoad(EV
.getType(), GEP
);
3185 // Whatever aliasing information we had for the orignal load must also
3186 // hold for the smaller load, so propagate the annotations.
3187 NL
->setAAMetadata(L
->getAAMetadata());
3188 // Returning the load directly will cause the main loop to insert it in
3189 // the wrong spot, so use replaceInstUsesWith().
3190 return replaceInstUsesWith(EV
, NL
);
3194 if (auto *PN
= dyn_cast
<PHINode
>(Agg
))
3195 if (Instruction
*Res
= foldOpIntoPhi(EV
, PN
))
3198 // We could simplify extracts from other values. Note that nested extracts may
3199 // already be simplified implicitly by the above: extract (extract (insert) )
3200 // will be translated into extract ( insert ( extract ) ) first and then just
3201 // the value inserted, if appropriate. Similarly for extracts from single-use
3202 // loads: extract (extract (load)) will be translated to extract (load (gep))
3203 // and if again single-use then via load (gep (gep)) to load (gep).
3204 // However, double extracts from e.g. function arguments or return values
3205 // aren't handled yet.
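// Illustrative IR for the single-use load fold above (names made up for
// exposition, not taken from the test suite): an extractvalue of a simple,
// single-use load such as
//   %agg = load { i32, i64 }, ptr %p
//   %x   = extractvalue { i32, i64 } %agg, 1
// is rewritten to a narrower load through a GEP, roughly
//   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
//   %x   = load i64, ptr %gep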
/// Return 'true' if the given typeinfo will match anything.
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
  switch (Personality) {
  case EHPersonality::GNU_C:
  case EHPersonality::GNU_C_SjLj:
  case EHPersonality::Rust:
    // The GCC C EH and Rust personality only exists to support cleanups, so
    // it's not clear what the semantics of catch clauses are.
    return false;
  case EHPersonality::Unknown:
    return false;
  case EHPersonality::GNU_Ada:
    // While __gnat_all_others_value will match any Ada exception, it doesn't
    // match foreign exceptions (or didn't, before gcc-4.7).
    return false;
  case EHPersonality::GNU_CXX:
  case EHPersonality::GNU_CXX_SjLj:
  case EHPersonality::GNU_ObjC:
  case EHPersonality::MSVC_X86SEH:
  case EHPersonality::MSVC_TableSEH:
  case EHPersonality::MSVC_CXX:
  case EHPersonality::CoreCLR:
  case EHPersonality::Wasm_CXX:
  case EHPersonality::XL_CXX:
    return TypeInfo->isNullValue();
  }
  llvm_unreachable("invalid enum");
}
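// For the C++-style personalities handled above, a catch-all corresponds to a
// null typeinfo, i.e. a landingpad clause that reads "catch ptr null" in IR
// (illustrative example, not an exhaustive description).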
static bool shorter_filter(const Value *LHS, const Value *RHS) {
  return
    cast<ArrayType>(LHS->getType())->getNumElements()
  <
    cast<ArrayType>(RHS->getType())->getNumElements();
}
Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
  // The logic here should be correct for any real-world personality function.
  // However if that turns out not to be true, the offending logic can always
  // be conditioned on the personality function, like the catch-all logic is.
  EHPersonality Personality =
      classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());

  // Simplify the list of clauses, e.g. by removing repeated catch clauses
  // (these are often created by inlining).
  bool MakeNewInstruction = false; // If true, recreate using the following:
  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
  bool CleanupFlag = LI.isCleanup();      // - The new instruction is a cleanup.

  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
    bool isLastClause = i + 1 == e;
    if (LI.isCatch(i)) {
      // A catch clause.
      Constant *CatchClause = LI.getClause(i);
      Constant *TypeInfo = CatchClause->stripPointerCasts();

      // If we already saw this clause, there is no point in having a second
      // copy of it.
      if (AlreadyCaught.insert(TypeInfo).second) {
        // This catch clause was not already seen.
        NewClauses.push_back(CatchClause);
      } else {
        // Repeated catch clause - drop the redundant copy.
        MakeNewInstruction = true;
      }

      // If this is a catch-all then there is no point in keeping any following
      // clauses or marking the landingpad as having a cleanup.
      if (isCatchAll(Personality, TypeInfo)) {
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }
    } else {
      // A filter clause.  If any of the filter elements were already caught
      // then they can be dropped from the filter.  It is tempting to try to
      // exploit the filter further by saying that any typeinfo that does not
      // occur in the filter can't be caught later (and thus can be dropped).
      // However this would be wrong, since typeinfos can match without being
      // equal (for example if one represents a C++ class, and the other some
      // class derived from it).
      assert(LI.isFilter(i) && "Unsupported landingpad clause!");
      Constant *FilterClause = LI.getClause(i);
      ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
      unsigned NumTypeInfos = FilterType->getNumElements();

      // An empty filter catches everything, so there is no point in keeping any
      // following clauses or marking the landingpad as having a cleanup.  By
      // dealing with this case here the following code is made a bit simpler.
      if (!NumTypeInfos) {
        NewClauses.push_back(FilterClause);
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }

      bool MakeNewFilter = false; // If true, make a new filter.
      SmallVector<Constant *, 16> NewFilterElts; // New elements.
      if (isa<ConstantAggregateZero>(FilterClause)) {
        // Not an empty filter - it contains at least one null typeinfo.
        assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
        Constant *TypeInfo =
            Constant::getNullValue(FilterType->getElementType());
        // If this typeinfo is a catch-all then the filter can never match.
        if (isCatchAll(Personality, TypeInfo)) {
          // Throw the filter away.
          MakeNewInstruction = true;
          continue;
        }

        // There is no point in having multiple copies of this typeinfo, so
        // discard all but the first copy if there is more than one.
        NewFilterElts.push_back(TypeInfo);
        if (NumTypeInfos > 1)
          MakeNewFilter = true;
      } else {
        ConstantArray *Filter = cast<ConstantArray>(FilterClause);
        SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
        NewFilterElts.reserve(NumTypeInfos);

        // Remove any filter elements that were already caught or that already
        // occurred in the filter.  While there, see if any of the elements are
        // catch-alls.  If so, the filter can be discarded.
        bool SawCatchAll = false;
        for (unsigned j = 0; j != NumTypeInfos; ++j) {
          Constant *Elt = Filter->getOperand(j);
          Constant *TypeInfo = Elt->stripPointerCasts();
          if (isCatchAll(Personality, TypeInfo)) {
            // This element is a catch-all.  Bail out, noting this fact.
            SawCatchAll = true;
            break;
          }

          // Even if we've seen a type in a catch clause, we don't want to
          // remove it from the filter.  An unexpected type handler may be
          // set up for a call site which throws an exception of the same
          // type caught.  In order for the exception thrown by the unexpected
          // handler to propagate correctly, the filter must be correctly
          // described for the call site.
          //
          // Example:
          //
          // void unexpected() { throw 1;}
          // void foo() throw (int) {
          //   std::set_unexpected(unexpected);
          //   try {
          //     throw 2.0;
          //   } catch (int i) {}
          // }

          // There is no point in having multiple copies of the same typeinfo in
          // a filter, so only add it if we didn't already.
          if (SeenInFilter.insert(TypeInfo).second)
            NewFilterElts.push_back(cast<Constant>(Elt));
        }
        // A filter containing a catch-all cannot match anything by definition.
        if (SawCatchAll) {
          // Throw the filter away.
          MakeNewInstruction = true;
          continue;
        }

        // If we dropped something from the filter, make a new one.
        if (NewFilterElts.size() < NumTypeInfos)
          MakeNewFilter = true;
      }
      if (MakeNewFilter) {
        FilterType = ArrayType::get(FilterType->getElementType(),
                                    NewFilterElts.size());
        FilterClause = ConstantArray::get(FilterType, NewFilterElts);
        MakeNewInstruction = true;
      }

      NewClauses.push_back(FilterClause);

      // If the new filter is empty then it will catch everything so there is
      // no point in keeping any following clauses or marking the landingpad
      // as having a cleanup.  The case of the original filter being empty was
      // already handled above.
      if (MakeNewFilter && !NewFilterElts.size()) {
        assert(MakeNewInstruction && "New filter but not a new instruction!");
        CleanupFlag = false;
        break;
      }
    }
  }

  // If several filters occur in a row then reorder them so that the shortest
  // filters come first (those with the smallest number of elements).  This is
  // advantageous because shorter filters are more likely to match, speeding up
  // unwinding, but mostly because it increases the effectiveness of the other
  // filter optimizations below.
  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
    unsigned j;
    // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
    for (j = i; j != e; ++j)
      if (!isa<ArrayType>(NewClauses[j]->getType()))
        break;

    // Check whether the filters are already sorted by length.  We need to know
    // if sorting them is actually going to do anything so that we only make a
    // new landingpad instruction if it does.
    for (unsigned k = i; k + 1 < j; ++k)
      if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
        // Not sorted, so sort the filters now.  Doing an unstable sort would be
        // correct too but reordering filters pointlessly might confuse users.
        std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
                         shorter_filter);
        MakeNewInstruction = true;
        break;
      }

    // Look for the next batch of filters.
    i = j + 1;
  }

  // If typeinfos matched if and only if equal, then the elements of a filter L
  // that occurs later than a filter F could be replaced by the intersection of
  // the elements of F and L.  In reality two typeinfos can match without being
  // equal (for example if one represents a C++ class, and the other some class
  // derived from it) so it would be wrong to perform this transform in general.
  // However the transform is correct and useful if F is a subset of L.  In that
  // case L can be replaced by F, and thus removed altogether since repeating a
  // filter is pointless.  So here we look at all pairs of filters F and L where
  // L follows F in the list of clauses, and remove L if every element of F is
  // an element of L.  This can occur when inlining C++ functions with exception
  // specifications.
  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
    // Examine each filter in turn.
    Value *Filter = NewClauses[i];
    ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
    if (!FTy)
      // Not a filter - skip it.
      continue;
    unsigned FElts = FTy->getNumElements();
    // Examine each filter following this one.  Doing this backwards means that
    // we don't have to worry about filters disappearing under us when removed.
    for (unsigned j = NewClauses.size() - 1; j != i; --j) {
      Value *LFilter = NewClauses[j];
      ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
      if (!LTy)
        // Not a filter - skip it.
        continue;
      // If Filter is a subset of LFilter, i.e. every element of Filter is also
      // an element of LFilter, then discard LFilter.
      SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
      // If Filter is empty then it is a subset of LFilter.
      if (!FElts) {
        // Discard LFilter.
        NewClauses.erase(J);
        MakeNewInstruction = true;
        // Move on to the next filter.
        continue;
      }
      unsigned LElts = LTy->getNumElements();
      // If Filter is longer than LFilter then it cannot be a subset of it.
      if (FElts > LElts)
        // Move on to the next filter.
        continue;
      // At this point we know that LFilter has at least one element.
      if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
        // Filter is a subset of LFilter iff Filter contains only zeros (as we
        // already know that Filter is not longer than LFilter).
        if (isa<ConstantAggregateZero>(Filter)) {
          assert(FElts <= LElts && "Should have handled this case earlier!");
          // Discard LFilter.
          NewClauses.erase(J);
          MakeNewInstruction = true;
        }
        // Move on to the next filter.
        continue;
      }
      ConstantArray *LArray = cast<ConstantArray>(LFilter);
      if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
        // Since Filter is non-empty and contains only zeros, it is a subset of
        // LFilter iff LFilter contains a zero.
        assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
        for (unsigned l = 0; l != LElts; ++l)
          if (LArray->getOperand(l)->isNullValue()) {
            // LFilter contains a zero - discard it.
            NewClauses.erase(J);
            MakeNewInstruction = true;
            break;
          }
        // Move on to the next filter.
        continue;
      }
      // At this point we know that both filters are ConstantArrays.  Loop over
      // operands to see whether every element of Filter is also an element of
      // LFilter.  Since filters tend to be short this is probably faster than
      // using a method that scales nicely.
      ConstantArray *FArray = cast<ConstantArray>(Filter);
      bool AllFound = true;
      for (unsigned f = 0; f != FElts; ++f) {
        Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
        AllFound = false;
        for (unsigned l = 0; l != LElts; ++l) {
          Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
          if (LTypeInfo == FTypeInfo) {
            AllFound = true;
            break;
          }
        }
        if (!AllFound)
          break;
      }
      if (AllFound) {
        // Discard LFilter.
        NewClauses.erase(J);
        MakeNewInstruction = true;
      }
      // Move on to the next filter.
    }
  }

  // If we changed any of the clauses, replace the old landingpad instruction
  // with a new one.
  if (MakeNewInstruction) {
    LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
                                                 NewClauses.size());
    for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
      NLI->addClause(NewClauses[i]);
    // A landing pad with no clauses must have the cleanup flag set.  It is
    // theoretically possible, though highly unlikely, that we eliminated all
    // clauses.  If so, force the cleanup flag to true.
    if (NewClauses.empty())
      CleanupFlag = true;
    NLI->setCleanup(CleanupFlag);
    return NLI;
  }

  // Even if none of the clauses changed, we may nonetheless have understood
  // that the cleanup flag is pointless.  Clear it if so.
  if (LI.isCleanup() != CleanupFlag) {
    assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
    LI.setCleanup(CleanupFlag);
    return &LI;
  }

  return nullptr;
}
Value *
InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
  // Try to push freeze through instructions that propagate but don't produce
  // poison as far as possible.  If an operand of freeze follows three
  // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
  // guaranteed-non-poison operands then push the freeze through to the one
  // operand that is not guaranteed non-poison.  The actual transform is as
  // follows.
  //   Op1 = ...                        ; Op1 can be poison
  //   Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and all of its
  //                                    ; other operands are guaranteed non-poison
  //   ... = Freeze(Op0)
  // =>
  //   Op1 = ...
  //   Op1.fr = Freeze(Op1)
  //   ... = Inst(Op1.fr, NonPoisonOps...)
  auto *OrigOp = OrigFI.getOperand(0);
  auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);

  // While we could change the other users of OrigOp to use freeze(OrigOp), that
  // potentially reduces their optimization potential, so let's only do this iff
  // the OrigOp is only used by the freeze.
  if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
    return nullptr;

  // We can't push the freeze through an instruction which can itself create
  // poison.  If the only source of new poison is flags, we can simply
  // strip them (since we know the only use is the freeze and nothing can
  // benefit from them.)
  if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
                             /*ConsiderFlagsAndMetadata*/ false))
    return nullptr;

  // If operand is guaranteed not to be poison, there is no need to add freeze
  // to the operand. So we first find the operand that is not guaranteed to be
  // poison.
  Use *MaybePoisonOperand = nullptr;
  for (Use &U : OrigOpInst->operands()) {
    if (isa<MetadataAsValue>(U.get()) ||
        isGuaranteedNotToBeUndefOrPoison(U.get()))
      continue;
    if (!MaybePoisonOperand)
      MaybePoisonOperand = &U;
    else
      return nullptr;
  }

  OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();

  // If all operands are guaranteed to be non-poison, we can drop freeze.
  if (!MaybePoisonOperand)
    return OrigOp;

  Builder.SetInsertPoint(OrigOpInst);
  auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
      MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");

  replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
  return OrigOp;
}
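// A concrete (illustrative) instance of the transform above: for
//   %a = add nsw i32 %x, 1
//   %f = freeze i32 %a
// the nsw flag is stripped and the freeze is pushed to the maybe-poison
// operand, yielding roughly
//   %x.fr = freeze i32 %x
//   %a    = add i32 %x.fr, 1
// after which the original freeze is replaced by %a.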
Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
                                                        PHINode *PN) {
  // Detect whether this is a recurrence with a start value and some number of
  // backedge values. We'll check whether we can push the freeze through the
  // backedge values (possibly dropping poison flags along the way) until we
  // reach the phi again. In that case, we can move the freeze to the start
  // value.
  Use *StartU = nullptr;
  SmallVector<Value *> Worklist;
  for (Use &U : PN->incoming_values()) {
    if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
      // Add backedge value to worklist.
      Worklist.push_back(U.get());
      continue;
    }

    // Don't bother handling multiple start values.
    if (StartU)
      return nullptr;
    StartU = &U;
  }

  if (!StartU || Worklist.empty())
    return nullptr; // Not a recurrence.

  Value *StartV = StartU->get();
  BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
  bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
  // We can't insert freeze if the start value is the result of the
  // terminator (e.g. an invoke).
  if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
    return nullptr;

  SmallPtrSet<Value *, 32> Visited;
  SmallVector<Instruction *> DropFlags;
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (Visited.size() > 32)
      return nullptr; // Limit the total number of values we inspect.

    // Assume that PN is non-poison, because it will be after the transform.
    if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
      continue;

    Instruction *I = dyn_cast<Instruction>(V);
    if (!I || canCreateUndefOrPoison(cast<Operator>(I),
                                     /*ConsiderFlagsAndMetadata*/ false))
      return nullptr;

    DropFlags.push_back(I);
    append_range(Worklist, I->operands());
  }

  for (Instruction *I : DropFlags)
    I->dropPoisonGeneratingFlagsAndMetadata();

  if (StartNeedsFreeze) {
    Builder.SetInsertPoint(StartBB->getTerminator());
    Value *FrozenStartV = Builder.CreateFreeze(StartV,
                                               StartV->getName() + ".fr");
    replaceUse(*StartU, FrozenStartV);
  }
  return replaceInstUsesWith(FI, PN);
}
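// Illustrative recurrence (hypothetical IR): freezing the phi of
//   loop:
//     %iv      = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.next = add nuw i32 %iv, 1
//     %iv.fr   = freeze i32 %iv
// becomes a freeze of the start value instead; the nuw flag on the backedge
// chain is dropped and %iv.fr is replaced by %iv itself:
//   entry:
//     %start.fr = freeze i32 %start
//   loop:
//     %iv       = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
//     %iv.next  = add i32 %iv, 1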
bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
  Value *Op = FI.getOperand(0);

  if (isa<Constant>(Op) || Op->hasOneUse())
    return false;

  // Move the freeze directly after the definition of its operand, so that
  // it dominates the maximum number of uses. Note that it may not dominate
  // *all* uses if the operand is an invoke/callbr and the use is in a phi on
  // the normal/default destination. This is why the domination check in the
  // replacement below is still necessary.
  Instruction *MoveBefore;
  if (isa<Argument>(Op)) {
    MoveBefore =
        &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
  } else {
    MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
    if (!MoveBefore)
      return false;
  }

  bool Changed = false;
  if (&FI != MoveBefore) {
    FI.moveBefore(*MoveBefore->getParent(), MoveBefore->getIterator());
    Changed = true;
  }

  Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
    bool Dominates = DT.dominates(&FI, U);
    Changed |= Dominates;
    return Dominates;
  });

  return Changed;
}
// Check if any direct or bitcast user of this value is a shuffle instruction.
static bool isUsedWithinShuffleVector(Value *V) {
  for (auto *U : V->users()) {
    if (isa<ShuffleVectorInst>(U))
      return true;
    else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
      return true;
  }
  return false;
}
Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
  Value *Op0 = I.getOperand(0);

  if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  // freeze (phi const, x) --> phi const, (freeze x)
  if (auto *PN = dyn_cast<PHINode>(Op0)) {
    if (Instruction *NV = foldOpIntoPhi(I, PN))
      return NV;
    if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
      return NV;
  }

  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
    return replaceInstUsesWith(I, NI);

  // If I is freeze(undef), check its uses and fold it to a fixed constant.
  // - or: pick -1
  // - select's condition: if the true value is constant, choose it by making
  //                       the condition true.
  // - default: pick 0
  //
  // Note that this transform is intentionally done here rather than
  // via an analysis in InstSimplify or at individual user sites. That is
  // because we must produce the same value for all uses of the freeze -
  // it's the reason "freeze" exists!
  //
  // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
  //       duplicating logic for binops at least.
  auto getUndefReplacement = [&I](Type *Ty) {
    Constant *BestValue = nullptr;
    Constant *NullValue = Constant::getNullValue(Ty);
    for (const auto *U : I.users()) {
      Constant *C = NullValue;
      if (match(U, m_Or(m_Value(), m_Value())))
        C = ConstantInt::getAllOnesValue(Ty);
      else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
        C = ConstantInt::getTrue(Ty);

      if (!BestValue)
        BestValue = C;
      else if (BestValue != C)
        BestValue = NullValue;
    }
    assert(BestValue && "Must have at least one use");
    return BestValue;
  };

  if (match(Op0, m_Undef())) {
    // Don't fold freeze(undef/poison) if it's used as a vector operand in
    // a shuffle. This may improve codegen for shuffles that allow
    // unspecified inputs.
    if (isUsedWithinShuffleVector(&I))
      return nullptr;
    return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
  }

  Constant *C;
  if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
    Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
    return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
  }

  // Replace uses of Op with freeze(Op).
  if (freezeOtherUses(I))
    return &I;

  return nullptr;
}
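// Illustrative freeze-of-undef fold (hypothetical IR): in
//   %f = freeze i32 undef
//   %o = or i32 %f, %x
// every use of %f must observe one consistent value; because the only user is
// an 'or', %f is replaced by -1 here, which lets the 'or' later fold to -1.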
/// Check for case where the call writes to an otherwise dead alloca.  This
/// shows up for unused out-params in idiomatic C/C++ code.  Note that this
/// helper *only* analyzes the write; doesn't check any other legality aspect.
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
  auto *CB = dyn_cast<CallBase>(I);
  if (!CB)
    // TODO: handle e.g. store to alloca here - only worth doing if we extend
    // to allow reload along used path as described below.  Otherwise, this
    // is simply a store to a dead allocation which will be removed.
    return false;
  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
  if (!Dest)
    return false;
  auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
  if (!AI)
    // TODO: allow malloc?
    return false;
  // TODO: allow memory access dominated by move point?  Note that since AI
  // could have a reference to itself captured by the call, we would need to
  // account for cycles in doing so.
  SmallVector<const User *> AllocaUsers;
  SmallPtrSet<const User *, 4> Visited;
  auto pushUsers = [&](const Instruction &I) {
    for (const User *U : I.users()) {
      if (Visited.insert(U).second)
        AllocaUsers.push_back(U);
    }
  };
  pushUsers(*AI);
  while (!AllocaUsers.empty()) {
    auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
    if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
        isa<AddrSpaceCastInst>(UserI)) {
      pushUsers(*UserI);
      continue;
    }
    if (UserI == CB)
      continue;
    // TODO: support lifetime.start/end here
    return false;
  }
  return true;
}
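// The typical C-level shape this helper is after (illustrative only):
//   int unused;
//   get_value(&unused);   // the call's only known write destination is the
//                         // otherwise-unused local 'unused'
// Here the out-param is a dead alloca, so from this helper's point of view the
// call's memory write does not block sinking.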
/// Try to move the specified instruction from its current block into the
/// beginning of DestBlock, which can only happen if it's safe to move the
/// instruction past all of the instructions between it and the end of its
/// block.
bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
                                            BasicBlock *DestBlock) {
  BasicBlock *SrcBlock = I->getParent();

  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
      I->isTerminator())
    return false;

  // Do not sink static or dynamic alloca instructions. Static allocas must
  // remain in the entry block, and dynamic allocas must not be sunk in between
  // a stacksave / stackrestore pair, which would incorrectly shorten its
  // lifetime.
  if (isa<AllocaInst>(I))
    return false;

  // Do not sink into catchswitch blocks.
  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
    return false;

  // Do not sink convergent call instructions.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    if (CI->isConvergent())
      return false;
  }

  // Unless we can prove that the memory write isn't visible except on the
  // path we're sinking to, we must bail.
  if (I->mayWriteToMemory()) {
    if (!SoleWriteToDeadLocal(I, TLI))
      return false;
  }

  // We can only sink load instructions if there is nothing between the load and
  // the end of block that could change the value.
  if (I->mayReadFromMemory()) {
    // We don't want to do any sophisticated alias analysis, so we only check
    // the instructions after I in I's parent block if we try to sink to its
    // successor block.
    if (DestBlock->getUniquePredecessor() != I->getParent())
      return false;
    for (BasicBlock::iterator Scan = std::next(I->getIterator()),
                              E = I->getParent()->end();
         Scan != E; ++Scan)
      if (Scan->mayWriteToMemory())
        return false;
  }

  I->dropDroppableUses([&](const Use *U) {
    auto *I = dyn_cast<Instruction>(U->getUser());
    if (I && I->getParent() != DestBlock) {
      Worklist.add(I);
      return true;
    }
    return false;
  });
  /// FIXME: We could remove droppable uses that are not dominated by
  /// the new position.

  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
  I->moveBefore(*DestBlock, InsertPos);

  // Also sink all related debug uses from the source basic block. Otherwise we
  // get debug use before the def. Attempt to salvage debug uses first, to
  // maximise the range variables have location for. If we cannot salvage, then
  // mark the location undef: we know it was supposed to receive a new location
  // here, but that computation has been sunk.
  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
  findDbgUsers(DbgUsers, I);

  // For all debug values in the destination block, the sunk instruction
  // will still be available, so they do not need to be dropped.
  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
  for (auto &DbgUser : DbgUsers)
    if (DbgUser->getParent() != DestBlock)
      DbgUsersToSalvage.push_back(DbgUser);

  // Process the sinking DbgUsersToSalvage in reverse order, as we only want
  // to clone the last appearing debug intrinsic for each given variable.
  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
  for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
    if (DVI->getParent() == SrcBlock)
      DbgUsersToSink.push_back(DVI);
  llvm::sort(DbgUsersToSink,
             [](auto *A, auto *B) { return B->comesBefore(A); });

  SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
  SmallSet<DebugVariable, 4> SunkVariables;
  for (auto *User : DbgUsersToSink) {
    // A dbg.declare instruction should not be cloned, since there can only be
    // one per variable fragment. It should be left in the original place
    // because the sunk instruction is not an alloca (otherwise we could not be
    // here).
    if (isa<DbgDeclareInst>(User))
      continue;

    DebugVariable DbgUserVariable =
        DebugVariable(User->getVariable(), User->getExpression(),
                      User->getDebugLoc()->getInlinedAt());

    if (!SunkVariables.insert(DbgUserVariable).second)
      continue;

    // Leave dbg.assign intrinsics in their original positions and there should
    // be no need to insert a clone.
    if (isa<DbgAssignIntrinsic>(User))
      continue;

    DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
    if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
      DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
    LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
  }

  // Perform salvaging without the clones, then sink the clones.
  if (!DIIClones.empty()) {
    salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage);
    // The clones are in reverse order of original appearance, reverse again to
    // maintain the original order.
    for (auto &DIIClone : llvm::reverse(DIIClones)) {
      DIIClone->insertBefore(&*InsertPos);
      LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
    }
  }

  return true;
}
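// Illustrative sink (hypothetical IR): assuming %succ has %bb as its unique
// predecessor, the load's only user lives in %succ, and nothing after the
// load in %bb may write memory, then
//   bb:    %v = load i32, ptr %p
//          br i1 %c, label %succ, label %other
//   succ:  %r = add i32 %v, 1
// is turned into
//   bb:    br i1 %c, label %succ, label %other
//   succ:  %v = load i32, ptr %p
//          %r = add i32 %v, 1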
bool InstCombinerImpl::run() {
  while (!Worklist.isEmpty()) {
    // Walk deferred instructions in reverse order, and push them to the
    // worklist, which means they'll end up popped from the worklist in-order.
    while (Instruction *I = Worklist.popDeferred()) {
      // Check to see if we can DCE the instruction. We do this already here to
      // reduce the number of uses and thus allow other folds to trigger.
      // Note that eraseInstFromFunction() may push additional instructions on
      // the deferred worklist, so this will DCE whole instruction chains.
      if (isInstructionTriviallyDead(I, &TLI)) {
        eraseInstFromFunction(*I);
        ++NumDeadInst;
        continue;
      }

      Worklist.push(I);
    }

    Instruction *I = Worklist.removeOne();
    if (I == nullptr) continue;  // skip null values.

    // Check to see if we can DCE the instruction.
    if (isInstructionTriviallyDead(I, &TLI)) {
      eraseInstFromFunction(*I);
      ++NumDeadInst;
      continue;
    }

    if (!DebugCounter::shouldExecute(VisitCounter))
      continue;

    // See if we can trivially sink this instruction to its user if we can
    // prove that the successor is not executed more frequently than our block.
    // Return the UserBlock if successful.
    auto getOptionalSinkBlockForInst =
        [this](Instruction *I) -> std::optional<BasicBlock *> {
      if (!EnableCodeSinking)
        return std::nullopt;

      BasicBlock *BB = I->getParent();
      BasicBlock *UserParent = nullptr;
      unsigned NumUsers = 0;

      for (auto *U : I->users()) {
        if (U->isDroppable())
          continue;
        if (NumUsers > MaxSinkNumUsers)
          return std::nullopt;

        Instruction *UserInst = cast<Instruction>(U);
        // Special handling for Phi nodes - get the block the use occurs in.
        if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
          for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
            if (PN->getIncomingValue(i) == I) {
              // Bail out if we have uses in different blocks. We don't do any
              // sophisticated analysis (i.e. finding NearestCommonDominator of
              // these use blocks).
              if (UserParent && UserParent != PN->getIncomingBlock(i))
                return std::nullopt;
              UserParent = PN->getIncomingBlock(i);
            }
          }
          assert(UserParent && "expected to find user block!");
        } else {
          if (UserParent && UserParent != UserInst->getParent())
            return std::nullopt;
          UserParent = UserInst->getParent();
        }

        // Make sure these checks are done only once, naturally we do the checks
        // the first time we get the userparent, this will save compile time.
        if (NumUsers == 0) {
          // Try sinking to another block. If that block is unreachable, then do
          // not bother. SimplifyCFG should handle it.
          if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
            return std::nullopt;

          auto *Term = UserParent->getTerminator();
          // See if the user is one of our successors that has only one
          // predecessor, so that we don't have to split the critical edge.
          // Another option where we can sink is a block that ends with a
          // terminator that does not pass control to other block (such as
          // return or unreachable or resume). In this case:
          //   - I dominates the User (by SSA form);
          //   - the User will be executed at most once.
          // So sinking I down to User is always profitable or neutral.
          if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
            return std::nullopt;

          assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
        }

        NumUsers++;
      }

      // No user or only has droppable users.
      if (!UserParent)
        return std::nullopt;

      return UserParent;
    };

    auto OptBB = getOptionalSinkBlockForInst(I);
    if (OptBB) {
      auto *UserParent = *OptBB;
      // Okay, the CFG is simple enough, try to sink this instruction.
      if (tryToSinkInstruction(I, UserParent)) {
        LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
        MadeIRChange = true;
        // We'll add uses of the sunk instruction below, but since
        // sinking can expose opportunities for its *operands* add
        // them to the worklist
        for (Use &U : I->operands())
          if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
            Worklist.push(OpI);
      }
    }

    // Now that we have an instruction, try combining it to simplify it.
    Builder.SetInsertPoint(I);
    Builder.CollectMetadataToCopy(
        I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});

#ifndef NDEBUG
    std::string OrigI;
#endif
    LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
    LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');

    if (Instruction *Result = visit(*I)) {
      ++NumCombined;
      // Should we replace the old instruction with a new one?
      if (Result != I) {
        LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
                          << "    New = " << *Result << '\n');

        Result->copyMetadata(*I,
                             {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
        // Everything uses the new instruction now.
        I->replaceAllUsesWith(Result);

        // Move the name to the new instruction first.
        Result->takeName(I);

        // Insert the new instruction into the basic block...
        BasicBlock *InstParent = I->getParent();
        BasicBlock::iterator InsertPos = I->getIterator();

        // Are we replacing a PHI with something that isn't a PHI, or vice versa?
        if (isa<PHINode>(Result) != isa<PHINode>(I)) {
          // We need to fix up the insertion point.
          if (isa<PHINode>(I)) // PHI -> Non-PHI
            InsertPos = InstParent->getFirstInsertionPt();
          else                 // Non-PHI -> PHI
            InsertPos = InstParent->getFirstNonPHI()->getIterator();
        }

        Result->insertInto(InstParent, InsertPos);

        // Push the new instruction and any users onto the worklist.
        Worklist.pushUsersToWorkList(*Result);
        Worklist.push(Result);

        eraseInstFromFunction(*I);
      } else {
        LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
                          << "    New = " << *I << '\n');

        // If the instruction was modified, it's possible that it is now dead.
        // if so, remove it.
        if (isInstructionTriviallyDead(I, &TLI)) {
          eraseInstFromFunction(*I);
        } else {
          Worklist.pushUsersToWorkList(*I);
          Worklist.push(I);
        }
      }
      MadeIRChange = true;
    }
  }

  Worklist.zap();
  return MadeIRChange;
}
// Track the scopes used by !alias.scope and !noalias. In a function, a
// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
// by both sets. If not, the declaration of the scope can be safely omitted.
// The MDNode of the scope can be omitted as well for the instructions that are
// part of this function. We do not do that at this point, as this might become
// too time consuming to do.
class AliasScopeTracker {
  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;

public:
  void analyse(Instruction *I) {
    // This seems to be faster than checking 'mayReadOrWriteMemory()'.
    if (!I->hasMetadataOtherThanDebugLoc())
      return;

    auto Track = [](Metadata *ScopeList, auto &Container) {
      const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
      if (!MDScopeList || !Container.insert(MDScopeList).second)
        return;
      for (const auto &MDOperand : MDScopeList->operands())
        if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
          Container.insert(MDScope);
    };

    Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
    Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
  }

  bool isNoAliasScopeDeclDead(Instruction *Inst) {
    NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
    if (!Decl)
      return false;

    assert(Decl->use_empty() &&
           "llvm.experimental.noalias.scope.decl in use ?");
    const MDNode *MDSL = Decl->getScopeList();
    assert(MDSL->getNumOperands() == 1 &&
           "llvm.experimental.noalias.scope should refer to a single scope");
    auto &MDOperand = MDSL->getOperand(0);
    if (auto *MD = dyn_cast<MDNode>(MDOperand))
      return !UsedAliasScopesAndLists.contains(MD) ||
             !UsedNoAliasScopesAndLists.contains(MD);

    // Not an MDNode ? throw away.
    return true;
  }
};
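// Illustrative use: a call such as
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// is reported dead by isNoAliasScopeDeclDead() when the scope behind !2 is not
// referenced by both a !alias.scope list and a !noalias list somewhere in the
// function; prepareWorklist() below then deletes it as trivially dead.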
/// Populate the IC worklist from a function, by walking it in reverse
/// post-order and adding all reachable code to the worklist.
///
/// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
bool InstCombinerImpl::prepareWorklist(
    Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
  bool MadeIRChange = false;
  SmallPtrSet<BasicBlock *, 32> LiveBlocks;
  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
  DenseMap<Constant *, Constant *> FoldedConstants;
  AliasScopeTracker SeenAliasScopes;

  auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
    for (BasicBlock *Succ : successors(BB))
      if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
        for (PHINode &PN : Succ->phis())
          for (Use &U : PN.incoming_values())
            if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
              U.set(PoisonValue::get(PN.getType()));
              MadeIRChange = true;
            }
  };

  for (BasicBlock *BB : RPOT) {
    if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
          return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
        })) {
      HandleOnlyLiveSuccessor(BB, nullptr);
      continue;
    }
    LiveBlocks.insert(BB);

    for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
      // ConstantProp instruction if trivially constant.
      if (!Inst.use_empty() &&
          (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
        if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
          LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
                            << '\n');
          Inst.replaceAllUsesWith(C);
          ++NumConstProp;
          if (isInstructionTriviallyDead(&Inst, &TLI))
            Inst.eraseFromParent();
          MadeIRChange = true;
          continue;
        }

      // See if we can constant fold its operands.
      for (Use &U : Inst.operands()) {
        if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
          continue;

        auto *C = cast<Constant>(U);
        Constant *&FoldRes = FoldedConstants[C];
        if (!FoldRes)
          FoldRes = ConstantFoldConstant(C, DL, &TLI);

        if (FoldRes != C) {
          LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
                            << "\n    Old = " << *C
                            << "\n    New = " << *FoldRes << '\n');
          U = FoldRes;
          MadeIRChange = true;
        }
      }

      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
      // these call instructions consumes non-trivial amount of time and
      // provides no value for the optimization.
      if (!Inst.isDebugOrPseudoInst()) {
        InstrsForInstructionWorklist.push_back(&Inst);
        SeenAliasScopes.analyse(&Inst);
      }
    }

    // If this is a branch or switch on a constant, mark only the single
    // live successor. Otherwise assume all successors are live.
    Instruction *TI = BB->getTerminator();
    if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
      if (isa<UndefValue>(BI->getCondition())) {
        // Branch on undef is UB.
        HandleOnlyLiveSuccessor(BB, nullptr);
        continue;
      }
      if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
        bool CondVal = Cond->getZExtValue();
        HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
        continue;
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      if (isa<UndefValue>(SI->getCondition())) {
        // Switch on undef is UB.
        HandleOnlyLiveSuccessor(BB, nullptr);
        continue;
      }
      if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
        HandleOnlyLiveSuccessor(BB,
                                SI->findCaseValue(Cond)->getCaseSuccessor());
        continue;
      }
    }
  }

  // Remove instructions inside unreachable blocks. This prevents the
  // instcombine code from having to deal with some bad special cases, and
  // reduces use counts of instructions.
  for (BasicBlock &BB : F) {
    if (LiveBlocks.count(&BB))
      continue;

    unsigned NumDeadInstInBB;
    unsigned NumDeadDbgInstInBB;
    std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
        removeAllNonTerminatorAndEHPadInstructions(&BB);

    MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
    NumDeadInst += NumDeadInstInBB;
  }

  // Once we've found all of the instructions to add to instcombine's worklist,
  // add them in reverse order. This way instcombine will visit from the top
  // of the function down. This jives well with the way that it adds all uses
  // of instructions to the worklist after doing a transformation, thus avoiding
  // some N^2 behavior in pathological cases.
  Worklist.reserve(InstrsForInstructionWorklist.size());
  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
    // DCE instruction if trivially dead. As we iterate in reverse program
    // order here, we will clean up whole chains of dead instructions.
    if (isInstructionTriviallyDead(Inst, &TLI) ||
        SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
      ++NumDeadInst;
      LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
      salvageDebugInfo(*Inst);
      Inst->eraseFromParent();
      MadeIRChange = true;
      continue;
    }

    Worklist.push(Inst);
  }

  return MadeIRChange;
}
static bool combineInstructionsOverFunction(
    Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
    AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
    DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
    ProfileSummaryInfo *PSI, unsigned MaxIterations, bool VerifyFixpoint,
    LoopInfo *LI) {
  auto &DL = F.getParent()->getDataLayout();

  /// Builder - This is an IRBuilder that automatically inserts new
  /// instructions into the worklist when they are created.
  IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
      F.getContext(), TargetFolder(DL),
      IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
        Worklist.add(I);
        if (auto *Assume = dyn_cast<AssumeInst>(I))
          AC.registerAssumption(Assume);
      }));

  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());

  // Lower dbg.declare intrinsics otherwise their value may be clobbered
  // by instcombine.
  bool MadeIRChange = false;
  if (ShouldLowerDbgDeclare)
    MadeIRChange = LowerDbgDeclare(F);

  // Iterate while there is work to do.
  unsigned Iteration = 0;
  while (true) {
    ++Iteration;

    if (Iteration > MaxIterations && !VerifyFixpoint) {
      LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
                        << " on " << F.getName()
                        << " reached; stopping without verifying fixpoint\n");
      break;
    }

    ++NumWorklistIterations;
    LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
                      << F.getName() << "\n");

    InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
                        ORE, BFI, PSI, DL, LI);
    IC.MaxArraySizeForCombine = MaxArraySize;
    bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
    MadeChangeInThisIteration |= IC.run();
    if (!MadeChangeInThisIteration)
      break;

    MadeIRChange = true;
    if (Iteration > MaxIterations) {
      report_fatal_error(
          "Instruction Combining did not reach a fixpoint after " +
          Twine(MaxIterations) + " iterations");
    }
  }

  if (Iteration == 1)
    ++NumOneIteration;
  else if (Iteration == 2)
    ++NumTwoIterations;
  else if (Iteration == 3)
    ++NumThreeIterations;
  else
    ++NumFourOrMoreIterations;

  return MadeIRChange;
}
InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}

void InstCombinePass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  OS << "max-iterations=" << Options.MaxIterations << ";";
  OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
  OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
  OS << '>';
}
PreservedAnalyses InstCombinePass::run(Function &F,
                                       FunctionAnalysisManager &AM) {
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);

  // TODO: Only use LoopInfo when the option is set. This requires that the
  //       callers in the pass pipeline explicitly set the option.
  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
  if (!LI && Options.UseLoopInfo)
    LI = &AM.getResult<LoopAnalysis>(F);

  auto *AA = &AM.getResult<AAManager>(F);
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  ProfileSummaryInfo *PSI =
      MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;

  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
                                       BFI, PSI, Options.MaxIterations,
                                       Options.VerifyFixpoint, LI))
    // No changes, all analyses are preserved.
    return PreservedAnalyses::all();

  // Mark all the analyses that instcombine updates as preserved.
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
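// The new-PM pass can be exercised in isolation with, for example,
//   opt -passes=instcombine -S in.ll
// which is a convenient way to observe the folds implemented in this file.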
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
bool InstructionCombiningPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // Required analyses.
  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  // Optional analyses.
  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
  ProfileSummaryInfo *PSI =
      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  BlockFrequencyInfo *BFI =
      (PSI && PSI->hasProfileSummary()) ?
      &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
      nullptr;

  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
                                         BFI, PSI,
                                         InstCombineDefaultMaxIterations,
                                         /*VerifyFixpoint */ false, LI);
}
char InstructionCombiningPass::ID = 0;

InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
}

INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
                      "Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                    "Combine redundant instructions", false, false)

// Initialization Routines
void llvm::initializeInstCombine(PassRegistry &Registry) {
  initializeInstructionCombiningPassPass(Registry);
}

FunctionPass *llvm::createInstructionCombiningPass() {
  return new InstructionCombiningPass();
}