//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for add, fadd, sub, and fsub.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"
namespace {

/// Class representing the coefficient of a floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As I write this comment, the cost of the default
/// constructor is merely a 4-byte zero store (assuming the compiler is able
/// to perform write-merging).
class FAddendCoef {
public:
  // The constructor has to initialize an APFloat, which is unnecessary for
  // most addends, whose coefficient is either 1 or -1. So the constructor
  // is expensive. In order to avoid the cost of the constructor, we should
  // reuse some instances whenever possible. The pre-created instances
  // FAddCombine::Add[0-5] embody this idea.
  FAddendCoef() = default;
  ~FAddendCoef();
  // If possible, don't define operator+/operator- etc because these
  // operators inevitably call FAddendCoef's constructor which is not cheap.
  void operator=(const FAddendCoef &A);
  void operator+=(const FAddendCoef &A);
  void operator*=(const FAddendCoef &S);

  void set(short C) {
    assert(!insaneIntVal(C) && "Insane coefficient");
    IsFp = false; IntVal = C;
  }

  void set(const APFloat& C);

  void negate();

  bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
  Value *getValue(Type *) const;

  bool isOne() const { return isInt() && IntVal == 1; }
  bool isTwo() const { return isInt() && IntVal == 2; }
  bool isMinusOne() const { return isInt() && IntVal == -1; }
  bool isMinusTwo() const { return isInt() && IntVal == -2; }

private:
  bool insaneIntVal(int V) { return V > 4 || V < -4; }

  APFloat *getFpValPtr()
    { return reinterpret_cast<APFloat *>(&FpValBuf.buffer[0]); }

  const APFloat *getFpValPtr() const
    { return reinterpret_cast<const APFloat *>(&FpValBuf.buffer[0]); }

  const APFloat &getFpVal() const {
    assert(IsFp && BufHasFpVal && "Incorrect state");
    return *getFpValPtr();
  }

  APFloat &getFpVal() {
    assert(IsFp && BufHasFpVal && "Incorrect state");
    return *getFpValPtr();
  }

  bool isInt() const { return !IsFp; }

  // If the coefficient is represented by an integer, promote it to a
  // floating point.
  void convertToFpType(const fltSemantics &Sem);

  // Construct an APFloat from a signed integer.
  // TODO: We should get rid of this function when APFloat can be constructed
  // from a *SIGNED* integer.
  APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);

  bool IsFp = false;

  // True iff FpValBuf contains an instance of APFloat.
  bool BufHasFpVal = false;

  // The integer coefficient of an individual addend is either 1 or -1,
  // and we try to simplify at most 4 addends from neighboring at most
  // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
  // is overkill for this purpose.
  short IntVal = 0;

  AlignedCharArrayUnion<APFloat> FpValBuf;
};
/// FAddend is used to represent a floating-point addend. An addend is
/// represented as <C, V>, where V is a symbolic value and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
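/// For example, the term 2.5 * X is modeled as the addend <2.5, X>, while a
/// lone constant such as 3.0 is modeled as <3.0, nullptr>.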
class FAddend {
public:
  FAddend() = default;

  void operator+=(const FAddend &T) {
    assert((Val == T.Val) && "Symbolic-values disagree");
    Coeff += T.Coeff;
  }

  Value *getSymVal() const { return Val; }
  const FAddendCoef &getCoef() const { return Coeff; }

  bool isConstant() const { return Val == nullptr; }
  bool isZero() const { return Coeff.isZero(); }

  void set(short Coefficient, Value *V) {
    Coeff.set(Coefficient);
    Val = V;
  }

  void set(const APFloat &Coefficient, Value *V) {
    Coeff.set(Coefficient);
    Val = V;
  }

  void set(const ConstantFP *Coefficient, Value *V) {
    Coeff.set(Coefficient->getValueAPF());
    Val = V;
  }

  void negate() { Coeff.negate(); }

  /// Drill down the U-D chain one step to find the definition of V, and
  /// try to break the definition into one or two addends.
  static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);

  /// Similar to FAddend::drillDownOneStep() except that the value being
  /// split is the addend itself.
  unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;

private:
  void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }

  // This addend has the value of "Coeff * Val".
  Value *Val = nullptr;
  FAddendCoef Coeff;
};
/// FAddCombine is the class for optimizing an unsafe fadd/fsub along with
/// at most two of its neighboring instructions.
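///
/// As a minimal illustration of the folding performed here (assuming the
/// required 'reassoc nsz' fast-math flags are present):
///   %t = fsub float %x, %y
///   %r = fadd float %t, %y
/// collects the addends <1, %x>, <-1, %y>, <1, %y> and simplifies %r to %x.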
class FAddCombine {
public:
  FAddCombine(InstCombiner::BuilderTy &B) : Builder(B) {}

  Value *simplify(Instruction *FAdd);

private:
  using AddendVect = SmallVector<const FAddend *, 4>;

  Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);

  /// Convert the given addend to a Value.
  Value *createAddendVal(const FAddend &A, bool& NeedNeg);

  /// Return the number of instructions needed to emit the N-ary addition.
  unsigned calcInstrNumber(const AddendVect& Vect);

  Value *createFSub(Value *Opnd0, Value *Opnd1);
  Value *createFAdd(Value *Opnd0, Value *Opnd1);
  Value *createFMul(Value *Opnd0, Value *Opnd1);
  Value *createFNeg(Value *V);
  Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
  void createInstPostProc(Instruction *NewInst, bool NoNumber = false);

  // Debugging stuff is clustered here.
#ifndef NDEBUG
  unsigned CreateInstrNum;
  void initCreateInstNum() { CreateInstrNum = 0; }
  void incCreateInstNum() { CreateInstrNum++; }
#else
  void initCreateInstNum() {}
  void incCreateInstNum() {}
#endif

  InstCombiner::BuilderTy &Builder;
  Instruction *Instr = nullptr;
};

} // end anonymous namespace
//===----------------------------------------------------------------------===//
//
//            Implementation of
//            {FAddendCoef, FAddend, FAddition, FAddCombine}.
//
//===----------------------------------------------------------------------===//
FAddendCoef::~FAddendCoef() {
  if (BufHasFpVal)
    getFpValPtr()->~APFloat();
}
void FAddendCoef::set(const APFloat& C) {
  APFloat *P = getFpValPtr();

  if (isInt() || !BufHasFpVal) {
    // As the buffer is a meaningless byte stream, we cannot call
    // APFloat::operator=().
    new(P) APFloat(C);
  } else
    *P = C;

  IsFp = BufHasFpVal = true;
}
void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
  if (!isInt())
    return;

  APFloat *P = getFpValPtr();
  if (IntVal > 0)
    new(P) APFloat(Sem, IntVal);
  else {
    new(P) APFloat(Sem, 0 - IntVal);
    P->changeSign();
  }
  IsFp = BufHasFpVal = true;
}
APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
  if (Val >= 0)
    return APFloat(Sem, Val);

  APFloat T(Sem, 0 - Val);
  T.changeSign();

  return T;
}
void FAddendCoef::operator=(const FAddendCoef &That) {
  if (That.isInt())
    set(That.IntVal);
  else
    set(That.getFpVal());
}
void FAddendCoef::operator+=(const FAddendCoef &That) {
  enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
  if (isInt() == That.isInt()) {
    if (isInt())
      IntVal += That.IntVal;
    else
      getFpVal().add(That.getFpVal(), RndMode);
    return;
  }

  if (isInt()) {
    const APFloat &T = That.getFpVal();
    convertToFpType(T.getSemantics());
    getFpVal().add(T, RndMode);
    return;
  }

  APFloat &T = getFpVal();
  T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}
void FAddendCoef::operator*=(const FAddendCoef &That) {
  if (That.isOne())
    return;

  if (That.isMinusOne()) {
    negate();
    return;
  }

  if (isInt() && That.isInt()) {
    int Res = IntVal * (int)That.IntVal;
    assert(!insaneIntVal(Res) && "Insane int value");
    IntVal = Res;
    return;
  }

  const fltSemantics &Semantic =
    isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();

  if (isInt())
    convertToFpType(Semantic);
  APFloat &F0 = getFpVal();

  if (That.isInt())
    F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
                APFloat::rmNearestTiesToEven);
  else
    F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
}
void FAddendCoef::negate() {
  if (isInt())
    IntVal = 0 - IntVal;
  else
    getFpVal().changeSign();
}
Value *FAddendCoef::getValue(Type *Ty) const {
  return isInt() ?
    ConstantFP::get(Ty, float(IntVal)) :
    ConstantFP::get(Ty->getContext(), getFpVal());
}
// The definition of <Val>       Addends
// =========================================
//  A + B                        <1, A>, <1, B>
//  A - B                        <1, A>, <-1, B>
//  0 - B                        <-1, B>
//  C * A                        <C, A>
//  A + C                        <1, A>, <C, NULL>
//  0 +/- 0                      <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
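//
// For instance, drilling one step into "%t = fsub float %x, %y" yields the
// two addends <1, %x> and <-1, %y>.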
unsigned FAddend::drillValueDownOneStep
  (Value *Val, FAddend &Addend0, FAddend &Addend1) {
  Instruction *I = nullptr;
  if (!Val || !(I = dyn_cast<Instruction>(Val)))
    return 0;

  unsigned Opcode = I->getOpcode();

  if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
    ConstantFP *C0, *C1;
    Value *Opnd0 = I->getOperand(0);
    Value *Opnd1 = I->getOperand(1);
    if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
      Opnd0 = nullptr;

    if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
      Opnd1 = nullptr;

    if (Opnd0) {
      if (!C0)
        Addend0.set(1, Opnd0);
      else
        Addend0.set(C0, nullptr);
    }

    if (Opnd1) {
      FAddend &Addend = Opnd0 ? Addend1 : Addend0;
      if (!C1)
        Addend.set(1, Opnd1);
      else
        Addend.set(C1, nullptr);
      if (Opcode == Instruction::FSub)
        Addend.negate();
    }

    if (Opnd0 || Opnd1)
      return Opnd0 && Opnd1 ? 2 : 1;

    // Both operands are zero. Weird!
    Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr);
    return 1;
  }

  if (I->getOpcode() == Instruction::FMul) {
    Value *V0 = I->getOperand(0);
    Value *V1 = I->getOperand(1);
    if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
      Addend0.set(C, V1);
      return 1;
    }

    if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
      Addend0.set(C, V0);
      return 1;
    }
  }

  return 0;
}
// Try to break *this* addend into two addends. e.g. Suppose this addend is
// <2.3, V> and V = X + Y; by calling this function, we obtain two addends,
// i.e. <2.3, X> and <2.3, Y>.
unsigned FAddend::drillAddendDownOneStep
  (FAddend &Addend0, FAddend &Addend1) const {
  if (isConstant())
    return 0;

  unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
  if (!BreakNum || Coeff.isOne())
    return BreakNum;

  Addend0.Scale(Coeff);

  if (BreakNum == 2)
    Addend1.Scale(Coeff);

  return BreakNum;
}
Value *FAddCombine::simplify(Instruction *I) {
  assert(I->hasAllowReassoc() && I->hasNoSignedZeros() &&
         "Expected 'reassoc'+'nsz' instruction");

  // Currently we are not able to handle vector type.
  if (I->getType()->isVectorTy())
    return nullptr;

  assert((I->getOpcode() == Instruction::FAdd ||
          I->getOpcode() == Instruction::FSub) && "Expect add/sub");

  // Save the instruction before calling other member-functions.
  Instr = I;

  FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;

  unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);

  // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
  unsigned Opnd0_ExpNum = 0;
  unsigned Opnd1_ExpNum = 0;

  if (!Opnd0.isConstant())
    Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);

  // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
  if (OpndNum == 2 && !Opnd1.isConstant())
    Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);

  // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
  if (Opnd0_ExpNum && Opnd1_ExpNum) {
    AddendVect AllOpnds;
    AllOpnds.push_back(&Opnd0_0);
    AllOpnds.push_back(&Opnd1_0);
    if (Opnd0_ExpNum == 2)
      AllOpnds.push_back(&Opnd0_1);
    if (Opnd1_ExpNum == 2)
      AllOpnds.push_back(&Opnd1_1);

    // Compute the instruction quota. We should save at least one instruction.
    unsigned InstQuota = 0;

    Value *V0 = I->getOperand(0);
    Value *V1 = I->getOperand(1);
    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
                 (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;

    if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
      return R;
  }

  if (OpndNum != 2) {
    // The input instruction is "I = 0.0 +/- V". If "V" could be split into
    // two addends, say "V = X - Y", the instruction would have been
    // optimized into "I = Y - X" in the previous steps.
    const FAddendCoef &CE = Opnd0.getCoef();
    return CE.isOne() ? Opnd0.getSymVal() : nullptr;
  }

  // Step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
  if (Opnd1_ExpNum) {
    AddendVect AllOpnds;
    AllOpnds.push_back(&Opnd0);
    AllOpnds.push_back(&Opnd1_0);
    if (Opnd1_ExpNum == 2)
      AllOpnds.push_back(&Opnd1_1);

    if (Value *R = simplifyFAdd(AllOpnds, 1))
      return R;
  }

  // Step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
  if (Opnd0_ExpNum) {
    AddendVect AllOpnds;
    AllOpnds.push_back(&Opnd1);
    AllOpnds.push_back(&Opnd0_0);
    if (Opnd0_ExpNum == 2)
      AllOpnds.push_back(&Opnd0_1);

    if (Value *R = simplifyFAdd(AllOpnds, 1))
      return R;
  }

  return nullptr;
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
  unsigned AddendNum = Addends.size();
  assert(AddendNum <= 4 && "Too many addends");

  // For saving intermediate results.
  unsigned NextTmpIdx = 0;
  FAddend TmpResult[3];

  // Points to the constant addend of the resulting simplified expression.
  // If the resulting expr has a constant addend, this constant addend should
  // reside at the top of the resulting expression tree. Placing the constant
  // close to the super-expr(s) will potentially reveal some optimization
  // opportunities in the super-expr(s).
  const FAddend *ConstAdd = nullptr;

  // Simplified addends are placed in <SimpVect>.
  AddendVect SimpVect;

  // The outer loop works on one symbolic-value at a time. Suppose the input
  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
  // The symbolic-values will be processed in this order: x, y, z.
  for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
    const FAddend *ThisAddend = Addends[SymIdx];
    if (!ThisAddend) {
      // This addend was processed before.
      continue;
    }

    Value *Val = ThisAddend->getSymVal();
    unsigned StartIdx = SimpVect.size();
    SimpVect.push_back(ThisAddend);

    // The inner loop collects addends sharing same symbolic-value, and these
    // addends will be later on folded into a single addend. Following above
    // example, if the symbolic value "y" is being processed, the inner loop
    // will collect two addends "<b1,y>" and "<b2,y>". These two addends will
    // be later on folded into "<b1+b2, y>".
    for (unsigned SameSymIdx = SymIdx + 1;
         SameSymIdx < AddendNum; SameSymIdx++) {
      const FAddend *T = Addends[SameSymIdx];
      if (T && T->getSymVal() == Val) {
        // Set null such that next iteration of the outer loop will not process
        // this addend again.
        Addends[SameSymIdx] = nullptr;
        SimpVect.push_back(T);
      }
    }

    // If multiple addends share same symbolic value, fold them together.
    if (StartIdx + 1 != SimpVect.size()) {
      FAddend &R = TmpResult[NextTmpIdx++];
      R = *SimpVect[StartIdx];
      for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
        R += *SimpVect[Idx];

      // Pop all addends being folded and push the resulting folded addend.
      SimpVect.resize(StartIdx);
      if (Val != nullptr) {
        if (!R.isZero())
          SimpVect.push_back(&R);
      } else {
        // Don't push constant addend at this time. It will be the last element
        // of <SimpVect>.
        ConstAdd = &R;
      }
    }
  }

  assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) &&
         "out-of-bound access");

  if (ConstAdd)
    SimpVect.push_back(ConstAdd);

  Value *Result;
  if (!SimpVect.empty())
    Result = createNaryFAdd(SimpVect, InstrQuota);
  else {
    // The addition is folded to 0.0.
    Result = ConstantFP::get(Instr->getType(), 0.0);
  }

  return Result;
}
Value *FAddCombine::createNaryFAdd
  (const AddendVect &Opnds, unsigned InstrQuota) {
  assert(!Opnds.empty() && "Expect at least one addend");

  // Step 1: Check if the # of instructions needed exceeds the quota.

  unsigned InstrNeeded = calcInstrNumber(Opnds);
  if (InstrNeeded > InstrQuota)
    return nullptr;

  initCreateInstNum();

  // Step 2: Emit the N-ary addition.
  // Note that at most three instructions are involved in Fadd-InstCombine: the
  // addition in question, and at most two neighboring instructions.
  // The resulting optimized addition should have at least one less instruction
  // than the original addition expression tree. This implies that the resulting
  // N-ary addition has at most two instructions, and we don't need to worry
  // about tree-height when constructing the N-ary addition.

  Value *LastVal = nullptr;
  bool LastValNeedNeg = false;

  // Iterate the addends, creating fadd/fsub using adjacent two addends.
  for (const FAddend *Opnd : Opnds) {
    bool NeedNeg;
    Value *V = createAddendVal(*Opnd, NeedNeg);
    if (!LastVal) {
      LastVal = V;
      LastValNeedNeg = NeedNeg;
      continue;
    }

    if (LastValNeedNeg == NeedNeg) {
      LastVal = createFAdd(LastVal, V);
      continue;
    }

    if (LastValNeedNeg)
      LastVal = createFSub(V, LastVal);
    else
      LastVal = createFSub(LastVal, V);

    LastValNeedNeg = false;
  }

  if (LastValNeedNeg) {
    LastVal = createFNeg(LastVal);
  }

#ifndef NDEBUG
  assert(CreateInstrNum == InstrNeeded &&
         "Inconsistent instruction numbers");
#endif

  return LastVal;
}
Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) {
  Value *V = Builder.CreateFSub(Opnd0, Opnd1);
  if (Instruction *I = dyn_cast<Instruction>(V))
    createInstPostProc(I);
  return V;
}

Value *FAddCombine::createFNeg(Value *V) {
  Value *Zero = cast<Value>(ConstantFP::getZeroValueForNegation(V->getType()));
  Value *NewV = createFSub(Zero, V);
  if (Instruction *I = dyn_cast<Instruction>(NewV))
    createInstPostProc(I, true); // fneg's don't receive instruction numbers.
  return NewV;
}

Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) {
  Value *V = Builder.CreateFAdd(Opnd0, Opnd1);
  if (Instruction *I = dyn_cast<Instruction>(V))
    createInstPostProc(I);
  return V;
}

Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
  Value *V = Builder.CreateFMul(Opnd0, Opnd1);
  if (Instruction *I = dyn_cast<Instruction>(V))
    createInstPostProc(I);
  return V;
}

void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) {
  NewInstr->setDebugLoc(Instr->getDebugLoc());

  // Keep track of the number of instructions created.
  if (!NoNumber)
    incCreateInstNum();

  // Propagate fast-math flags.
  NewInstr->setFastMathFlags(Instr->getFastMathFlags());
}
// Return the number of instructions needed to emit the N-ary addition.
// NOTE: Keep this function in sync with createAddendVal().
unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
  unsigned OpndNum = Opnds.size();
  unsigned InstrNeeded = OpndNum - 1;

  // The number of addends in the form of "(-1)*x".
  unsigned NegOpndNum = 0;

  // Adjust the number of instructions needed to emit the N-ary add.
  for (const FAddend *Opnd : Opnds) {
    if (Opnd->isConstant())
      continue;

    // The constant check above is really for a few special constant
    // coefficients.
    if (isa<UndefValue>(Opnd->getSymVal()))
      continue;

    const FAddendCoef &CE = Opnd->getCoef();
    if (CE.isMinusOne() || CE.isMinusTwo())
      NegOpndNum++;

    // Let the addend be "c * x". If "c == +/-1", the value of the addend
    // is immediately available; otherwise, it needs exactly one instruction
    // to evaluate the value.
    if (!CE.isMinusOne() && !CE.isOne())
      InstrNeeded++;
  }

  if (NegOpndNum == OpndNum)
    InstrNeeded++;
  return InstrNeeded;
}
// Input Addend        Value           NeedNeg(output)
// ================================================================
//  Constant C         C               false
//  <+/-1, V>          V               coefficient is -1
//  <2/-2, V>          "fadd V, V"     coefficient is -2
//  <C, V>             "fmul V, C"     false
//
// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
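//
// For instance, the addend <-2, V> is materialized as "fadd V, V" with
// NeedNeg set, and the caller then folds the pending negation into an
// fsub/fneg.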
Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
  const FAddendCoef &Coeff = Opnd.getCoef();

  if (Opnd.isConstant()) {
    NeedNeg = false;
    return Coeff.getValue(Instr->getType());
  }

  Value *OpndVal = Opnd.getSymVal();

  if (Coeff.isMinusOne() || Coeff.isOne()) {
    NeedNeg = Coeff.isMinusOne();
    return OpndVal;
  }

  if (Coeff.isTwo() || Coeff.isMinusTwo()) {
    NeedNeg = Coeff.isMinusTwo();
    return createFAdd(OpndVal, OpndVal);
  }

  NeedNeg = false;
  return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
}
// Checks if any operand is negative and we can convert add to sub.
// This function checks for the following negative patterns
//   ADD(XOR(OR(Z, NOT(C)), C), 1) == NEG(AND(Z, C))
//   ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
//   XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
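//
// Illustrative instance of the first pattern (constant chosen for
// exposition): with C = 0x0F, (Z | ~0x0F) ^ 0x0F == ~(Z & 0x0F), so adding 1
// yields -(Z & 0x0F), which lets the surrounding add be rewritten as a sub.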
static Value *checkForNegativeOperand(BinaryOperator &I,
                                      InstCombiner::BuilderTy &Builder) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  // This function creates 2 instructions to replace ADD, so we need at least
  // one of LHS or RHS to have one use to ensure a benefit from the transform.
  if (!LHS->hasOneUse() && !RHS->hasOneUse())
    return nullptr;

  Value *X = nullptr, *Y = nullptr, *Z = nullptr;
  const APInt *C1 = nullptr, *C2 = nullptr;

  // if ONE is on other side, swap
  if (match(RHS, m_Add(m_Value(X), m_One())))
    std::swap(LHS, RHS);

  if (match(LHS, m_Add(m_Value(X), m_One()))) {
    // if XOR on other side, swap
    if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
      std::swap(X, RHS);

    if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) {
      // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
      // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
      if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
        Value *NewAnd = Builder.CreateAnd(Z, *C1);
        return Builder.CreateSub(RHS, NewAnd, "sub");
      } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
        // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
        // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
        Value *NewOr = Builder.CreateOr(Z, ~(*C1));
        return Builder.CreateSub(RHS, NewOr, "sub");
      }
    }
  }

  // Restore LHS and RHS
  LHS = I.getOperand(0);
  RHS = I.getOperand(1);

  // if XOR is on other side, swap
  if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
    std::swap(LHS, RHS);

  // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
  // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
  if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
    if (C1->countTrailingZeros() == 0)
      if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
        Value *NewOr = Builder.CreateOr(Z, ~(*C2));
        return Builder.CreateSub(RHS, NewOr, "sub");
      }
  return nullptr;
}
/// Wrapping flags may allow combining constants separated by an extend.
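///
/// A small illustration with made-up constants:
///   %a = add nuw i8 %x, 10
///   %z = zext i8 %a to i32
///   %r = add i32 %z, -5
/// can be rewritten as zext(add nuw i8 %x, 5) because the narrow add still
/// cannot wrap.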
static Instruction *foldNoWrapAdd(BinaryOperator &Add,
                                  InstCombiner::BuilderTy &Builder) {
  Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
  Type *Ty = Add.getType();
  Constant *Op1C;
  if (!match(Op1, m_Constant(Op1C)))
    return nullptr;

  // Try this match first because it results in an add in the narrow type.
  // (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1)))
  Value *X;
  const APInt *C1, *C2;
  if (match(Op1, m_APInt(C1)) &&
      match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) &&
      C1->isNegative() && C1->sge(-C2->sext(C1->getBitWidth()))) {
    Constant *NewC =
        ConstantInt::get(X->getType(), *C2 + C1->trunc(C2->getBitWidth()));
    return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
  }

  // More general combining of constants in the wide type.
  // (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
  Constant *NarrowC;
  if (match(Op0, m_OneUse(m_SExt(m_NSWAdd(m_Value(X), m_Constant(NarrowC)))))) {
    Constant *WideC = ConstantExpr::getSExt(NarrowC, Ty);
    Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
    Value *WideX = Builder.CreateSExt(X, Ty);
    return BinaryOperator::CreateAdd(WideX, NewC);
  }
  // (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
  if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_Constant(NarrowC)))))) {
    Constant *WideC = ConstantExpr::getZExt(NarrowC, Ty);
    Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
    Value *WideX = Builder.CreateZExt(X, Ty);
    return BinaryOperator::CreateAdd(WideX, NewC);
  }

  return nullptr;
}
Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
  Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
  Constant *Op1C;
  if (!match(Op1, m_Constant(Op1C)))
    return nullptr;

  if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
    return NV;

  Value *X;
  Constant *Op00C;

  // add (sub C1, X), C2 --> sub (add C1, C2), X
  if (match(Op0, m_Sub(m_Constant(Op00C), m_Value(X))))
    return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C, Op1C), X);

  Value *Y;

  // add (sub X, Y), -1 --> add (not Y), X
  if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
      match(Op1, m_AllOnes()))
    return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);

  // zext(bool) + C -> bool ? C + 1 : C
  if (match(Op0, m_ZExt(m_Value(X))) &&
      X->getType()->getScalarSizeInBits() == 1)
    return SelectInst::Create(X, AddOne(Op1C), Op1);

  // ~X + C --> (C-1) - X
  if (match(Op0, m_Not(m_Value(X))))
    return BinaryOperator::CreateSub(SubOne(Op1C), X);

  const APInt *C;
  if (!match(Op1, m_APInt(C)))
    return nullptr;

  // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
  const APInt *C2;
  if (match(Op0, m_Or(m_Value(), m_APInt(C2))) && *C2 == -*C)
    return BinaryOperator::CreateXor(Op0, ConstantInt::get(Add.getType(), *C2));

  if (C->isSignMask()) {
    // If wrapping is not allowed, then the addition must set the sign bit:
    // X + (signmask) --> X | signmask
    if (Add.hasNoSignedWrap() || Add.hasNoUnsignedWrap())
      return BinaryOperator::CreateOr(Op0, Op1);

    // If wrapping is allowed, then the addition flips the sign bit of LHS:
    // X + (signmask) --> X ^ signmask
    return BinaryOperator::CreateXor(Op0, Op1);
  }

  // Is this add the last step in a convoluted sext?
  // add(zext(xor i16 X, -32768), -32768) --> sext X
  Type *Ty = Add.getType();
  if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) &&
      C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C)
    return CastInst::Create(Instruction::SExt, X, Ty);

  if (C->isOneValue() && Op0->hasOneUse()) {
    // add (sext i1 X), 1 --> zext (not X)
    // TODO: The smallest IR representation is (select X, 0, 1), and that would
    // not require the one-use check. But we need to remove a transform in
    // visitSelect and make sure that IR value tracking for select is equal or
    // better than for these ops.
    if (match(Op0, m_SExt(m_Value(X))) &&
        X->getType()->getScalarSizeInBits() == 1)
      return new ZExtInst(Builder.CreateNot(X), Ty);

    // Shifts and add used to flip and mask off the low bit:
    // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
    const APInt *C3;
    if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) &&
        C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
      Value *NotX = Builder.CreateNot(X);
      return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
    }
  }

  return nullptr;
}
// Matches multiplication expression Op * C where C is a constant. Returns the
// constant value in C and the other operand in Op. Returns true if such a
// match is found.
static bool MatchMul(Value *E, Value *&Op, APInt &C) {
  const APInt *AI;
  if (match(E, m_Mul(m_Value(Op), m_APInt(AI)))) {
    C = *AI;
    return true;
  }
  if (match(E, m_Shl(m_Value(Op), m_APInt(AI)))) {
    C = APInt(AI->getBitWidth(), 1);
    C <<= *AI;
    return true;
  }
  return false;
}
// Matches remainder expression Op % C where C is a constant. Returns the
// constant value in C and the other operand in Op. Returns the signedness of
// the remainder operation in IsSigned. Returns true if such a match is
// found.
static bool MatchRem(Value *E, Value *&Op, APInt &C, bool &IsSigned) {
  const APInt *AI;
  IsSigned = false;
  if (match(E, m_SRem(m_Value(Op), m_APInt(AI)))) {
    IsSigned = true;
    C = *AI;
    return true;
  }
  if (match(E, m_URem(m_Value(Op), m_APInt(AI)))) {
    C = *AI;
    return true;
  }
  if (match(E, m_And(m_Value(Op), m_APInt(AI))) && (*AI + 1).isPowerOf2()) {
    C = *AI + 1;
    return true;
  }
  return false;
}
// Matches division expression Op / C with the given signedness as indicated
// by IsSigned, where C is a constant. Returns the constant value in C and the
// other operand in Op. Returns true if such a match is found.
static bool MatchDiv(Value *E, Value *&Op, APInt &C, bool IsSigned) {
  const APInt *AI;
  if (IsSigned && match(E, m_SDiv(m_Value(Op), m_APInt(AI)))) {
    C = *AI;
    return true;
  }
  if (!IsSigned) {
    if (match(E, m_UDiv(m_Value(Op), m_APInt(AI)))) {
      C = *AI;
      return true;
    }
    if (match(E, m_LShr(m_Value(Op), m_APInt(AI)))) {
      C = APInt(AI->getBitWidth(), 1);
      C <<= *AI;
      return true;
    }
  }
  return false;
}
// Returns whether C0 * C1 with the given signedness overflows.
static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) {
  bool overflow;
  if (IsSigned)
    (void)C0.smul_ov(C1, overflow);
  else
    (void)C0.umul_ov(C1, overflow);
  return overflow;
}
// Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
// does not overflow.
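//
// Concrete instance (illustrative constants): with C0 = 4 and C1 = 3,
//   X % 4 + ((X / 4) % 3) * 4  ==>  X % 12
// e.g. X = 22 gives 2 + 2 * 4 = 10 on the left and 22 % 12 = 10 on the right.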
Value *InstCombiner::SimplifyAddWithRemainder(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *X, *MulOpV;
  APInt C0, MulOpC;
  bool IsSigned;
  // Match I = X % C0 + MulOpV * C0
  if (((MatchRem(LHS, X, C0, IsSigned) && MatchMul(RHS, MulOpV, MulOpC)) ||
       (MatchRem(RHS, X, C0, IsSigned) && MatchMul(LHS, MulOpV, MulOpC))) &&
      C0 == MulOpC) {
    Value *RemOpV;
    APInt C1;
    bool Rem2IsSigned;
    // Match MulOpV = RemOpV % C1
    if (MatchRem(MulOpV, RemOpV, C1, Rem2IsSigned) &&
        IsSigned == Rem2IsSigned) {
      Value *DivOpV;
      APInt DivOpC;
      // Match RemOpV = X / C0
      if (MatchDiv(RemOpV, DivOpV, DivOpC, IsSigned) && X == DivOpV &&
          C0 == DivOpC && !MulWillOverflow(C0, C1, IsSigned)) {
        Value *NewDivisor =
            ConstantInt::get(X->getType()->getContext(), C0 * C1);
        return IsSigned ? Builder.CreateSRem(X, NewDivisor, "srem")
                        : Builder.CreateURem(X, NewDivisor, "urem");
      }
    }
  }

  return nullptr;
}
1055 /// (1 << NBits) - 1
1057 /// ~(-(1 << NBits))
1058 /// Because a 'not' is better for bit-tracking analysis and other transforms
1059 /// than an 'add'. The new shl is always nsw, and is nuw if old `and` was.
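///
/// Illustrative IR (names are hypothetical):
///   %shl  = shl i32 1, %nbits
///   %mask = add i32 %shl, -1          ; (1 << %nbits) - 1
/// becomes
///   %notmask = shl nsw i32 -1, %nbits ; -(1 << %nbits)
///   %mask    = xor i32 %notmask, -1   ; the 'not'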
static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
                                           InstCombiner::BuilderTy &Builder) {
  Value *NBits;
  if (!match(&I, m_Add(m_OneUse(m_Shl(m_One(), m_Value(NBits))), m_AllOnes())))
    return nullptr;

  Constant *MinusOne = Constant::getAllOnesValue(NBits->getType());
  Value *NotMask = Builder.CreateShl(MinusOne, NBits, "notmask");
  // Be wary of constant folding.
  if (auto *BOp = dyn_cast<BinaryOperator>(NotMask)) {
    // Always NSW. But NUW propagates from `add`.
    BOp->setHasNoSignedWrap();
    BOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
  }

  return BinaryOperator::CreateNot(NotMask, I.getName());
}
static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
  assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
  Type *Ty = I.getType();
  auto getUAddSat = [&]() {
    return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
  };

  // add (umin X, ~Y), Y --> uaddsat X, Y
  Value *X, *Y;
  if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
                        m_Deferred(Y))))
    return CallInst::Create(getUAddSat(), { X, Y });

  // add (umin X, ~C), C --> uaddsat X, C
  const APInt *C, *NotC;
  if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
      *C == ~*NotC)
    return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });

  return nullptr;
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // (A*B)+(A*C) -> A*(B+C) etc
  if (Value *V = SimplifyUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldAddWithConstant(I))
    return X;

  if (Instruction *X = foldNoWrapAdd(I, Builder))
    return X;

  // FIXME: This should be moved into the above helper function to allow these
  // transforms for general constant or constant splat vectors.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Type *Ty = I.getType();
  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
    Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr;
    if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      unsigned TySizeBits = Ty->getScalarSizeInBits();
      const APInt &RHSVal = CI->getValue();
      unsigned ExtendAmt = 0;
      // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
      // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
      if (XorRHS->getValue() == -RHSVal) {
        if (RHSVal.isPowerOf2())
          ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
        else if (XorRHS->getValue().isPowerOf2())
          ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
      }

      if (ExtendAmt) {
        APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
        if (!MaskedValueIsZero(XorLHS, Mask, 0, &I))
          ExtendAmt = 0;
      }

      if (ExtendAmt) {
        Constant *ShAmt = ConstantInt::get(Ty, ExtendAmt);
        Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext");
        return BinaryOperator::CreateAShr(NewShl, ShAmt);
      }

      // If this is a xor that was canonicalized from a sub, turn it back into
      // a sub and fuse this add with it.
      if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
        KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I);
        if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue())
          return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
                                           XorLHS);
      }
      // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C,
      // transform them into (X + (signmask ^ C))
      if (XorRHS->getValue().isSignMask())
        return BinaryOperator::CreateAdd(XorLHS,
                                         ConstantExpr::getXor(XorRHS, CI));
    }
  }

  if (Ty->isIntOrIntVectorTy(1))
    return BinaryOperator::CreateXor(LHS, RHS);

  // X + X --> X << 1
  if (LHS == RHS) {
    auto *Shl = BinaryOperator::CreateShl(LHS, ConstantInt::get(Ty, 1));
    Shl->setHasNoSignedWrap(I.hasNoSignedWrap());
    Shl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return Shl;
  }

  Value *A, *B;
  if (match(LHS, m_Neg(m_Value(A)))) {
    // -A + -B --> -(A + B)
    if (match(RHS, m_Neg(m_Value(B))))
      return BinaryOperator::CreateNeg(Builder.CreateAdd(A, B));

    // -A + B --> B - A
    return BinaryOperator::CreateSub(RHS, A);
  }

  // Canonicalize sext to zext for better value tracking potential.
  // add A, sext(B) --> sub A, zext(B)
  if (match(&I, m_c_Add(m_Value(A), m_OneUse(m_SExt(m_Value(B))))) &&
      B->getType()->isIntOrIntVectorTy(1))
    return BinaryOperator::CreateSub(A, Builder.CreateZExt(B, Ty));

  // A + -B --> A - B
  if (match(RHS, m_Neg(m_Value(B))))
    return BinaryOperator::CreateSub(LHS, B);

  if (Value *V = checkForNegativeOperand(I, Builder))
    return replaceInstUsesWith(I, V);

  // (A + 1) + ~B --> A - B
  // ~B + (A + 1) --> A - B
  // (~B + A) + 1 --> A - B
  // (A + ~B) + 1 --> A - B
  if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))) ||
      match(&I, m_BinOp(m_c_Add(m_Not(m_Value(B)), m_Value(A)), m_One())))
    return BinaryOperator::CreateSub(A, B);

  // X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
  if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);

  // A+B --> A|B iff A and B have no bits set in common.
  if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
    return BinaryOperator::CreateOr(LHS, RHS);

  // FIXME: We already did a check for ConstantInt RHS above this.
  // FIXME: Is this pattern covered by another fold? No regression tests fail on
  // removal.
  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    // (X & FF00) + xx00 -> (X+xx00) & FF00
    Value *X;
    ConstantInt *C2;
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
        CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
      // See if all bits from the first bit set in the Add RHS up are included
      // in the mask. First, get the rightmost bit.
      const APInt &AddRHSV = CRHS->getValue();

      // Form a mask of all bits from the lowest bit added through the top.
      APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

      // See if the and mask includes all of these bits.
      APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

      if (AddRHSHighBits == AddRHSHighBitsAnd) {
        // Okay, the xform is safe. Insert the new add pronto.
        Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName());
        return BinaryOperator::CreateAnd(NewAdd, C2);
      }
    }
  }

  // add (select X 0 (sub n A)) A --> select X A n
  {
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    }
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);

      if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);
    }
  }

  if (Instruction *Ext = narrowMathIfNoOverflow(I))
    return Ext;

  // (add (xor A, B) (and A, B)) --> (or A, B)
  // (add (and A, B) (xor A, B)) --> (or A, B)
  if (match(&I, m_c_BinOp(m_Xor(m_Value(A), m_Value(B)),
                          m_c_And(m_Deferred(A), m_Deferred(B)))))
    return BinaryOperator::CreateOr(A, B);

  // (add (or A, B) (and A, B)) --> (add A, B)
  // (add (and A, B) (or A, B)) --> (add A, B)
  if (match(&I, m_c_BinOp(m_Or(m_Value(A), m_Value(B)),
                          m_c_And(m_Deferred(A), m_Deferred(B))))) {
    I.setOperand(0, A);
    I.setOperand(1, B);
    return &I;
  }

  // TODO(jingyue): Consider willNotOverflowSignedAdd and
  // willNotOverflowUnsignedAdd to reduce the number of invocations of
  // computeKnownBits.
  bool Changed = false;
  if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) {
    Changed = true;
    I.setHasNoSignedWrap(true);
  }
  if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) {
    Changed = true;
    I.setHasNoUnsignedWrap(true);
  }

  if (Instruction *V = canonicalizeLowbitMask(I, Builder))
    return V;

  if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
    return SatAdd;

  return Changed ? &I : nullptr;
}
/// Eliminate an op from a linear interpolation (lerp) pattern.
static Instruction *factorizeLerp(BinaryOperator &I,
                                  InstCombiner::BuilderTy &Builder) {
  Value *X, *Y, *Z;
  if (!match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_Value(Y),
                                            m_OneUse(m_FSub(m_FPOne(),
                                                            m_Value(Z))))),
                          m_OneUse(m_c_FMul(m_Value(X), m_Deferred(Z))))))
    return nullptr;

  // (Y * (1.0 - Z)) + (X * Z) --> Y + Z * (X - Y) [8 commuted variants]
  Value *XY = Builder.CreateFSubFMF(X, Y, &I);
  Value *MulZ = Builder.CreateFMulFMF(Z, XY, &I);
  return BinaryOperator::CreateFAddFMF(Y, MulZ, &I);
}
/// Factor a common operand out of fadd/fsub of fmul/fdiv.
static Instruction *factorizeFAddFSub(BinaryOperator &I,
                                      InstCombiner::BuilderTy &Builder) {
  assert((I.getOpcode() == Instruction::FAdd ||
          I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub");
  assert(I.hasAllowReassoc() && I.hasNoSignedZeros() &&
         "FP factorization requires FMF");

  if (Instruction *Lerp = factorizeLerp(I, Builder))
    return Lerp;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Value *X, *Y, *Z;
  bool IsFMul;
  if ((match(Op0, m_OneUse(m_FMul(m_Value(X), m_Value(Z)))) &&
       match(Op1, m_OneUse(m_c_FMul(m_Value(Y), m_Specific(Z))))) ||
      (match(Op0, m_OneUse(m_FMul(m_Value(Z), m_Value(X)))) &&
       match(Op1, m_OneUse(m_c_FMul(m_Value(Y), m_Specific(Z))))))
    IsFMul = true;
  else if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Z)))) &&
           match(Op1, m_OneUse(m_FDiv(m_Value(Y), m_Specific(Z)))))
    IsFMul = false;
  else
    return nullptr;

  // (X * Z) + (Y * Z) --> (X + Y) * Z
  // (X * Z) - (Y * Z) --> (X - Y) * Z
  // (X / Z) + (Y / Z) --> (X + Y) / Z
  // (X / Z) - (Y / Z) --> (X - Y) / Z
  bool IsFAdd = I.getOpcode() == Instruction::FAdd;
  Value *XY = IsFAdd ? Builder.CreateFAddFMF(X, Y, &I)
                     : Builder.CreateFSubFMF(X, Y, &I);

  // Bail out if we just created a denormal constant.
  // TODO: This is copied from a previous implementation. Is it necessary?
  const APFloat *C;
  if (match(XY, m_APFloat(C)) && !C->isNormal())
    return nullptr;

  return IsFMul ? BinaryOperator::CreateFMulFMF(XY, Z, &I)
                : BinaryOperator::CreateFDivFMF(XY, Z, &I);
}
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
  if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
    return FoldedFAdd;

  // (-X) + Y --> Y - X
  Value *X, *Y;
  if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y))))
    return BinaryOperator::CreateFSubFMF(Y, X, &I);

  // Similar to above, but look through fmul/fdiv for the negated term.
  // (-X * Y) + Z --> Z - (X * Y) [4 commuted variants]
  Value *Z;
  if (match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))),
                         m_Value(Z)))) {
    Value *XY = Builder.CreateFMulFMF(X, Y, &I);
    return BinaryOperator::CreateFSubFMF(Z, XY, &I);
  }
  // (-X / Y) + Z --> Z - (X / Y) [2 commuted variants]
  // (X / -Y) + Z --> Z - (X / Y) [2 commuted variants]
  if (match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y))),
                         m_Value(Z))) ||
      match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))),
                         m_Value(Z)))) {
    Value *XY = Builder.CreateFDivFMF(X, Y, &I);
    return BinaryOperator::CreateFSubFMF(Z, XY, &I);
  }

  // Check for (fadd double (sitofp x), y), see if we can merge this into an
  // integer add followed by a promotion.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
    Value *LHSIntVal = LHSConv->getOperand(0);
    Type *FPType = LHSConv->getType();

    // TODO: This check is overly conservative. In many cases known bits
    // analysis can tell us that the result of the addition has less significant
    // bits than the integer type can hold.
    auto IsValidPromotion = [](Type *FTy, Type *ITy) {
      Type *FScalarTy = FTy->getScalarType();
      Type *IScalarTy = ITy->getScalarType();

      // Do we have enough bits in the significand to represent the result of
      // the integer addition?
      unsigned MaxRepresentableBits =
        APFloat::semanticsPrecision(FScalarTy->getFltSemantics());
      return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits;
    };

    // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
    // ... if the constant fits in the integer value. This is useful for things
    // like (double)(x & 1234) + 4.0 -> (double)((x & 1234)+4) which no longer
    // requires a constant pool load, and generally allows the add to be better
    // instcombined.
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
      if (IsValidPromotion(FPType, LHSIntVal->getType())) {
        Constant *CI =
            ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
        if (LHSConv->hasOneUse() &&
            ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
            willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
          // Insert the new integer add.
          Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv");
          return new SIToFPInst(NewAdd, I.getType());
        }
      }

    // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
      Value *RHSIntVal = RHSConv->getOperand(0);
      // It's enough to check LHS types only because we require int types to
      // be the same for this transform.
      if (IsValidPromotion(FPType, LHSIntVal->getType())) {
        // Only do this if x/y have the same type, if at least one of them has a
        // single use (so we don't increase the number of int->fp conversions),
        // and if the integer add will not overflow.
        if (LHSIntVal->getType() == RHSIntVal->getType() &&
            (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
            willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
          // Insert the new integer add.
          Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv");
          return new SIToFPInst(NewAdd, I.getType());
        }
      }
    }
  }

  // Handle special cases for FAdd with selects feeding the operation.
  if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS))
    return replaceInstUsesWith(I, V);

  if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
    if (Instruction *F = factorizeFAddFSub(I, Builder))
      return F;
    if (Value *V = FAddCombine(Builder).simplify(&I))
      return replaceInstUsesWith(I, V);
  }

  return nullptr;
}
/// Optimize pointer differences into the same array into a size. Consider:
///  &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                               Type *Ty) {
  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
  // this.
  bool Swapped = false;
  GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;

  // For now we require one side to be the base pointer "A" or a constant
  // GEP derived from it.
  if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
    // (gep X, ...) - X
    if (LHSGEP->getOperand(0) == RHS) {
      GEP1 = LHSGEP;
      Swapped = false;
    } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
      // (gep X, ...) - (gep X, ...)
      if (LHSGEP->getOperand(0)->stripPointerCasts() ==
          RHSGEP->getOperand(0)->stripPointerCasts()) {
        GEP2 = RHSGEP;
        GEP1 = LHSGEP;
        Swapped = false;
      }
    }
  }

  if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
    // X - (gep X, ...)
    if (RHSGEP->getOperand(0) == LHS) {
      GEP1 = RHSGEP;
      Swapped = true;
    } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
      // (gep X, ...) - (gep X, ...)
      if (RHSGEP->getOperand(0)->stripPointerCasts() ==
          LHSGEP->getOperand(0)->stripPointerCasts()) {
        GEP2 = LHSGEP;
        GEP1 = RHSGEP;
        Swapped = true;
      }
    }
  }

  if (!GEP1)
    return nullptr;

  if (GEP2) {
    // (gep X, ...) - (gep X, ...)
    //
    // Avoid duplicating the arithmetic if there are more than one non-constant
    // indices between the two GEPs and either GEP has a non-constant index and
    // multiple users. If zero non-constant index, the result is a constant and
    // there is no duplication. If one non-constant index, the result is an add
    // or sub with a constant, which is no larger than the original code, and
    // there's no duplicated arithmetic, even if either GEP has multiple
    // users. If more than one non-constant indices combined, as long as the GEP
    // with at least one non-constant index doesn't have multiple users, there
    // is no duplication.
    unsigned NumNonConstantIndices1 = GEP1->countNonConstantIndices();
    unsigned NumNonConstantIndices2 = GEP2->countNonConstantIndices();
    if (NumNonConstantIndices1 + NumNonConstantIndices2 > 1 &&
        ((NumNonConstantIndices1 > 0 && !GEP1->hasOneUse()) ||
         (NumNonConstantIndices2 > 0 && !GEP2->hasOneUse()))) {
      return nullptr;
    }
  }

  // Emit the offset of the GEP and an intptr_t.
  Value *Result = EmitGEPOffset(GEP1);

  // If we had a constant expression GEP on the other side offsetting the
  // pointer, subtract it from the offset we have.
  if (GEP2) {
    Value *Offset = EmitGEPOffset(GEP2);
    Result = Builder.CreateSub(Result, Offset);
  }

  // If we have p - gep(p, ...) then we have to negate the result.
  if (Swapped)
    Result = Builder.CreateNeg(Result, "diff.neg");

  return Builder.CreateIntCast(Result, Ty, true);
}
1567 Instruction
*InstCombiner::visitSub(BinaryOperator
&I
) {
1568 if (Value
*V
= SimplifySubInst(I
.getOperand(0), I
.getOperand(1),
1569 I
.hasNoSignedWrap(), I
.hasNoUnsignedWrap(),
1570 SQ
.getWithInstruction(&I
)))
1571 return replaceInstUsesWith(I
, V
);
1573 if (Instruction
*X
= foldVectorBinop(I
))
1576 // (A*B)-(A*C) -> A*(B-C) etc
1577 if (Value
*V
= SimplifyUsingDistributiveLaws(I
))
1578 return replaceInstUsesWith(I
, V
);
1580 // If this is a 'B = x-(-A)', change to B = x+A.
1581 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
1582 if (Value
*V
= dyn_castNegVal(Op1
)) {
1583 BinaryOperator
*Res
= BinaryOperator::CreateAdd(Op0
, V
);
1585 if (const auto *BO
= dyn_cast
<BinaryOperator
>(Op1
)) {
1586 assert(BO
->getOpcode() == Instruction::Sub
&&
1587 "Expected a subtraction operator!");
1588 if (BO
->hasNoSignedWrap() && I
.hasNoSignedWrap())
1589 Res
->setHasNoSignedWrap(true);
1591 if (cast
<Constant
>(Op1
)->isNotMinSignedValue() && I
.hasNoSignedWrap())
1592 Res
->setHasNoSignedWrap(true);
1598 if (I
.getType()->isIntOrIntVectorTy(1))
1599 return BinaryOperator::CreateXor(Op0
, Op1
);
1601 // Replace (-1 - A) with (~A).
1602 if (match(Op0
, m_AllOnes()))
1603 return BinaryOperator::CreateNot(Op1
);
1605 // (~X) - (~Y) --> Y - X
1607 if (match(Op0
, m_Not(m_Value(X
))) && match(Op1
, m_Not(m_Value(Y
))))
1608 return BinaryOperator::CreateSub(Y
, X
);
1610 // (X + -1) - Y --> ~Y + X
1611 if (match(Op0
, m_OneUse(m_Add(m_Value(X
), m_AllOnes()))))
1612 return BinaryOperator::CreateAdd(Builder
.CreateNot(Op1
), X
);
1614 // Y - (X + 1) --> ~X + Y
1615 if (match(Op1
, m_OneUse(m_Add(m_Value(X
), m_One()))))
1616 return BinaryOperator::CreateAdd(Builder
.CreateNot(X
), Op0
);
1618 // Y - ~X --> (X + 1) + Y
1619 if (match(Op1
, m_OneUse(m_Not(m_Value(X
))))) {
1620 return BinaryOperator::CreateAdd(
1621 Builder
.CreateAdd(Op0
, ConstantInt::get(I
.getType(), 1)), X
);
1624 if (Constant
*C
= dyn_cast
<Constant
>(Op0
)) {
1625 bool IsNegate
= match(C
, m_ZeroInt());
1627 if (match(Op1
, m_ZExt(m_Value(X
))) && X
->getType()->isIntOrIntVectorTy(1)) {
1628 // 0 - (zext bool) --> sext bool
1629 // C - (zext bool) --> bool ? C - 1 : C
1631 return CastInst::CreateSExtOrBitCast(X
, I
.getType());
1632 return SelectInst::Create(X
, SubOne(C
), C
);
1634 if (match(Op1
, m_SExt(m_Value(X
))) && X
->getType()->isIntOrIntVectorTy(1)) {
1635 // 0 - (sext bool) --> zext bool
1636 // C - (sext bool) --> bool ? C + 1 : C
1638 return CastInst::CreateZExtOrBitCast(X
, I
.getType());
1639 return SelectInst::Create(X
, AddOne(C
), C
);
1642 // C - ~X == X + (1+C)
1643 if (match(Op1
, m_Not(m_Value(X
))))
1644 return BinaryOperator::CreateAdd(X
, AddOne(C
));
1646 // Try to fold constant sub into select arguments.
1647 if (SelectInst
*SI
= dyn_cast
<SelectInst
>(Op1
))
1648 if (Instruction
*R
= FoldOpIntoSelect(I
, SI
))
1651 // Try to fold constant sub into PHI values.
1652 if (PHINode
*PN
= dyn_cast
<PHINode
>(Op1
))
1653 if (Instruction
*R
= foldOpIntoPhi(I
, PN
))
1658 // C-(C2-X) --> X+(C-C2)
1659 if (match(Op1
, m_Sub(m_Constant(C2
), m_Value(X
))))
1660 return BinaryOperator::CreateAdd(X
, ConstantExpr::getSub(C
, C2
));
1662 // C-(X+C2) --> (C-C2)-X
1663 if (match(Op1
, m_Add(m_Value(X
), m_Constant(C2
))))
1664 return BinaryOperator::CreateSub(ConstantExpr::getSub(C
, C2
), X
);
1668 if (match(Op0
, m_APInt(Op0C
))) {
1669 unsigned BitWidth
= I
.getType()->getScalarSizeInBits();
1671 // -(X >>u 31) -> (X >>s 31)
1672 // -(X >>s 31) -> (X >>u 31)
1673 if (Op0C
->isNullValue()) {
1676 if (match(Op1
, m_LShr(m_Value(X
), m_APInt(ShAmt
))) &&
1677 *ShAmt
== BitWidth
- 1) {
1678 Value
*ShAmtOp
= cast
<Instruction
>(Op1
)->getOperand(1);
1679 return BinaryOperator::CreateAShr(X
, ShAmtOp
);
1681 if (match(Op1
, m_AShr(m_Value(X
), m_APInt(ShAmt
))) &&
1682 *ShAmt
== BitWidth
- 1) {
1683 Value
*ShAmtOp
= cast
<Instruction
>(Op1
)->getOperand(1);
1684 return BinaryOperator::CreateLShr(X
, ShAmtOp
);
1687 if (Op1
->hasOneUse()) {
1689 SelectPatternFlavor SPF
= matchSelectPattern(Op1
, LHS
, RHS
).Flavor
;
1690 if (SPF
== SPF_ABS
|| SPF
== SPF_NABS
) {
1691 // This is a negate of an ABS/NABS pattern. Just swap the operands
1693 cast
<SelectInst
>(Op1
)->swapValues();
1694 // Don't swap prof metadata, we didn't change the branch behavior.
1695 return replaceInstUsesWith(I
, Op1
);
1700 // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
1702 if (Op0C
->isMask()) {
1703 KnownBits RHSKnown
= computeKnownBits(Op1
, 0, &I
);
1704 if ((*Op0C
| RHSKnown
.Zero
).isAllOnesValue())
1705 return BinaryOperator::CreateXor(Op1
, Op0
);
1711 // X-(X+Y) == -Y X-(Y+X) == -Y
1712 if (match(Op1
, m_c_Add(m_Specific(Op0
), m_Value(Y
))))
1713 return BinaryOperator::CreateNeg(Y
);
1716 if (match(Op0
, m_Sub(m_Specific(Op1
), m_Value(Y
))))
1717 return BinaryOperator::CreateNeg(Y
);
1720 // (sub (or A, B), (xor A, B)) --> (and A, B)
1723 if (match(Op1
, m_Xor(m_Value(A
), m_Value(B
))) &&
1724 match(Op0
, m_c_Or(m_Specific(A
), m_Specific(B
))))
1725 return BinaryOperator::CreateAnd(A
, B
);
1730 // ((X | Y) - X) --> (~X & Y)
1731 if (match(Op0
, m_OneUse(m_c_Or(m_Value(Y
), m_Specific(Op1
)))))
1732 return BinaryOperator::CreateAnd(
1733 Y
, Builder
.CreateNot(Op1
, Op1
->getName() + ".not"));
1736 if (Op1
->hasOneUse()) {
1737 Value
*X
= nullptr, *Y
= nullptr, *Z
= nullptr;
1738 Constant
*C
= nullptr;
1740 // (X - (Y - Z)) --> (X + (Z - Y)).
1741 if (match(Op1
, m_Sub(m_Value(Y
), m_Value(Z
))))
1742 return BinaryOperator::CreateAdd(Op0
,
1743 Builder
.CreateSub(Z
, Y
, Op1
->getName()));
1745 // (X - (X & Y)) --> (X & ~Y)
1746 if (match(Op1
, m_c_And(m_Value(Y
), m_Specific(Op0
))))
1747 return BinaryOperator::CreateAnd(Op0
,
1748 Builder
.CreateNot(Y
, Y
->getName() + ".not"));
1750 // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
1751 // TODO: This could be extended to match arbitrary vector constants.
1753 if (match(Op0
, m_Zero()) && match(Op1
, m_SDiv(m_Value(X
), m_APInt(DivC
))) &&
1754 !DivC
->isMinSignedValue() && *DivC
!= 1) {
1755 Constant
*NegDivC
= ConstantInt::get(I
.getType(), -(*DivC
));
1756 Instruction
*BO
= BinaryOperator::CreateSDiv(X
, NegDivC
);
1757 BO
->setIsExact(cast
<BinaryOperator
>(Op1
)->isExact());
1761 // 0 - (X << Y) -> (-X << Y) when X is freely negatable.
1762 if (match(Op1
, m_Shl(m_Value(X
), m_Value(Y
))) && match(Op0
, m_Zero()))
1763 if (Value
*XNeg
= dyn_castNegVal(X
))
1764 return BinaryOperator::CreateShl(XNeg
, Y
);
1766 // Subtracting -1/0 is the same as adding 1/0:
1767 // sub [nsw] Op0, sext(bool Y) -> add [nsw] Op0, zext(bool Y)
1768 // 'nuw' is dropped in favor of the canonical form.
1769 if (match(Op1
, m_SExt(m_Value(Y
))) &&
1770 Y
->getType()->getScalarSizeInBits() == 1) {
1771 Value
*Zext
= Builder
.CreateZExt(Y
, I
.getType());
1772 BinaryOperator
*Add
= BinaryOperator::CreateAdd(Op0
, Zext
);
1773 Add
->setHasNoSignedWrap(I
.hasNoSignedWrap());
1777 // X - A*-B -> X + A*B
1778 // X - -A*B -> X + A*B
1780 if (match(Op1
, m_c_Mul(m_Value(A
), m_Neg(m_Value(B
)))))
1781 return BinaryOperator::CreateAdd(Op0
, Builder
.CreateMul(A
, B
));
1783 // X - A*C -> X + A*-C
1784 // No need to handle commuted multiply because multiply handling will
1785 // ensure constant will be move to the right hand side.
1786 if (match(Op1
, m_Mul(m_Value(A
), m_Constant(C
))) && !isa
<ConstantExpr
>(C
)) {
1787 Value
*NewMul
= Builder
.CreateMul(A
, ConstantExpr::getNeg(C
));
1788 return BinaryOperator::CreateAdd(Op0
, NewMul
);
1793 // ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A
1794 // ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A
1795 // Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O)
1796 // Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O)
1797 // So long as O here is freely invertible, this will be neutral or a win.
1798 Value
*LHS
, *RHS
, *A
;
1799 Value
*NotA
= Op0
, *MinMax
= Op1
;
1800 SelectPatternFlavor SPF
= matchSelectPattern(MinMax
, LHS
, RHS
).Flavor
;
1801 if (!SelectPatternResult::isMinOrMax(SPF
)) {
1804 SPF
= matchSelectPattern(MinMax
, LHS
, RHS
).Flavor
;
1806 if (SelectPatternResult::isMinOrMax(SPF
) &&
1807 match(NotA
, m_Not(m_Value(A
))) && (NotA
== LHS
|| NotA
== RHS
)) {
1809 std::swap(LHS
, RHS
);
1810 // LHS is now O above and expected to have at least 2 uses (the min/max)
1811 // NotA is epected to have 2 uses from the min/max and 1 from the sub.
1812 if (isFreeToInvert(LHS
, !LHS
->hasNUsesOrMore(3)) &&
1813 !NotA
->hasNUsesOrMore(4)) {
1814 // Note: We don't generate the inverse max/min, just create the not of
1815 // it and let other folds do the rest.
1816 Value
*Not
= Builder
.CreateNot(MinMax
);
1818 return BinaryOperator::CreateSub(Not
, A
);
1820 return BinaryOperator::CreateSub(A
, Not
);
1825 // Optimize pointer differences into the same array into a size. Consider:
1826 // &A[10] - &A[0]: we should compile this to "10".
1827 Value
*LHSOp
, *RHSOp
;
1828 if (match(Op0
, m_PtrToInt(m_Value(LHSOp
))) &&
1829 match(Op1
, m_PtrToInt(m_Value(RHSOp
))))
1830 if (Value
*Res
= OptimizePointerDifference(LHSOp
, RHSOp
, I
.getType()))
1831 return replaceInstUsesWith(I
, Res
);
1833 // trunc(p)-trunc(q) -> trunc(p-q)
1834 if (match(Op0
, m_Trunc(m_PtrToInt(m_Value(LHSOp
)))) &&
1835 match(Op1
, m_Trunc(m_PtrToInt(m_Value(RHSOp
)))))
1836 if (Value
*Res
= OptimizePointerDifference(LHSOp
, RHSOp
, I
.getType()))
1837 return replaceInstUsesWith(I
, Res
);
  // Canonicalize a shifty way to code absolute value to the common pattern.
  // There are 2 potential commuted variants.
  // We're relying on the fact that we only do this transform when the shift has
  // exactly 2 uses and the xor has exactly 1 use (otherwise, we might increase
  // instruction count needlessly).
  const APInt *ShAmt;
  Type *Ty = I.getType();
  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
      Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
      match(Op0, m_OneUse(m_c_Xor(m_Specific(A), m_Specific(Op1))))) {
    // B = ashr i32 A, 31 ; smear the sign bit
    // sub (xor A, B), B  ; flip bits if negative and subtract -1 (add 1)
    // --> (A < 0) ? -A : A
    Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
    // Copy the nuw/nsw flags from the sub to the negate.
    Value *Neg = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(),
                                   I.hasNoSignedWrap());
    return SelectInst::Create(Cmp, Neg, A);
  }

  if (Instruction *Ext = narrowMathIfNoOverflow(I))
    return Ext;

  bool Changed = false;
  if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
    Changed = true;
    I.setHasNoSignedWrap(true);
  }
  if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
    Changed = true;
    I.setHasNoUnsignedWrap(true);
  }

  return Changed ? &I : nullptr;
}

/// This eliminates floating-point negation in either 'fneg(X)' or
/// 'fsub(-0.0, X)' form by folding the negation into a constant operand.
static Instruction *foldFNegIntoConstant(Instruction &I) {
  Value *X;
  Constant *C;

  // Fold negation into constant operand. This is limited with one-use because
  // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
  // -(X * C) --> X * (-C)
  // FIXME: It's arguable whether these should be m_OneUse or not. The current
  // belief is that the FNeg allows for better reassociation opportunities.
  if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
    return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
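  // For example (illustrative IR for the fmul case above):
  //   %m = fmul float %x, 2.0
  //   %r = fneg float %m
  // =>
  //   %r = fmul float %x, -2.0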
  // -(X / C) --> X / (-C)
  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
    return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
  // -(C / X) --> (-C) / X
  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
    return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);

  return nullptr;
}

static Instruction *hoistFNegAboveFMulFDiv(Instruction &I,
                                           InstCombiner::BuilderTy &Builder) {
  Value *FNeg;
  if (!match(&I, m_FNeg(m_Value(FNeg))))
    return nullptr;

  Value *X, *Y;
  if (match(FNeg, m_OneUse(m_FMul(m_Value(X), m_Value(Y)))))
    return BinaryOperator::CreateFMulFMF(Builder.CreateFNegFMF(X, &I), Y, &I);
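  // For example (illustrative IR; the fast-math flags of the original fneg
  // are copied to both new instructions):
  //   %m = fmul float %x, %y
  //   %r = fneg fast float %m
  // =>
  //   %x.neg = fneg fast float %x
  //   %r     = fmul fast float %x.neg, %y
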
  if (match(FNeg, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))))
    return BinaryOperator::CreateFDivFMF(Builder.CreateFNegFMF(X, &I), Y, &I);

  return nullptr;
}

Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
  Value *Op = I.getOperand(0);

  if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldFNegIntoConstant(I))
    return X;

  Value *X, *Y;

  // If we can ignore the sign of zeros: -(X - Y) --> (Y - X)
  if (I.hasNoSignedZeros() &&
      match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
    return BinaryOperator::CreateFSubFMF(Y, X, &I);

  if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
    return R;

  return nullptr;
}

Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
  if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // Subtraction from -0.0 is the canonical form of fneg.
  // fsub nsz 0, X ==> fsub nsz -0.0, X
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
    return BinaryOperator::CreateFNegFMF(Op1, &I);

  if (Instruction *X = foldFNegIntoConstant(I))
    return X;

  if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
    return R;

  Value *X, *Y;
  Constant *C;

  // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
  // Canonicalize to fadd to make analysis easier.
  // This can also help codegen because fadd is commutative.
  // Note that if this fsub was really an fneg, the fadd with -0.0 will get
  // killed later. We still limit that particular transform with 'hasOneUse'
  // because an fneg is assumed better/cheaper than a generic fsub.
  if (I.hasNoSignedZeros() || CannotBeNegativeZero(Op0, SQ.TLI)) {
    if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
      Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
      return BinaryOperator::CreateFAddFMF(Op0, NewSub, &I);
    }
  }

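  // Illustrative IR (hypothetical values):
  //   %s = fsub float %x, %y
  //   %r = fsub nsz float %z, %s
  // =>
  //   %s.neg = fsub nsz float %y, %x
  //   %r     = fadd nsz float %z, %s.neg
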
  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *NV = FoldOpIntoSelect(I, SI))
        return NV;

  // X - C --> X + (-C)
  // But don't transform constant expressions because there's an inverse fold
  // for X + (-Y) --> X - Y.
  if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
    return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);

  // X - (-Y) --> X + Y
  if (match(Op1, m_FNeg(m_Value(Y))))
    return BinaryOperator::CreateFAddFMF(Op0, Y, &I);

  // Similar to above, but look through a cast of the negated value:
  // X - (fptrunc(-Y)) --> X + fptrunc(Y)
  Type *Ty = I.getType();
  if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y))))))
    return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPTrunc(Y, Ty), &I);

  // X - (fpext(-Y)) --> X + fpext(Y)
  if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y))))))
    return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I);

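  // Illustrative IR for the fpext case (hypothetical values):
  //   %y.neg = fneg float %y
  //   %e     = fpext float %y.neg to double
  //   %r     = fsub double %x, %e
  // =>
  //   %e.new = fpext float %y to double
  //   %r     = fadd double %x, %e.new
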
  // Similar to above, but look through fmul/fdiv of the negated value:
  // Op0 - (-X * Y) --> Op0 + (X * Y)
  // Op0 - (Y * -X) --> Op0 + (X * Y)
  if (match(Op1, m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))))) {
    Value *FMul = Builder.CreateFMulFMF(X, Y, &I);
    return BinaryOperator::CreateFAddFMF(Op0, FMul, &I);
  }

  // Op0 - (-X / Y) --> Op0 + (X / Y)
  // Op0 - (X / -Y) --> Op0 + (X / Y)
  if (match(Op1, m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y)))) ||
      match(Op1, m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))))) {
    Value *FDiv = Builder.CreateFDivFMF(X, Y, &I);
    return BinaryOperator::CreateFAddFMF(Op0, FDiv, &I);
  }

  // Handle special cases for FSub with selects feeding the operation
  if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
    return replaceInstUsesWith(I, V);

  if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
    // (Y - X) - Y --> -X
    if (match(Op0, m_FSub(m_Specific(Op1), m_Value(X))))
      return BinaryOperator::CreateFNegFMF(X, &I);

    // Y - (X + Y) --> -X
    // Y - (Y + X) --> -X
    if (match(Op1, m_c_FAdd(m_Specific(Op0), m_Value(X))))
      return BinaryOperator::CreateFNegFMF(X, &I);

    // (X * C) - X --> X * (C - 1.0)
    if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
      Constant *CSubOne = ConstantExpr::getFSub(C, ConstantFP::get(Ty, 1.0));
      return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
    }
    // X - (X * C) --> X * (1.0 - C)
    if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
      Constant *OneSubC = ConstantExpr::getFSub(ConstantFP::get(Ty, 1.0), C);
      return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
    }

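    // Illustrative IR for the (X * C) - X fold above (hypothetical values):
    //   %m = fmul reassoc nsz float %x, 3.0
    //   %r = fsub reassoc nsz float %m, %x
    // =>
    //   %r = fmul reassoc nsz float %x, 2.0
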
    if (Instruction *F = factorizeFAddFSub(I, Builder))
      return F;

    // TODO: This performs reassociative folds for FP ops. Some fraction of the
    // functionality has been subsumed by simple pattern matching here and in
    // InstSimplify. We should let a dedicated reassociation pass handle more
    // complex pattern matching and remove this from InstCombine.
    if (Value *V = FAddCombine(Builder).simplify(&I))
      return replaceInstUsesWith(I, V);