1 //===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the visit functions for add, fadd, sub, and fsub.
11 //===----------------------------------------------------------------------===//
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/InstructionSimplify.h"
19 #include "llvm/Analysis/ValueTracking.h"
20 #include "llvm/IR/Constant.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/InstrTypes.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/Instructions.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PatternMatch.h"
27 #include "llvm/IR/Type.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Support/AlignOf.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/KnownBits.h"
36 using namespace PatternMatch
;
38 #define DEBUG_TYPE "instcombine"
42 /// Class representing coefficient of floating-point addend.
43 /// This class needs to be highly efficient, which is especially true for
44 /// the constructor. As of I write this comment, the cost of the default
45 /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
46 /// perform write-merging).
50 // The constructor has to initialize a APFloat, which is unnecessary for
51 // most addends which have coefficient either 1 or -1. So, the constructor
52 // is expensive. In order to avoid the cost of the constructor, we should
53 // reuse some instances whenever possible. The pre-created instances
54 // FAddCombine::Add[0-5] embodies this idea.
55 FAddendCoef() = default;
58 // If possible, don't define operator+/operator- etc because these
59 // operators inevitably call FAddendCoef's constructor which is not cheap.
60 void operator=(const FAddendCoef
&A
);
61 void operator+=(const FAddendCoef
&A
);
62 void operator*=(const FAddendCoef
&S
);
65 assert(!insaneIntVal(C
) && "Insane coefficient");
66 IsFp
= false; IntVal
= C
;
69 void set(const APFloat
& C
);
73 bool isZero() const { return isInt() ? !IntVal
: getFpVal().isZero(); }
74 Value
*getValue(Type
*) const;
76 bool isOne() const { return isInt() && IntVal
== 1; }
77 bool isTwo() const { return isInt() && IntVal
== 2; }
78 bool isMinusOne() const { return isInt() && IntVal
== -1; }
79 bool isMinusTwo() const { return isInt() && IntVal
== -2; }
82 bool insaneIntVal(int V
) { return V
> 4 || V
< -4; }
84 APFloat
*getFpValPtr()
85 { return reinterpret_cast<APFloat
*>(&FpValBuf
.buffer
[0]); }
87 const APFloat
*getFpValPtr() const
88 { return reinterpret_cast<const APFloat
*>(&FpValBuf
.buffer
[0]); }
90 const APFloat
&getFpVal() const {
91 assert(IsFp
&& BufHasFpVal
&& "Incorret state");
92 return *getFpValPtr();
96 assert(IsFp
&& BufHasFpVal
&& "Incorret state");
97 return *getFpValPtr();
100 bool isInt() const { return !IsFp
; }
102 // If the coefficient is represented by an integer, promote it to a
104 void convertToFpType(const fltSemantics
&Sem
);
106 // Construct an APFloat from a signed integer.
107 // TODO: We should get rid of this function when APFloat can be constructed
108 // from an *SIGNED* integer.
109 APFloat
createAPFloatFromInt(const fltSemantics
&Sem
, int Val
);
113 // True iff FpValBuf contains an instance of APFloat.
114 bool BufHasFpVal
= false;
116 // The integer coefficient of an individual addend is either 1 or -1,
117 // and we try to simplify at most 4 addends from neighboring at most
118 // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
119 // is overkill of this end.
122 AlignedCharArrayUnion
<APFloat
> FpValBuf
;
125 /// FAddend is used to represent floating-point addend. An addend is
126 /// represented as <C, V>, where the V is a symbolic value, and C is a
127 /// constant coefficient. A constant addend is represented as <C, 0>.
132 void operator+=(const FAddend
&T
) {
133 assert((Val
== T
.Val
) && "Symbolic-values disagree");
137 Value
*getSymVal() const { return Val
; }
138 const FAddendCoef
&getCoef() const { return Coeff
; }
140 bool isConstant() const { return Val
== nullptr; }
141 bool isZero() const { return Coeff
.isZero(); }
143 void set(short Coefficient
, Value
*V
) {
144 Coeff
.set(Coefficient
);
147 void set(const APFloat
&Coefficient
, Value
*V
) {
148 Coeff
.set(Coefficient
);
151 void set(const ConstantFP
*Coefficient
, Value
*V
) {
152 Coeff
.set(Coefficient
->getValueAPF());
156 void negate() { Coeff
.negate(); }
158 /// Drill down the U-D chain one step to find the definition of V, and
159 /// try to break the definition into one or two addends.
160 static unsigned drillValueDownOneStep(Value
* V
, FAddend
&A0
, FAddend
&A1
);
162 /// Similar to FAddend::drillDownOneStep() except that the value being
163 /// splitted is the addend itself.
164 unsigned drillAddendDownOneStep(FAddend
&Addend0
, FAddend
&Addend1
) const;
167 void Scale(const FAddendCoef
& ScaleAmt
) { Coeff
*= ScaleAmt
; }
169 // This addend has the value of "Coeff * Val".
170 Value
*Val
= nullptr;
174 /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
175 /// with its neighboring at most two instructions.
179 FAddCombine(InstCombiner::BuilderTy
&B
) : Builder(B
) {}
181 Value
*simplify(Instruction
*FAdd
);
184 using AddendVect
= SmallVector
<const FAddend
*, 4>;
186 Value
*simplifyFAdd(AddendVect
& V
, unsigned InstrQuota
);
188 /// Convert given addend to a Value
189 Value
*createAddendVal(const FAddend
&A
, bool& NeedNeg
);
191 /// Return the number of instructions needed to emit the N-ary addition.
192 unsigned calcInstrNumber(const AddendVect
& Vect
);
194 Value
*createFSub(Value
*Opnd0
, Value
*Opnd1
);
195 Value
*createFAdd(Value
*Opnd0
, Value
*Opnd1
);
196 Value
*createFMul(Value
*Opnd0
, Value
*Opnd1
);
197 Value
*createFNeg(Value
*V
);
198 Value
*createNaryFAdd(const AddendVect
& Opnds
, unsigned InstrQuota
);
199 void createInstPostProc(Instruction
*NewInst
, bool NoNumber
= false);
201 // Debugging stuff are clustered here.
203 unsigned CreateInstrNum
;
204 void initCreateInstNum() { CreateInstrNum
= 0; }
205 void incCreateInstNum() { CreateInstrNum
++; }
207 void initCreateInstNum() {}
208 void incCreateInstNum() {}
211 InstCombiner::BuilderTy
&Builder
;
212 Instruction
*Instr
= nullptr;
215 } // end anonymous namespace
217 //===----------------------------------------------------------------------===//
220 // {FAddendCoef, FAddend, FAddition, FAddCombine}.
222 //===----------------------------------------------------------------------===//
223 FAddendCoef::~FAddendCoef() {
225 getFpValPtr()->~APFloat();
228 void FAddendCoef::set(const APFloat
& C
) {
229 APFloat
*P
= getFpValPtr();
232 // As the buffer is meanless byte stream, we cannot call
233 // APFloat::operator=().
238 IsFp
= BufHasFpVal
= true;
241 void FAddendCoef::convertToFpType(const fltSemantics
&Sem
) {
245 APFloat
*P
= getFpValPtr();
247 new(P
) APFloat(Sem
, IntVal
);
249 new(P
) APFloat(Sem
, 0 - IntVal
);
252 IsFp
= BufHasFpVal
= true;
255 APFloat
FAddendCoef::createAPFloatFromInt(const fltSemantics
&Sem
, int Val
) {
257 return APFloat(Sem
, Val
);
259 APFloat
T(Sem
, 0 - Val
);
265 void FAddendCoef::operator=(const FAddendCoef
&That
) {
269 set(That
.getFpVal());
272 void FAddendCoef::operator+=(const FAddendCoef
&That
) {
273 enum APFloat::roundingMode RndMode
= APFloat::rmNearestTiesToEven
;
274 if (isInt() == That
.isInt()) {
276 IntVal
+= That
.IntVal
;
278 getFpVal().add(That
.getFpVal(), RndMode
);
283 const APFloat
&T
= That
.getFpVal();
284 convertToFpType(T
.getSemantics());
285 getFpVal().add(T
, RndMode
);
289 APFloat
&T
= getFpVal();
290 T
.add(createAPFloatFromInt(T
.getSemantics(), That
.IntVal
), RndMode
);
293 void FAddendCoef::operator*=(const FAddendCoef
&That
) {
297 if (That
.isMinusOne()) {
302 if (isInt() && That
.isInt()) {
303 int Res
= IntVal
* (int)That
.IntVal
;
304 assert(!insaneIntVal(Res
) && "Insane int value");
309 const fltSemantics
&Semantic
=
310 isInt() ? That
.getFpVal().getSemantics() : getFpVal().getSemantics();
313 convertToFpType(Semantic
);
314 APFloat
&F0
= getFpVal();
317 F0
.multiply(createAPFloatFromInt(Semantic
, That
.IntVal
),
318 APFloat::rmNearestTiesToEven
);
320 F0
.multiply(That
.getFpVal(), APFloat::rmNearestTiesToEven
);
323 void FAddendCoef::negate() {
327 getFpVal().changeSign();
330 Value
*FAddendCoef::getValue(Type
*Ty
) const {
332 ConstantFP::get(Ty
, float(IntVal
)) :
333 ConstantFP::get(Ty
->getContext(), getFpVal());
336 // The definition of <Val> Addends
337 // =========================================
338 // A + B <1, A>, <1,B>
339 // A - B <1, A>, <1,B>
342 // A + C <1, A> <C, NULL>
343 // 0 +/- 0 <0, NULL> (corner case)
345 // Legend: A and B are not constant, C is constant
346 unsigned FAddend::drillValueDownOneStep
347 (Value
*Val
, FAddend
&Addend0
, FAddend
&Addend1
) {
348 Instruction
*I
= nullptr;
349 if (!Val
|| !(I
= dyn_cast
<Instruction
>(Val
)))
352 unsigned Opcode
= I
->getOpcode();
354 if (Opcode
== Instruction::FAdd
|| Opcode
== Instruction::FSub
) {
356 Value
*Opnd0
= I
->getOperand(0);
357 Value
*Opnd1
= I
->getOperand(1);
358 if ((C0
= dyn_cast
<ConstantFP
>(Opnd0
)) && C0
->isZero())
361 if ((C1
= dyn_cast
<ConstantFP
>(Opnd1
)) && C1
->isZero())
366 Addend0
.set(1, Opnd0
);
368 Addend0
.set(C0
, nullptr);
372 FAddend
&Addend
= Opnd0
? Addend1
: Addend0
;
374 Addend
.set(1, Opnd1
);
376 Addend
.set(C1
, nullptr);
377 if (Opcode
== Instruction::FSub
)
382 return Opnd0
&& Opnd1
? 2 : 1;
384 // Both operands are zero. Weird!
385 Addend0
.set(APFloat(C0
->getValueAPF().getSemantics()), nullptr);
389 if (I
->getOpcode() == Instruction::FMul
) {
390 Value
*V0
= I
->getOperand(0);
391 Value
*V1
= I
->getOperand(1);
392 if (ConstantFP
*C
= dyn_cast
<ConstantFP
>(V0
)) {
397 if (ConstantFP
*C
= dyn_cast
<ConstantFP
>(V1
)) {
406 // Try to break *this* addend into two addends. e.g. Suppose this addend is
407 // <2.3, V>, and V = X + Y, by calling this function, we obtain two addends,
408 // i.e. <2.3, X> and <2.3, Y>.
409 unsigned FAddend::drillAddendDownOneStep
410 (FAddend
&Addend0
, FAddend
&Addend1
) const {
414 unsigned BreakNum
= FAddend::drillValueDownOneStep(Val
, Addend0
, Addend1
);
415 if (!BreakNum
|| Coeff
.isOne())
418 Addend0
.Scale(Coeff
);
421 Addend1
.Scale(Coeff
);
426 Value
*FAddCombine::simplify(Instruction
*I
) {
427 assert(I
->hasAllowReassoc() && I
->hasNoSignedZeros() &&
428 "Expected 'reassoc'+'nsz' instruction");
430 // Currently we are not able to handle vector type.
431 if (I
->getType()->isVectorTy())
434 assert((I
->getOpcode() == Instruction::FAdd
||
435 I
->getOpcode() == Instruction::FSub
) && "Expect add/sub");
437 // Save the instruction before calling other member-functions.
440 FAddend Opnd0
, Opnd1
, Opnd0_0
, Opnd0_1
, Opnd1_0
, Opnd1_1
;
442 unsigned OpndNum
= FAddend::drillValueDownOneStep(I
, Opnd0
, Opnd1
);
444 // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
445 unsigned Opnd0_ExpNum
= 0;
446 unsigned Opnd1_ExpNum
= 0;
448 if (!Opnd0
.isConstant())
449 Opnd0_ExpNum
= Opnd0
.drillAddendDownOneStep(Opnd0_0
, Opnd0_1
);
451 // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
452 if (OpndNum
== 2 && !Opnd1
.isConstant())
453 Opnd1_ExpNum
= Opnd1
.drillAddendDownOneStep(Opnd1_0
, Opnd1_1
);
455 // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
456 if (Opnd0_ExpNum
&& Opnd1_ExpNum
) {
458 AllOpnds
.push_back(&Opnd0_0
);
459 AllOpnds
.push_back(&Opnd1_0
);
460 if (Opnd0_ExpNum
== 2)
461 AllOpnds
.push_back(&Opnd0_1
);
462 if (Opnd1_ExpNum
== 2)
463 AllOpnds
.push_back(&Opnd1_1
);
465 // Compute instruction quota. We should save at least one instruction.
466 unsigned InstQuota
= 0;
468 Value
*V0
= I
->getOperand(0);
469 Value
*V1
= I
->getOperand(1);
470 InstQuota
= ((!isa
<Constant
>(V0
) && V0
->hasOneUse()) &&
471 (!isa
<Constant
>(V1
) && V1
->hasOneUse())) ? 2 : 1;
473 if (Value
*R
= simplifyFAdd(AllOpnds
, InstQuota
))
478 // The input instruction is : "I=0.0 +/- V". If the "V" were able to be
479 // splitted into two addends, say "V = X - Y", the instruction would have
480 // been optimized into "I = Y - X" in the previous steps.
482 const FAddendCoef
&CE
= Opnd0
.getCoef();
483 return CE
.isOne() ? Opnd0
.getSymVal() : nullptr;
486 // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
489 AllOpnds
.push_back(&Opnd0
);
490 AllOpnds
.push_back(&Opnd1_0
);
491 if (Opnd1_ExpNum
== 2)
492 AllOpnds
.push_back(&Opnd1_1
);
494 if (Value
*R
= simplifyFAdd(AllOpnds
, 1))
498 // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
501 AllOpnds
.push_back(&Opnd1
);
502 AllOpnds
.push_back(&Opnd0_0
);
503 if (Opnd0_ExpNum
== 2)
504 AllOpnds
.push_back(&Opnd0_1
);
506 if (Value
*R
= simplifyFAdd(AllOpnds
, 1))
513 Value
*FAddCombine::simplifyFAdd(AddendVect
& Addends
, unsigned InstrQuota
) {
514 unsigned AddendNum
= Addends
.size();
515 assert(AddendNum
<= 4 && "Too many addends");
517 // For saving intermediate results;
518 unsigned NextTmpIdx
= 0;
519 FAddend TmpResult
[3];
521 // Points to the constant addend of the resulting simplified expression.
522 // If the resulting expr has constant-addend, this constant-addend is
523 // desirable to reside at the top of the resulting expression tree. Placing
524 // constant close to supper-expr(s) will potentially reveal some optimization
525 // opportunities in super-expr(s).
526 const FAddend
*ConstAdd
= nullptr;
528 // Simplified addends are placed <SimpVect>.
531 // The outer loop works on one symbolic-value at a time. Suppose the input
532 // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
533 // The symbolic-values will be processed in this order: x, y, z.
534 for (unsigned SymIdx
= 0; SymIdx
< AddendNum
; SymIdx
++) {
536 const FAddend
*ThisAddend
= Addends
[SymIdx
];
538 // This addend was processed before.
542 Value
*Val
= ThisAddend
->getSymVal();
543 unsigned StartIdx
= SimpVect
.size();
544 SimpVect
.push_back(ThisAddend
);
546 // The inner loop collects addends sharing same symbolic-value, and these
547 // addends will be later on folded into a single addend. Following above
548 // example, if the symbolic value "y" is being processed, the inner loop
549 // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will
550 // be later on folded into "<b1+b2, y>".
551 for (unsigned SameSymIdx
= SymIdx
+ 1;
552 SameSymIdx
< AddendNum
; SameSymIdx
++) {
553 const FAddend
*T
= Addends
[SameSymIdx
];
554 if (T
&& T
->getSymVal() == Val
) {
555 // Set null such that next iteration of the outer loop will not process
556 // this addend again.
557 Addends
[SameSymIdx
] = nullptr;
558 SimpVect
.push_back(T
);
562 // If multiple addends share same symbolic value, fold them together.
563 if (StartIdx
+ 1 != SimpVect
.size()) {
564 FAddend
&R
= TmpResult
[NextTmpIdx
++];
565 R
= *SimpVect
[StartIdx
];
566 for (unsigned Idx
= StartIdx
+ 1; Idx
< SimpVect
.size(); Idx
++)
569 // Pop all addends being folded and push the resulting folded addend.
570 SimpVect
.resize(StartIdx
);
573 SimpVect
.push_back(&R
);
576 // Don't push constant addend at this time. It will be the last element
583 assert((NextTmpIdx
<= array_lengthof(TmpResult
) + 1) &&
584 "out-of-bound access");
587 SimpVect
.push_back(ConstAdd
);
590 if (!SimpVect
.empty())
591 Result
= createNaryFAdd(SimpVect
, InstrQuota
);
593 // The addition is folded to 0.0.
594 Result
= ConstantFP::get(Instr
->getType(), 0.0);
600 Value
*FAddCombine::createNaryFAdd
601 (const AddendVect
&Opnds
, unsigned InstrQuota
) {
602 assert(!Opnds
.empty() && "Expect at least one addend");
604 // Step 1: Check if the # of instructions needed exceeds the quota.
606 unsigned InstrNeeded
= calcInstrNumber(Opnds
);
607 if (InstrNeeded
> InstrQuota
)
612 // step 2: Emit the N-ary addition.
613 // Note that at most three instructions are involved in Fadd-InstCombine: the
614 // addition in question, and at most two neighboring instructions.
615 // The resulting optimized addition should have at least one less instruction
616 // than the original addition expression tree. This implies that the resulting
617 // N-ary addition has at most two instructions, and we don't need to worry
618 // about tree-height when constructing the N-ary addition.
620 Value
*LastVal
= nullptr;
621 bool LastValNeedNeg
= false;
623 // Iterate the addends, creating fadd/fsub using adjacent two addends.
624 for (const FAddend
*Opnd
: Opnds
) {
626 Value
*V
= createAddendVal(*Opnd
, NeedNeg
);
629 LastValNeedNeg
= NeedNeg
;
633 if (LastValNeedNeg
== NeedNeg
) {
634 LastVal
= createFAdd(LastVal
, V
);
639 LastVal
= createFSub(V
, LastVal
);
641 LastVal
= createFSub(LastVal
, V
);
643 LastValNeedNeg
= false;
646 if (LastValNeedNeg
) {
647 LastVal
= createFNeg(LastVal
);
651 assert(CreateInstrNum
== InstrNeeded
&&
652 "Inconsistent in instruction numbers");
658 Value
*FAddCombine::createFSub(Value
*Opnd0
, Value
*Opnd1
) {
659 Value
*V
= Builder
.CreateFSub(Opnd0
, Opnd1
);
660 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
661 createInstPostProc(I
);
665 Value
*FAddCombine::createFNeg(Value
*V
) {
666 Value
*Zero
= cast
<Value
>(ConstantFP::getZeroValueForNegation(V
->getType()));
667 Value
*NewV
= createFSub(Zero
, V
);
668 if (Instruction
*I
= dyn_cast
<Instruction
>(NewV
))
669 createInstPostProc(I
, true); // fneg's don't receive instruction numbers.
673 Value
*FAddCombine::createFAdd(Value
*Opnd0
, Value
*Opnd1
) {
674 Value
*V
= Builder
.CreateFAdd(Opnd0
, Opnd1
);
675 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
676 createInstPostProc(I
);
680 Value
*FAddCombine::createFMul(Value
*Opnd0
, Value
*Opnd1
) {
681 Value
*V
= Builder
.CreateFMul(Opnd0
, Opnd1
);
682 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
683 createInstPostProc(I
);
687 void FAddCombine::createInstPostProc(Instruction
*NewInstr
, bool NoNumber
) {
688 NewInstr
->setDebugLoc(Instr
->getDebugLoc());
690 // Keep track of the number of instruction created.
694 // Propagate fast-math flags
695 NewInstr
->setFastMathFlags(Instr
->getFastMathFlags());
698 // Return the number of instruction needed to emit the N-ary addition.
699 // NOTE: Keep this function in sync with createAddendVal().
700 unsigned FAddCombine::calcInstrNumber(const AddendVect
&Opnds
) {
701 unsigned OpndNum
= Opnds
.size();
702 unsigned InstrNeeded
= OpndNum
- 1;
704 // The number of addends in the form of "(-1)*x".
705 unsigned NegOpndNum
= 0;
707 // Adjust the number of instructions needed to emit the N-ary add.
708 for (const FAddend
*Opnd
: Opnds
) {
709 if (Opnd
->isConstant())
712 // The constant check above is really for a few special constant
714 if (isa
<UndefValue
>(Opnd
->getSymVal()))
717 const FAddendCoef
&CE
= Opnd
->getCoef();
718 if (CE
.isMinusOne() || CE
.isMinusTwo())
721 // Let the addend be "c * x". If "c == +/-1", the value of the addend
722 // is immediately available; otherwise, it needs exactly one instruction
723 // to evaluate the value.
724 if (!CE
.isMinusOne() && !CE
.isOne())
727 if (NegOpndNum
== OpndNum
)
732 // Input Addend Value NeedNeg(output)
733 // ================================================================
734 // Constant C C false
735 // <+/-1, V> V coefficient is -1
736 // <2/-2, V> "fadd V, V" coefficient is -2
737 // <C, V> "fmul V, C" false
739 // NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
740 Value
*FAddCombine::createAddendVal(const FAddend
&Opnd
, bool &NeedNeg
) {
741 const FAddendCoef
&Coeff
= Opnd
.getCoef();
743 if (Opnd
.isConstant()) {
745 return Coeff
.getValue(Instr
->getType());
748 Value
*OpndVal
= Opnd
.getSymVal();
750 if (Coeff
.isMinusOne() || Coeff
.isOne()) {
751 NeedNeg
= Coeff
.isMinusOne();
755 if (Coeff
.isTwo() || Coeff
.isMinusTwo()) {
756 NeedNeg
= Coeff
.isMinusTwo();
757 return createFAdd(OpndVal
, OpndVal
);
761 return createFMul(OpndVal
, Coeff
.getValue(Instr
->getType()));
764 // Checks if any operand is negative and we can convert add to sub.
765 // This function checks for following negative patterns
766 // ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
767 // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
768 // XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
769 static Value
*checkForNegativeOperand(BinaryOperator
&I
,
770 InstCombiner::BuilderTy
&Builder
) {
771 Value
*LHS
= I
.getOperand(0), *RHS
= I
.getOperand(1);
773 // This function creates 2 instructions to replace ADD, we need at least one
774 // of LHS or RHS to have one use to ensure benefit in transform.
775 if (!LHS
->hasOneUse() && !RHS
->hasOneUse())
778 Value
*X
= nullptr, *Y
= nullptr, *Z
= nullptr;
779 const APInt
*C1
= nullptr, *C2
= nullptr;
781 // if ONE is on other side, swap
782 if (match(RHS
, m_Add(m_Value(X
), m_One())))
785 if (match(LHS
, m_Add(m_Value(X
), m_One()))) {
786 // if XOR on other side, swap
787 if (match(RHS
, m_Xor(m_Value(Y
), m_APInt(C1
))))
790 if (match(X
, m_Xor(m_Value(Y
), m_APInt(C1
)))) {
791 // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
792 // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
793 if (match(Y
, m_Or(m_Value(Z
), m_APInt(C2
))) && (*C2
== ~(*C1
))) {
794 Value
*NewAnd
= Builder
.CreateAnd(Z
, *C1
);
795 return Builder
.CreateSub(RHS
, NewAnd
, "sub");
796 } else if (match(Y
, m_And(m_Value(Z
), m_APInt(C2
))) && (*C1
== *C2
)) {
797 // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
798 // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
799 Value
*NewOr
= Builder
.CreateOr(Z
, ~(*C1
));
800 return Builder
.CreateSub(RHS
, NewOr
, "sub");
805 // Restore LHS and RHS
806 LHS
= I
.getOperand(0);
807 RHS
= I
.getOperand(1);
809 // if XOR is on other side, swap
810 if (match(RHS
, m_Xor(m_Value(Y
), m_APInt(C1
))))
814 // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
815 // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
816 if (match(LHS
, m_Xor(m_Value(Y
), m_APInt(C1
))))
817 if (C1
->countTrailingZeros() == 0)
818 if (match(Y
, m_And(m_Value(Z
), m_APInt(C2
))) && *C1
== (*C2
+ 1)) {
819 Value
*NewOr
= Builder
.CreateOr(Z
, ~(*C2
));
820 return Builder
.CreateSub(RHS
, NewOr
, "sub");
825 /// Wrapping flags may allow combining constants separated by an extend.
826 static Instruction
*foldNoWrapAdd(BinaryOperator
&Add
,
827 InstCombiner::BuilderTy
&Builder
) {
828 Value
*Op0
= Add
.getOperand(0), *Op1
= Add
.getOperand(1);
829 Type
*Ty
= Add
.getType();
831 if (!match(Op1
, m_Constant(Op1C
)))
834 // Try this match first because it results in an add in the narrow type.
835 // (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1)))
837 const APInt
*C1
, *C2
;
838 if (match(Op1
, m_APInt(C1
)) &&
839 match(Op0
, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X
), m_APInt(C2
))))) &&
840 C1
->isNegative() && C1
->sge(-C2
->sext(C1
->getBitWidth()))) {
842 ConstantInt::get(X
->getType(), *C2
+ C1
->trunc(C2
->getBitWidth()));
843 return new ZExtInst(Builder
.CreateNUWAdd(X
, NewC
), Ty
);
846 // More general combining of constants in the wide type.
847 // (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
849 if (match(Op0
, m_OneUse(m_SExt(m_NSWAdd(m_Value(X
), m_Constant(NarrowC
)))))) {
850 Constant
*WideC
= ConstantExpr::getSExt(NarrowC
, Ty
);
851 Constant
*NewC
= ConstantExpr::getAdd(WideC
, Op1C
);
852 Value
*WideX
= Builder
.CreateSExt(X
, Ty
);
853 return BinaryOperator::CreateAdd(WideX
, NewC
);
855 // (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
856 if (match(Op0
, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X
), m_Constant(NarrowC
)))))) {
857 Constant
*WideC
= ConstantExpr::getZExt(NarrowC
, Ty
);
858 Constant
*NewC
= ConstantExpr::getAdd(WideC
, Op1C
);
859 Value
*WideX
= Builder
.CreateZExt(X
, Ty
);
860 return BinaryOperator::CreateAdd(WideX
, NewC
);
866 Instruction
*InstCombiner::foldAddWithConstant(BinaryOperator
&Add
) {
867 Value
*Op0
= Add
.getOperand(0), *Op1
= Add
.getOperand(1);
869 if (!match(Op1
, m_Constant(Op1C
)))
872 if (Instruction
*NV
= foldBinOpIntoSelectOrPhi(Add
))
878 // add (sub C1, X), C2 --> sub (add C1, C2), X
879 if (match(Op0
, m_Sub(m_Constant(Op00C
), m_Value(X
))))
880 return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C
, Op1C
), X
);
884 // add (sub X, Y), -1 --> add (not Y), X
885 if (match(Op0
, m_OneUse(m_Sub(m_Value(X
), m_Value(Y
)))) &&
886 match(Op1
, m_AllOnes()))
887 return BinaryOperator::CreateAdd(Builder
.CreateNot(Y
), X
);
889 // zext(bool) + C -> bool ? C + 1 : C
890 if (match(Op0
, m_ZExt(m_Value(X
))) &&
891 X
->getType()->getScalarSizeInBits() == 1)
892 return SelectInst::Create(X
, AddOne(Op1C
), Op1
);
894 // ~X + C --> (C-1) - X
895 if (match(Op0
, m_Not(m_Value(X
))))
896 return BinaryOperator::CreateSub(SubOne(Op1C
), X
);
899 if (!match(Op1
, m_APInt(C
)))
902 // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
904 if (match(Op0
, m_Or(m_Value(), m_APInt(C2
))) && *C2
== -*C
)
905 return BinaryOperator::CreateXor(Op0
, ConstantInt::get(Add
.getType(), *C2
));
907 if (C
->isSignMask()) {
908 // If wrapping is not allowed, then the addition must set the sign bit:
909 // X + (signmask) --> X | signmask
910 if (Add
.hasNoSignedWrap() || Add
.hasNoUnsignedWrap())
911 return BinaryOperator::CreateOr(Op0
, Op1
);
913 // If wrapping is allowed, then the addition flips the sign bit of LHS:
914 // X + (signmask) --> X ^ signmask
915 return BinaryOperator::CreateXor(Op0
, Op1
);
918 // Is this add the last step in a convoluted sext?
919 // add(zext(xor i16 X, -32768), -32768) --> sext X
920 Type
*Ty
= Add
.getType();
921 if (match(Op0
, m_ZExt(m_Xor(m_Value(X
), m_APInt(C2
)))) &&
922 C2
->isMinSignedValue() && C2
->sext(Ty
->getScalarSizeInBits()) == *C
)
923 return CastInst::Create(Instruction::SExt
, X
, Ty
);
925 if (C
->isOneValue() && Op0
->hasOneUse()) {
926 // add (sext i1 X), 1 --> zext (not X)
927 // TODO: The smallest IR representation is (select X, 0, 1), and that would
928 // not require the one-use check. But we need to remove a transform in
929 // visitSelect and make sure that IR value tracking for select is equal or
930 // better than for these ops.
931 if (match(Op0
, m_SExt(m_Value(X
))) &&
932 X
->getType()->getScalarSizeInBits() == 1)
933 return new ZExtInst(Builder
.CreateNot(X
), Ty
);
935 // Shifts and add used to flip and mask off the low bit:
936 // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
938 if (match(Op0
, m_AShr(m_Shl(m_Value(X
), m_APInt(C2
)), m_APInt(C3
))) &&
939 C2
== C3
&& *C2
== Ty
->getScalarSizeInBits() - 1) {
940 Value
*NotX
= Builder
.CreateNot(X
);
941 return BinaryOperator::CreateAnd(NotX
, ConstantInt::get(Ty
, 1));
948 // Matches multiplication expression Op * C where C is a constant. Returns the
949 // constant value in C and the other operand in Op. Returns true if such a
951 static bool MatchMul(Value
*E
, Value
*&Op
, APInt
&C
) {
953 if (match(E
, m_Mul(m_Value(Op
), m_APInt(AI
)))) {
957 if (match(E
, m_Shl(m_Value(Op
), m_APInt(AI
)))) {
958 C
= APInt(AI
->getBitWidth(), 1);
965 // Matches remainder expression Op % C where C is a constant. Returns the
966 // constant value in C and the other operand in Op. Returns the signedness of
967 // the remainder operation in IsSigned. Returns true if such a match is
969 static bool MatchRem(Value
*E
, Value
*&Op
, APInt
&C
, bool &IsSigned
) {
972 if (match(E
, m_SRem(m_Value(Op
), m_APInt(AI
)))) {
977 if (match(E
, m_URem(m_Value(Op
), m_APInt(AI
)))) {
981 if (match(E
, m_And(m_Value(Op
), m_APInt(AI
))) && (*AI
+ 1).isPowerOf2()) {
988 // Matches division expression Op / C with the given signedness as indicated
989 // by IsSigned, where C is a constant. Returns the constant value in C and the
990 // other operand in Op. Returns true if such a match is found.
991 static bool MatchDiv(Value
*E
, Value
*&Op
, APInt
&C
, bool IsSigned
) {
993 if (IsSigned
&& match(E
, m_SDiv(m_Value(Op
), m_APInt(AI
)))) {
998 if (match(E
, m_UDiv(m_Value(Op
), m_APInt(AI
)))) {
1002 if (match(E
, m_LShr(m_Value(Op
), m_APInt(AI
)))) {
1003 C
= APInt(AI
->getBitWidth(), 1);
1011 // Returns whether C0 * C1 with the given signedness overflows.
1012 static bool MulWillOverflow(APInt
&C0
, APInt
&C1
, bool IsSigned
) {
1015 (void)C0
.smul_ov(C1
, overflow
);
1017 (void)C0
.umul_ov(C1
, overflow
);
1021 // Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
1022 // does not overflow.
1023 Value
*InstCombiner::SimplifyAddWithRemainder(BinaryOperator
&I
) {
1024 Value
*LHS
= I
.getOperand(0), *RHS
= I
.getOperand(1);
1028 // Match I = X % C0 + MulOpV * C0
1029 if (((MatchRem(LHS
, X
, C0
, IsSigned
) && MatchMul(RHS
, MulOpV
, MulOpC
)) ||
1030 (MatchRem(RHS
, X
, C0
, IsSigned
) && MatchMul(LHS
, MulOpV
, MulOpC
))) &&
1035 // Match MulOpC = RemOpV % C1
1036 if (MatchRem(MulOpV
, RemOpV
, C1
, Rem2IsSigned
) &&
1037 IsSigned
== Rem2IsSigned
) {
1040 // Match RemOpV = X / C0
1041 if (MatchDiv(RemOpV
, DivOpV
, DivOpC
, IsSigned
) && X
== DivOpV
&&
1042 C0
== DivOpC
&& !MulWillOverflow(C0
, C1
, IsSigned
)) {
1044 ConstantInt::get(X
->getType()->getContext(), C0
* C1
);
1045 return IsSigned
? Builder
.CreateSRem(X
, NewDivisor
, "srem")
1046 : Builder
.CreateURem(X
, NewDivisor
, "urem");
1055 /// (1 << NBits) - 1
1057 /// ~(-(1 << NBits))
1058 /// Because a 'not' is better for bit-tracking analysis and other transforms
1059 /// than an 'add'. The new shl is always nsw, and is nuw if old `and` was.
1060 static Instruction
*canonicalizeLowbitMask(BinaryOperator
&I
,
1061 InstCombiner::BuilderTy
&Builder
) {
1063 if (!match(&I
, m_Add(m_OneUse(m_Shl(m_One(), m_Value(NBits
))), m_AllOnes())))
1066 Constant
*MinusOne
= Constant::getAllOnesValue(NBits
->getType());
1067 Value
*NotMask
= Builder
.CreateShl(MinusOne
, NBits
, "notmask");
1068 // Be wary of constant folding.
1069 if (auto *BOp
= dyn_cast
<BinaryOperator
>(NotMask
)) {
1070 // Always NSW. But NUW propagates from `add`.
1071 BOp
->setHasNoSignedWrap();
1072 BOp
->setHasNoUnsignedWrap(I
.hasNoUnsignedWrap());
1075 return BinaryOperator::CreateNot(NotMask
, I
.getName());
1078 static Instruction
*foldToUnsignedSaturatedAdd(BinaryOperator
&I
) {
1079 assert(I
.getOpcode() == Instruction::Add
&& "Expecting add instruction");
1080 Type
*Ty
= I
.getType();
1081 auto getUAddSat
= [&]() {
1082 return Intrinsic::getDeclaration(I
.getModule(), Intrinsic::uadd_sat
, Ty
);
1085 // add (umin X, ~Y), Y --> uaddsat X, Y
1087 if (match(&I
, m_c_Add(m_c_UMin(m_Value(X
), m_Not(m_Value(Y
))),
1089 return CallInst::Create(getUAddSat(), { X
, Y
});
1091 // add (umin X, ~C), C --> uaddsat X, C
1092 const APInt
*C
, *NotC
;
1093 if (match(&I
, m_Add(m_UMin(m_Value(X
), m_APInt(NotC
)), m_APInt(C
))) &&
1095 return CallInst::Create(getUAddSat(), { X
, ConstantInt::get(Ty
, *C
) });
1101 InstCombiner::canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
1102 BinaryOperator
&I
) {
1103 assert((I
.getOpcode() == Instruction::Add
||
1104 I
.getOpcode() == Instruction::Or
||
1105 I
.getOpcode() == Instruction::Sub
) &&
1106 "Expecting add/or/sub instruction");
1108 // We have a subtraction/addition between a (potentially truncated) *logical*
1109 // right-shift of X and a "select".
1111 Instruction
*LowBitsToSkip
, *Extract
;
1112 if (!match(&I
, m_c_BinOp(m_TruncOrSelf(m_CombineAnd(
1113 m_LShr(m_Value(X
), m_Instruction(LowBitsToSkip
)),
1114 m_Instruction(Extract
))),
1118 // `add`/`or` is commutative; but for `sub`, "select" *must* be on RHS.
1119 if (I
.getOpcode() == Instruction::Sub
&& I
.getOperand(1) != Select
)
1122 Type
*XTy
= X
->getType();
1123 bool HadTrunc
= I
.getType() != XTy
;
1125 // If there was a truncation of extracted value, then we'll need to produce
1126 // one extra instruction, so we need to ensure one instruction will go away.
1127 if (HadTrunc
&& !match(&I
, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
1130 // Extraction should extract high NBits bits, with shift amount calculated as:
1131 // low bits to skip = shift bitwidth - high bits to extract
1132 // The shift amount itself may be extended, and we need to look past zero-ext
1133 // when matching NBits, that will matter for matching later.
1138 m_ZExtOrSelf(m_Sub(m_Constant(C
), m_ZExtOrSelf(m_Value(NBits
))))) ||
1139 !match(C
, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ
,
1140 APInt(C
->getType()->getScalarSizeInBits(),
1141 X
->getType()->getScalarSizeInBits()))))
1144 // Sign-extending value can be zero-extended if we `sub`tract it,
1145 // or sign-extended otherwise.
1146 auto SkipExtInMagic
= [&I
](Value
*&V
) {
1147 if (I
.getOpcode() == Instruction::Sub
)
1148 match(V
, m_ZExtOrSelf(m_Value(V
)));
1150 match(V
, m_SExtOrSelf(m_Value(V
)));
1153 // Now, finally validate the sign-extending magic.
1154 // `select` itself may be appropriately extended, look past that.
1155 SkipExtInMagic(Select
);
1157 ICmpInst::Predicate Pred
;
1159 Value
*SignExtendingValue
, *Zero
;
1161 // It must be a select between two values we will later establish to be a
1162 // sign-extending value and a zero constant. The condition guarding the
1163 // sign-extension must be based on a sign bit of the same X we had in `lshr`.
1164 if (!match(Select
, m_Select(m_ICmp(Pred
, m_Specific(X
), m_APInt(Thr
)),
1165 m_Value(SignExtendingValue
), m_Value(Zero
))) ||
1166 !isSignBitCheck(Pred
, *Thr
, ShouldSignext
))
1169 // icmp-select pair is commutative.
1171 std::swap(SignExtendingValue
, Zero
);
1173 // If we should not perform sign-extension then we must add/or/subtract zero.
1174 if (!match(Zero
, m_Zero()))
1176 // Otherwise, it should be some constant, left-shifted by the same NBits we
1177 // had in `lshr`. Said left-shift can also be appropriately extended.
1178 // Again, we must look past zero-ext when looking for NBits.
1179 SkipExtInMagic(SignExtendingValue
);
1180 Constant
*SignExtendingValueBaseConstant
;
1181 if (!match(SignExtendingValue
,
1182 m_Shl(m_Constant(SignExtendingValueBaseConstant
),
1183 m_ZExtOrSelf(m_Specific(NBits
)))))
1185 // If we `sub`, then the constant should be one, else it should be all-ones.
1186 if (I
.getOpcode() == Instruction::Sub
1187 ? !match(SignExtendingValueBaseConstant
, m_One())
1188 : !match(SignExtendingValueBaseConstant
, m_AllOnes()))
1191 auto *NewAShr
= BinaryOperator::CreateAShr(X
, LowBitsToSkip
,
1192 Extract
->getName() + ".sext");
1193 NewAShr
->copyIRFlags(Extract
); // Preserve `exact`-ness.
1197 Builder
.Insert(NewAShr
);
1198 return TruncInst::CreateTruncOrBitCast(NewAShr
, I
.getType());
1201 Instruction
*InstCombiner::visitAdd(BinaryOperator
&I
) {
1202 if (Value
*V
= SimplifyAddInst(I
.getOperand(0), I
.getOperand(1),
1203 I
.hasNoSignedWrap(), I
.hasNoUnsignedWrap(),
1204 SQ
.getWithInstruction(&I
)))
1205 return replaceInstUsesWith(I
, V
);
1207 if (SimplifyAssociativeOrCommutative(I
))
1210 if (Instruction
*X
= foldVectorBinop(I
))
1213 // (A*B)+(A*C) -> A*(B+C) etc
1214 if (Value
*V
= SimplifyUsingDistributiveLaws(I
))
1215 return replaceInstUsesWith(I
, V
);
1217 if (Instruction
*X
= foldAddWithConstant(I
))
1220 if (Instruction
*X
= foldNoWrapAdd(I
, Builder
))
1223 // FIXME: This should be moved into the above helper function to allow these
1224 // transforms for general constant or constant splat vectors.
1225 Value
*LHS
= I
.getOperand(0), *RHS
= I
.getOperand(1);
1226 Type
*Ty
= I
.getType();
1227 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(RHS
)) {
1228 Value
*XorLHS
= nullptr; ConstantInt
*XorRHS
= nullptr;
1229 if (match(LHS
, m_Xor(m_Value(XorLHS
), m_ConstantInt(XorRHS
)))) {
1230 unsigned TySizeBits
= Ty
->getScalarSizeInBits();
1231 const APInt
&RHSVal
= CI
->getValue();
1232 unsigned ExtendAmt
= 0;
1233 // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
1234 // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
1235 if (XorRHS
->getValue() == -RHSVal
) {
1236 if (RHSVal
.isPowerOf2())
1237 ExtendAmt
= TySizeBits
- RHSVal
.logBase2() - 1;
1238 else if (XorRHS
->getValue().isPowerOf2())
1239 ExtendAmt
= TySizeBits
- XorRHS
->getValue().logBase2() - 1;
1243 APInt Mask
= APInt::getHighBitsSet(TySizeBits
, ExtendAmt
);
1244 if (!MaskedValueIsZero(XorLHS
, Mask
, 0, &I
))
1249 Constant
*ShAmt
= ConstantInt::get(Ty
, ExtendAmt
);
1250 Value
*NewShl
= Builder
.CreateShl(XorLHS
, ShAmt
, "sext");
1251 return BinaryOperator::CreateAShr(NewShl
, ShAmt
);
1254 // If this is a xor that was canonicalized from a sub, turn it back into
1255 // a sub and fuse this add with it.
1256 if (LHS
->hasOneUse() && (XorRHS
->getValue()+1).isPowerOf2()) {
1257 KnownBits LHSKnown
= computeKnownBits(XorLHS
, 0, &I
);
1258 if ((XorRHS
->getValue() | LHSKnown
.Zero
).isAllOnesValue())
1259 return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS
, CI
),
1262 // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C,
1263 // transform them into (X + (signmask ^ C))
1264 if (XorRHS
->getValue().isSignMask())
1265 return BinaryOperator::CreateAdd(XorLHS
,
1266 ConstantExpr::getXor(XorRHS
, CI
));
1270 if (Ty
->isIntOrIntVectorTy(1))
1271 return BinaryOperator::CreateXor(LHS
, RHS
);
1275 auto *Shl
= BinaryOperator::CreateShl(LHS
, ConstantInt::get(Ty
, 1));
1276 Shl
->setHasNoSignedWrap(I
.hasNoSignedWrap());
1277 Shl
->setHasNoUnsignedWrap(I
.hasNoUnsignedWrap());
1282 if (match(LHS
, m_Neg(m_Value(A
)))) {
1283 // -A + -B --> -(A + B)
1284 if (match(RHS
, m_Neg(m_Value(B
))))
1285 return BinaryOperator::CreateNeg(Builder
.CreateAdd(A
, B
));
1288 return BinaryOperator::CreateSub(RHS
, A
);
1291 // Canonicalize sext to zext for better value tracking potential.
1292 // add A, sext(B) --> sub A, zext(B)
1293 if (match(&I
, m_c_Add(m_Value(A
), m_OneUse(m_SExt(m_Value(B
))))) &&
1294 B
->getType()->isIntOrIntVectorTy(1))
1295 return BinaryOperator::CreateSub(A
, Builder
.CreateZExt(B
, Ty
));
1298 if (match(RHS
, m_Neg(m_Value(B
))))
1299 return BinaryOperator::CreateSub(LHS
, B
);
1301 if (Value
*V
= checkForNegativeOperand(I
, Builder
))
1302 return replaceInstUsesWith(I
, V
);
1304 // (A + 1) + ~B --> A - B
1305 // ~B + (A + 1) --> A - B
1306 // (~B + A) + 1 --> A - B
1307 // (A + ~B) + 1 --> A - B
1308 if (match(&I
, m_c_BinOp(m_Add(m_Value(A
), m_One()), m_Not(m_Value(B
)))) ||
1309 match(&I
, m_BinOp(m_c_Add(m_Not(m_Value(B
)), m_Value(A
)), m_One())))
1310 return BinaryOperator::CreateSub(A
, B
);
1312 // X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
1313 if (Value
*V
= SimplifyAddWithRemainder(I
)) return replaceInstUsesWith(I
, V
);
1315 // A+B --> A|B iff A and B have no bits set in common.
1316 if (haveNoCommonBitsSet(LHS
, RHS
, DL
, &AC
, &I
, &DT
))
1317 return BinaryOperator::CreateOr(LHS
, RHS
);
1319 // FIXME: We already did a check for ConstantInt RHS above this.
1320 // FIXME: Is this pattern covered by another fold? No regression tests fail on
1322 if (ConstantInt
*CRHS
= dyn_cast
<ConstantInt
>(RHS
)) {
1323 // (X & FF00) + xx00 -> (X+xx00) & FF00
1326 if (LHS
->hasOneUse() &&
1327 match(LHS
, m_And(m_Value(X
), m_ConstantInt(C2
))) &&
1328 CRHS
->getValue() == (CRHS
->getValue() & C2
->getValue())) {
1329 // See if all bits from the first bit set in the Add RHS up are included
1330 // in the mask. First, get the rightmost bit.
1331 const APInt
&AddRHSV
= CRHS
->getValue();
1333 // Form a mask of all bits from the lowest bit added through the top.
1334 APInt
AddRHSHighBits(~((AddRHSV
& -AddRHSV
)-1));
1336 // See if the and mask includes all of these bits.
1337 APInt
AddRHSHighBitsAnd(AddRHSHighBits
& C2
->getValue());
1339 if (AddRHSHighBits
== AddRHSHighBitsAnd
) {
1340 // Okay, the xform is safe. Insert the new add pronto.
1341 Value
*NewAdd
= Builder
.CreateAdd(X
, CRHS
, LHS
->getName());
1342 return BinaryOperator::CreateAnd(NewAdd
, C2
);
1347 // add (select X 0 (sub n A)) A --> select X A n
1349 SelectInst
*SI
= dyn_cast
<SelectInst
>(LHS
);
1352 SI
= dyn_cast
<SelectInst
>(RHS
);
1355 if (SI
&& SI
->hasOneUse()) {
1356 Value
*TV
= SI
->getTrueValue();
1357 Value
*FV
= SI
->getFalseValue();
1360 // Can we fold the add into the argument of the select?
1361 // We check both true and false select arguments for a matching subtract.
1362 if (match(FV
, m_Zero()) && match(TV
, m_Sub(m_Value(N
), m_Specific(A
))))
1363 // Fold the add into the true select value.
1364 return SelectInst::Create(SI
->getCondition(), N
, A
);
1366 if (match(TV
, m_Zero()) && match(FV
, m_Sub(m_Value(N
), m_Specific(A
))))
1367 // Fold the add into the false select value.
1368 return SelectInst::Create(SI
->getCondition(), A
, N
);
1372 if (Instruction
*Ext
= narrowMathIfNoOverflow(I
))
1375 // (add (xor A, B) (and A, B)) --> (or A, B)
1376 // (add (and A, B) (xor A, B)) --> (or A, B)
1377 if (match(&I
, m_c_BinOp(m_Xor(m_Value(A
), m_Value(B
)),
1378 m_c_And(m_Deferred(A
), m_Deferred(B
)))))
1379 return BinaryOperator::CreateOr(A
, B
);
1381 // (add (or A, B) (and A, B)) --> (add A, B)
1382 // (add (and A, B) (or A, B)) --> (add A, B)
1383 if (match(&I
, m_c_BinOp(m_Or(m_Value(A
), m_Value(B
)),
1384 m_c_And(m_Deferred(A
), m_Deferred(B
))))) {
1390 // TODO(jingyue): Consider willNotOverflowSignedAdd and
1391 // willNotOverflowUnsignedAdd to reduce the number of invocations of
1392 // computeKnownBits.
1393 bool Changed
= false;
1394 if (!I
.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS
, RHS
, I
)) {
1396 I
.setHasNoSignedWrap(true);
1398 if (!I
.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS
, RHS
, I
)) {
1400 I
.setHasNoUnsignedWrap(true);
1403 if (Instruction
*V
= canonicalizeLowbitMask(I
, Builder
))
1406 if (Instruction
*V
=
1407 canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I
))
1410 if (Instruction
*SatAdd
= foldToUnsignedSaturatedAdd(I
))
1413 return Changed
? &I
: nullptr;
1416 /// Eliminate an op from a linear interpolation (lerp) pattern.
1417 static Instruction
*factorizeLerp(BinaryOperator
&I
,
1418 InstCombiner::BuilderTy
&Builder
) {
1420 if (!match(&I
, m_c_FAdd(m_OneUse(m_c_FMul(m_Value(Y
),
1421 m_OneUse(m_FSub(m_FPOne(),
1423 m_OneUse(m_c_FMul(m_Value(X
), m_Deferred(Z
))))))
1426 // (Y * (1.0 - Z)) + (X * Z) --> Y + Z * (X - Y) [8 commuted variants]
1427 Value
*XY
= Builder
.CreateFSubFMF(X
, Y
, &I
);
1428 Value
*MulZ
= Builder
.CreateFMulFMF(Z
, XY
, &I
);
1429 return BinaryOperator::CreateFAddFMF(Y
, MulZ
, &I
);
1432 /// Factor a common operand out of fadd/fsub of fmul/fdiv.
1433 static Instruction
*factorizeFAddFSub(BinaryOperator
&I
,
1434 InstCombiner::BuilderTy
&Builder
) {
1435 assert((I
.getOpcode() == Instruction::FAdd
||
1436 I
.getOpcode() == Instruction::FSub
) && "Expecting fadd/fsub");
1437 assert(I
.hasAllowReassoc() && I
.hasNoSignedZeros() &&
1438 "FP factorization requires FMF");
1440 if (Instruction
*Lerp
= factorizeLerp(I
, Builder
))
1443 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
1446 if ((match(Op0
, m_OneUse(m_FMul(m_Value(X
), m_Value(Z
)))) &&
1447 match(Op1
, m_OneUse(m_c_FMul(m_Value(Y
), m_Specific(Z
))))) ||
1448 (match(Op0
, m_OneUse(m_FMul(m_Value(Z
), m_Value(X
)))) &&
1449 match(Op1
, m_OneUse(m_c_FMul(m_Value(Y
), m_Specific(Z
))))))
1451 else if (match(Op0
, m_OneUse(m_FDiv(m_Value(X
), m_Value(Z
)))) &&
1452 match(Op1
, m_OneUse(m_FDiv(m_Value(Y
), m_Specific(Z
)))))
1457 // (X * Z) + (Y * Z) --> (X + Y) * Z
1458 // (X * Z) - (Y * Z) --> (X - Y) * Z
1459 // (X / Z) + (Y / Z) --> (X + Y) / Z
1460 // (X / Z) - (Y / Z) --> (X - Y) / Z
1461 bool IsFAdd
= I
.getOpcode() == Instruction::FAdd
;
1462 Value
*XY
= IsFAdd
? Builder
.CreateFAddFMF(X
, Y
, &I
)
1463 : Builder
.CreateFSubFMF(X
, Y
, &I
);
1465 // Bail out if we just created a denormal constant.
1466 // TODO: This is copied from a previous implementation. Is it necessary?
1468 if (match(XY
, m_APFloat(C
)) && !C
->isNormal())
1471 return IsFMul
? BinaryOperator::CreateFMulFMF(XY
, Z
, &I
)
1472 : BinaryOperator::CreateFDivFMF(XY
, Z
, &I
);
1475 Instruction
*InstCombiner::visitFAdd(BinaryOperator
&I
) {
1476 if (Value
*V
= SimplifyFAddInst(I
.getOperand(0), I
.getOperand(1),
1477 I
.getFastMathFlags(),
1478 SQ
.getWithInstruction(&I
)))
1479 return replaceInstUsesWith(I
, V
);
1481 if (SimplifyAssociativeOrCommutative(I
))
1484 if (Instruction
*X
= foldVectorBinop(I
))
1487 if (Instruction
*FoldedFAdd
= foldBinOpIntoSelectOrPhi(I
))
1490 // (-X) + Y --> Y - X
1492 if (match(&I
, m_c_FAdd(m_FNeg(m_Value(X
)), m_Value(Y
))))
1493 return BinaryOperator::CreateFSubFMF(Y
, X
, &I
);
1495 // Similar to above, but look through fmul/fdiv for the negated term.
1496 // (-X * Y) + Z --> Z - (X * Y) [4 commuted variants]
1498 if (match(&I
, m_c_FAdd(m_OneUse(m_c_FMul(m_FNeg(m_Value(X
)), m_Value(Y
))),
1500 Value
*XY
= Builder
.CreateFMulFMF(X
, Y
, &I
);
1501 return BinaryOperator::CreateFSubFMF(Z
, XY
, &I
);
1503 // (-X / Y) + Z --> Z - (X / Y) [2 commuted variants]
1504 // (X / -Y) + Z --> Z - (X / Y) [2 commuted variants]
1505 if (match(&I
, m_c_FAdd(m_OneUse(m_FDiv(m_FNeg(m_Value(X
)), m_Value(Y
))),
1507 match(&I
, m_c_FAdd(m_OneUse(m_FDiv(m_Value(X
), m_FNeg(m_Value(Y
)))),
1509 Value
*XY
= Builder
.CreateFDivFMF(X
, Y
, &I
);
1510 return BinaryOperator::CreateFSubFMF(Z
, XY
, &I
);
1513 // Check for (fadd double (sitofp x), y), see if we can merge this into an
1514 // integer add followed by a promotion.
1515 Value
*LHS
= I
.getOperand(0), *RHS
= I
.getOperand(1);
1516 if (SIToFPInst
*LHSConv
= dyn_cast
<SIToFPInst
>(LHS
)) {
1517 Value
*LHSIntVal
= LHSConv
->getOperand(0);
1518 Type
*FPType
= LHSConv
->getType();
1520 // TODO: This check is overly conservative. In many cases known bits
1521 // analysis can tell us that the result of the addition has less significant
1522 // bits than the integer type can hold.
1523 auto IsValidPromotion
= [](Type
*FTy
, Type
*ITy
) {
1524 Type
*FScalarTy
= FTy
->getScalarType();
1525 Type
*IScalarTy
= ITy
->getScalarType();
1527 // Do we have enough bits in the significand to represent the result of
1528 // the integer addition?
1529 unsigned MaxRepresentableBits
=
1530 APFloat::semanticsPrecision(FScalarTy
->getFltSemantics());
1531 return IScalarTy
->getIntegerBitWidth() <= MaxRepresentableBits
;
1534 // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
1535 // ... if the constant fits in the integer value. This is useful for things
1536 // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
1537 // requires a constant pool load, and generally allows the add to be better
1539 if (ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(RHS
))
1540 if (IsValidPromotion(FPType
, LHSIntVal
->getType())) {
1542 ConstantExpr::getFPToSI(CFP
, LHSIntVal
->getType());
1543 if (LHSConv
->hasOneUse() &&
1544 ConstantExpr::getSIToFP(CI
, I
.getType()) == CFP
&&
1545 willNotOverflowSignedAdd(LHSIntVal
, CI
, I
)) {
1546 // Insert the new integer add.
1547 Value
*NewAdd
= Builder
.CreateNSWAdd(LHSIntVal
, CI
, "addconv");
1548 return new SIToFPInst(NewAdd
, I
.getType());
1552 // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
1553 if (SIToFPInst
*RHSConv
= dyn_cast
<SIToFPInst
>(RHS
)) {
1554 Value
*RHSIntVal
= RHSConv
->getOperand(0);
1555 // It's enough to check LHS types only because we require int types to
1556 // be the same for this transform.
1557 if (IsValidPromotion(FPType
, LHSIntVal
->getType())) {
1558 // Only do this if x/y have the same type, if at least one of them has a
1559 // single use (so we don't increase the number of int->fp conversions),
1560 // and if the integer add will not overflow.
1561 if (LHSIntVal
->getType() == RHSIntVal
->getType() &&
1562 (LHSConv
->hasOneUse() || RHSConv
->hasOneUse()) &&
1563 willNotOverflowSignedAdd(LHSIntVal
, RHSIntVal
, I
)) {
1564 // Insert the new integer add.
1565 Value
*NewAdd
= Builder
.CreateNSWAdd(LHSIntVal
, RHSIntVal
, "addconv");
1566 return new SIToFPInst(NewAdd
, I
.getType());
1572 // Handle specials cases for FAdd with selects feeding the operation
1573 if (Value
*V
= SimplifySelectsFeedingBinaryOp(I
, LHS
, RHS
))
1574 return replaceInstUsesWith(I
, V
);
1576 if (I
.hasAllowReassoc() && I
.hasNoSignedZeros()) {
1577 if (Instruction
*F
= factorizeFAddFSub(I
, Builder
))
1579 if (Value
*V
= FAddCombine(Builder
).simplify(&I
))
1580 return replaceInstUsesWith(I
, V
);
1586 /// Optimize pointer differences into the same array into a size. Consider:
1587 /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
1588 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
1589 Value
*InstCombiner::OptimizePointerDifference(Value
*LHS
, Value
*RHS
,
1591 // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
1593 bool Swapped
= false;
1594 GEPOperator
*GEP1
= nullptr, *GEP2
= nullptr;
1596 // For now we require one side to be the base pointer "A" or a constant
1597 // GEP derived from it.
1598 if (GEPOperator
*LHSGEP
= dyn_cast
<GEPOperator
>(LHS
)) {
1600 if (LHSGEP
->getOperand(0) == RHS
) {
1603 } else if (GEPOperator
*RHSGEP
= dyn_cast
<GEPOperator
>(RHS
)) {
1604 // (gep X, ...) - (gep X, ...)
1605 if (LHSGEP
->getOperand(0)->stripPointerCasts() ==
1606 RHSGEP
->getOperand(0)->stripPointerCasts()) {
1614 if (GEPOperator
*RHSGEP
= dyn_cast
<GEPOperator
>(RHS
)) {
1616 if (RHSGEP
->getOperand(0) == LHS
) {
1619 } else if (GEPOperator
*LHSGEP
= dyn_cast
<GEPOperator
>(LHS
)) {
1620 // (gep X, ...) - (gep X, ...)
1621 if (RHSGEP
->getOperand(0)->stripPointerCasts() ==
1622 LHSGEP
->getOperand(0)->stripPointerCasts()) {
1635 // (gep X, ...) - (gep X, ...)
1637 // Avoid duplicating the arithmetic if there are more than one non-constant
1638 // indices between the two GEPs and either GEP has a non-constant index and
1639 // multiple users. If zero non-constant index, the result is a constant and
1640 // there is no duplication. If one non-constant index, the result is an add
1641 // or sub with a constant, which is no larger than the original code, and
1642 // there's no duplicated arithmetic, even if either GEP has multiple
1643 // users. If more than one non-constant indices combined, as long as the GEP
1644 // with at least one non-constant index doesn't have multiple users, there
1645 // is no duplication.
1646 unsigned NumNonConstantIndices1
= GEP1
->countNonConstantIndices();
1647 unsigned NumNonConstantIndices2
= GEP2
->countNonConstantIndices();
1648 if (NumNonConstantIndices1
+ NumNonConstantIndices2
> 1 &&
1649 ((NumNonConstantIndices1
> 0 && !GEP1
->hasOneUse()) ||
1650 (NumNonConstantIndices2
> 0 && !GEP2
->hasOneUse()))) {
1655 // Emit the offset of the GEP and an intptr_t.
1656 Value
*Result
= EmitGEPOffset(GEP1
);
1658 // If we had a constant expression GEP on the other side offsetting the
1659 // pointer, subtract it from the offset we have.
1661 Value
*Offset
= EmitGEPOffset(GEP2
);
1662 Result
= Builder
.CreateSub(Result
, Offset
);
1665 // If we have p - gep(p, ...) then we have to negate the result.
1667 Result
= Builder
.CreateNeg(Result
, "diff.neg");
1669 return Builder
.CreateIntCast(Result
, Ty
, true);
1672 Instruction
*InstCombiner::visitSub(BinaryOperator
&I
) {
1673 if (Value
*V
= SimplifySubInst(I
.getOperand(0), I
.getOperand(1),
1674 I
.hasNoSignedWrap(), I
.hasNoUnsignedWrap(),
1675 SQ
.getWithInstruction(&I
)))
1676 return replaceInstUsesWith(I
, V
);
1678 if (Instruction
*X
= foldVectorBinop(I
))
1681 // (A*B)-(A*C) -> A*(B-C) etc
1682 if (Value
*V
= SimplifyUsingDistributiveLaws(I
))
1683 return replaceInstUsesWith(I
, V
);
1685 // If this is a 'B = x-(-A)', change to B = x+A.
1686 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
1687 if (Value
*V
= dyn_castNegVal(Op1
)) {
1688 BinaryOperator
*Res
= BinaryOperator::CreateAdd(Op0
, V
);
1690 if (const auto *BO
= dyn_cast
<BinaryOperator
>(Op1
)) {
1691 assert(BO
->getOpcode() == Instruction::Sub
&&
1692 "Expected a subtraction operator!");
1693 if (BO
->hasNoSignedWrap() && I
.hasNoSignedWrap())
1694 Res
->setHasNoSignedWrap(true);
1696 if (cast
<Constant
>(Op1
)->isNotMinSignedValue() && I
.hasNoSignedWrap())
1697 Res
->setHasNoSignedWrap(true);
1703 if (I
.getType()->isIntOrIntVectorTy(1))
1704 return BinaryOperator::CreateXor(Op0
, Op1
);
1706 // Replace (-1 - A) with (~A).
1707 if (match(Op0
, m_AllOnes()))
1708 return BinaryOperator::CreateNot(Op1
);
1710 // (~X) - (~Y) --> Y - X
1712 if (match(Op0
, m_Not(m_Value(X
))) && match(Op1
, m_Not(m_Value(Y
))))
1713 return BinaryOperator::CreateSub(Y
, X
);
1715 // (X + -1) - Y --> ~Y + X
1716 if (match(Op0
, m_OneUse(m_Add(m_Value(X
), m_AllOnes()))))
1717 return BinaryOperator::CreateAdd(Builder
.CreateNot(Op1
), X
);
1719 // Y - (X + 1) --> ~X + Y
1720 if (match(Op1
, m_OneUse(m_Add(m_Value(X
), m_One()))))
1721 return BinaryOperator::CreateAdd(Builder
.CreateNot(X
), Op0
);
1723 // Y - ~X --> (X + 1) + Y
1724 if (match(Op1
, m_OneUse(m_Not(m_Value(X
))))) {
1725 return BinaryOperator::CreateAdd(
1726 Builder
.CreateAdd(Op0
, ConstantInt::get(I
.getType(), 1)), X
);
1729 if (Constant
*C
= dyn_cast
<Constant
>(Op0
)) {
1730 bool IsNegate
= match(C
, m_ZeroInt());
1732 if (match(Op1
, m_ZExt(m_Value(X
))) && X
->getType()->isIntOrIntVectorTy(1)) {
1733 // 0 - (zext bool) --> sext bool
1734 // C - (zext bool) --> bool ? C - 1 : C
1736 return CastInst::CreateSExtOrBitCast(X
, I
.getType());
1737 return SelectInst::Create(X
, SubOne(C
), C
);
1739 if (match(Op1
, m_SExt(m_Value(X
))) && X
->getType()->isIntOrIntVectorTy(1)) {
1740 // 0 - (sext bool) --> zext bool
1741 // C - (sext bool) --> bool ? C + 1 : C
1743 return CastInst::CreateZExtOrBitCast(X
, I
.getType());
1744 return SelectInst::Create(X
, AddOne(C
), C
);
1747 // C - ~X == X + (1+C)
1748 if (match(Op1
, m_Not(m_Value(X
))))
1749 return BinaryOperator::CreateAdd(X
, AddOne(C
));
1751 // Try to fold constant sub into select arguments.
1752 if (SelectInst
*SI
= dyn_cast
<SelectInst
>(Op1
))
1753 if (Instruction
*R
= FoldOpIntoSelect(I
, SI
))
1756 // Try to fold constant sub into PHI values.
1757 if (PHINode
*PN
= dyn_cast
<PHINode
>(Op1
))
1758 if (Instruction
*R
= foldOpIntoPhi(I
, PN
))
1763 // C-(C2-X) --> X+(C-C2)
1764 if (match(Op1
, m_Sub(m_Constant(C2
), m_Value(X
))))
1765 return BinaryOperator::CreateAdd(X
, ConstantExpr::getSub(C
, C2
));
1767 // C-(X+C2) --> (C-C2)-X
1768 if (match(Op1
, m_Add(m_Value(X
), m_Constant(C2
))))
1769 return BinaryOperator::CreateSub(ConstantExpr::getSub(C
, C2
), X
);
1773 if (match(Op0
, m_APInt(Op0C
))) {
1775 if (Op0C
->isNullValue()) {
1777 match(Op1
, m_TruncOrSelf(m_Value(Op1Wide
)));
1778 bool HadTrunc
= Op1Wide
!= Op1
;
1779 bool NoTruncOrTruncIsOneUse
= !HadTrunc
|| Op1
->hasOneUse();
1780 unsigned BitWidth
= Op1Wide
->getType()->getScalarSizeInBits();
1784 // -(X >>u 31) -> (X >>s 31)
1785 if (NoTruncOrTruncIsOneUse
&&
1786 match(Op1Wide
, m_LShr(m_Value(X
), m_APInt(ShAmt
))) &&
1787 *ShAmt
== BitWidth
- 1) {
1788 Value
*ShAmtOp
= cast
<Instruction
>(Op1Wide
)->getOperand(1);
1789 Instruction
*NewShift
= BinaryOperator::CreateAShr(X
, ShAmtOp
);
1790 NewShift
->copyIRFlags(Op1Wide
);
1793 Builder
.Insert(NewShift
);
1794 return TruncInst::CreateTruncOrBitCast(NewShift
, Op1
->getType());
1796 // -(X >>s 31) -> (X >>u 31)
1797 if (NoTruncOrTruncIsOneUse
&&
1798 match(Op1Wide
, m_AShr(m_Value(X
), m_APInt(ShAmt
))) &&
1799 *ShAmt
== BitWidth
- 1) {
1800 Value
*ShAmtOp
= cast
<Instruction
>(Op1Wide
)->getOperand(1);
1801 Instruction
*NewShift
= BinaryOperator::CreateLShr(X
, ShAmtOp
);
1802 NewShift
->copyIRFlags(Op1Wide
);
1805 Builder
.Insert(NewShift
);
1806 return TruncInst::CreateTruncOrBitCast(NewShift
, Op1
->getType());
1809 if (!HadTrunc
&& Op1
->hasOneUse()) {
1811 SelectPatternFlavor SPF
= matchSelectPattern(Op1
, LHS
, RHS
).Flavor
;
1812 if (SPF
== SPF_ABS
|| SPF
== SPF_NABS
) {
1813 // This is a negate of an ABS/NABS pattern. Just swap the operands
1815 cast
<SelectInst
>(Op1
)->swapValues();
1816 // Don't swap prof metadata, we didn't change the branch behavior.
1817 return replaceInstUsesWith(I
, Op1
);
1822 // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
1824 if (Op0C
->isMask()) {
1825 KnownBits RHSKnown
= computeKnownBits(Op1
, 0, &I
);
1826 if ((*Op0C
| RHSKnown
.Zero
).isAllOnesValue())
1827 return BinaryOperator::CreateXor(Op1
, Op0
);

  {
    Value *Y;
    // X-(X+Y) == -Y    X-(Y+X) == -Y
    if (match(Op1, m_c_Add(m_Specific(Op0), m_Value(Y))))
      return BinaryOperator::CreateNeg(Y);

    // (X-Y)-X == -Y
    if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
      return BinaryOperator::CreateNeg(Y);
  }

  // (sub (or A, B) (and A, B)) --> (xor A, B)
  {
    Value *A, *B;
    if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
        match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateXor(A, B);
  }

  // (sub (and A, B) (or A, B)) --> neg (xor A, B)
  {
    Value *A, *B;
    if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
        match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
        (Op0->hasOneUse() || Op1->hasOneUse()))
      return BinaryOperator::CreateNeg(Builder.CreateXor(A, B));
  }

  // (sub (or A, B), (xor A, B)) --> (and A, B)
  {
    Value *A, *B;
    if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
        match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(A, B);
  }

  // (sub (xor A, B) (or A, B)) --> neg (and A, B)
  {
    Value *A, *B;
    if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
        match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
        (Op0->hasOneUse() || Op1->hasOneUse()))
      return BinaryOperator::CreateNeg(Builder.CreateAnd(A, B));
  }
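
  // Bit-level identity behind the or/and/xor folds above: (A | B) is the
  // borrow-free sum of (A ^ B) and (A & B). For example, with A = 6 and B = 3:
  //   (A | B) - (A & B) = 7 - 2 = 5 = A ^ B
  //   (A | B) - (A ^ B) = 7 - 5 = 2 = A & B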

  {
    Value *Y;
    // ((X | Y) - X) --> (~X & Y)
    if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1)))))
      return BinaryOperator::CreateAnd(
          Y, Builder.CreateNot(Op1, Op1->getName() + ".not"));
  }
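
  // The fold above relies on (X | Y) == X + (~X & Y) with no carries, so
  // subtracting X leaves only the bits of Y that are not already set in X.
  // For example, with X = 5 (0b0101) and Y = 3 (0b0011):
  //   (X | Y) - X = 7 - 5 = 2 = ~X & Y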

  if (Op1->hasOneUse()) {
    Value *X = nullptr, *Y = nullptr, *Z = nullptr;
    Constant *C = nullptr;

    // (X - (Y - Z))  -->  (X + (Z - Y)).
    if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
      return BinaryOperator::CreateAdd(Op0,
                                       Builder.CreateSub(Z, Y, Op1->getName()));

    // (X - (X & Y))  -->  (X & ~Y)
    if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0))))
      return BinaryOperator::CreateAnd(
          Op0, Builder.CreateNot(Y, Y->getName() + ".not"));

    // 0 - (X sdiv C)  -> (X sdiv -C)  provided the negation doesn't overflow.
    // TODO: This could be extended to match arbitrary vector constants.
    const APInt *DivC;
    if (match(Op0, m_Zero()) && match(Op1, m_SDiv(m_Value(X), m_APInt(DivC))) &&
        !DivC->isMinSignedValue() && *DivC != 1) {
      Constant *NegDivC = ConstantInt::get(I.getType(), -(*DivC));
      Instruction *BO = BinaryOperator::CreateSDiv(X, NegDivC);
      BO->setIsExact(cast<BinaryOperator>(Op1)->isExact());
      return BO;
    }

    // 0 - (X << Y)  -> (-X << Y)  when X is freely negatable.
    if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
      if (Value *XNeg = dyn_castNegVal(X))
        return BinaryOperator::CreateShl(XNeg, Y);

    // Subtracting -1/0 is the same as adding 1/0:
    // sub [nsw] Op0, sext(bool Y) -> add [nsw] Op0, zext(bool Y)
    // 'nuw' is dropped in favor of the canonical form.
    if (match(Op1, m_SExt(m_Value(Y))) &&
        Y->getType()->getScalarSizeInBits() == 1) {
      Value *Zext = Builder.CreateZExt(Y, I.getType());
      BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext);
      Add->setHasNoSignedWrap(I.hasNoSignedWrap());
      return Add;
    }

    // X - A*-B -> X + A*B
    // X - -A*B -> X + A*B
    Value *A, *B;
    if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B)))))
      return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B));

    // X - A*C -> X + A*-C
    // No need to handle commuted multiply because multiply handling will
    // ensure the constant will be moved to the right hand side.
    if (match(Op1, m_Mul(m_Value(A), m_Constant(C))) && !isa<ConstantExpr>(C)) {
      Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(C));
      return BinaryOperator::CreateAdd(Op0, NewMul);
    }
  }
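
  // Example of the last fold above (hypothetical i32 values %x and %a):
  //   sub i32 %x, (mul i32 %a, 3)  -->  add i32 %x, (mul i32 %a, -3)
  // which puts the expression into the canonical add-of-mul form.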

  {
    // ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A
    // ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A
    // Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O)
    // Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O)
    // So long as O here is freely invertible, this will be neutral or a win.
    Value *LHS, *RHS, *A;
    Value *NotA = Op0, *MinMax = Op1;
    SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
    if (!SelectPatternResult::isMinOrMax(SPF)) {
      NotA = Op1;
      MinMax = Op0;
      SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
    }
    if (SelectPatternResult::isMinOrMax(SPF) &&
        match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) {
      if (NotA == LHS)
        std::swap(LHS, RHS);
      // LHS is now O above and expected to have at least 2 uses (the min/max)
      // NotA is expected to have 2 uses from the min/max and 1 from the sub.
      if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
          !NotA->hasNUsesOrMore(4)) {
        // Note: We don't generate the inverse max/min, just create the not of
        // it and let other folds do the rest.
        Value *Not = Builder.CreateNot(MinMax);
        if (NotA == Op0)
          return BinaryOperator::CreateSub(Not, A);
        else
          return BinaryOperator::CreateSub(A, Not);
      }
    }
  }

  // Optimize differences of pointers into the same array into a size. Consider:
  //  &A[10] - &A[0]: we should compile this to "10".
  Value *LHSOp, *RHSOp;
  if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
      match(Op1, m_PtrToInt(m_Value(RHSOp))))
    if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
      return replaceInstUsesWith(I, Res);

  // trunc(p)-trunc(q) -> trunc(p-q)
  if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
      match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
    if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
      return replaceInstUsesWith(I, Res);
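
  // Illustrative IR for the pointer-difference folds above (assuming both
  // operands are ptrtoint of GEPs into the same i32 array %A, so the byte
  // difference is a constant):
  //   %pa = ptrtoint i32* %gep10 to i64   ; &A[10]
  //   %pb = ptrtoint i32* %gep0 to i64    ; &A[0]
  //   sub i64 %pa, %pb  -->  40
  // A subsequent exact division by the element size, as emitted for C pointer
  // subtraction, then yields the expected "10".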

  // Canonicalize a shifty way to code absolute value to the common pattern.
  // There are 2 potential commuted variants.
  // We're relying on the fact that we only do this transform when the shift has
  // exactly 2 uses and the xor has exactly 1 use (otherwise, we might increase
  // instructions).
  Value *A;
  const APInt *ShAmt;
  Type *Ty = I.getType();
  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
      Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
      match(Op0, m_OneUse(m_c_Xor(m_Specific(A), m_Specific(Op1))))) {
    // B = ashr i32 A, 31 ; smear the sign bit
    // sub (xor A, B), B  ; flip bits if negative and subtract -1 (add 1)
    // --> (A < 0) ? -A : A
    Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
    // Copy the nuw/nsw flags from the sub to the negate.
    Value *Neg = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(),
                                   I.hasNoSignedWrap());
    return SelectInst::Create(Cmp, Neg, A);
  }

  if (Instruction *V =
          canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
    return V;

  if (Instruction *Ext = narrowMathIfNoOverflow(I))
    return Ext;

  bool Changed = false;
  if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
    Changed = true;
    I.setHasNoSignedWrap(true);
  }
  if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
    Changed = true;
    I.setHasNoUnsignedWrap(true);
  }

  return Changed ? &I : nullptr;
}

/// This eliminates floating-point negation in either 'fneg(X)' or
/// 'fsub(-0.0, X)' form by combining into a constant operand.
static Instruction *foldFNegIntoConstant(Instruction &I) {
  Value *X;
  Constant *C;

  // Fold negation into constant operand. This is limited with one-use because
  // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
  // -(X * C) --> X * (-C)
  // FIXME: It's arguable whether these should be m_OneUse or not. The current
  // belief is that the FNeg allows for better reassociation opportunities.
  if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
    return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
  // -(X / C) --> X / (-C)
  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
    return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
  // -(C / X) --> (-C) / X
  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
    return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);

  return nullptr;
}
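
// Example of the folds in foldFNegIntoConstant above (illustrative only; %x
// is a hypothetical float value, and fast-math flags are carried over from
// the original instruction):
//   fneg (fmul float %x, 2.0)  -->  fmul float %x, -2.0
//   fneg (fdiv float 2.0, %x)  -->  fdiv float -2.0, %x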

static Instruction *hoistFNegAboveFMulFDiv(Instruction &I,
                                           InstCombiner::BuilderTy &Builder) {
  Value *FNeg;
  if (!match(&I, m_FNeg(m_Value(FNeg))))
    return nullptr;

  Value *X, *Y;
  if (match(FNeg, m_OneUse(m_FMul(m_Value(X), m_Value(Y)))))
    return BinaryOperator::CreateFMulFMF(Builder.CreateFNegFMF(X, &I), Y, &I);

  if (match(FNeg, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))))
    return BinaryOperator::CreateFDivFMF(Builder.CreateFNegFMF(X, &I), Y, &I);

  return nullptr;
}

Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
  Value *Op = I.getOperand(0);

  if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldFNegIntoConstant(I))
    return X;

  Value *X, *Y;

  // If we can ignore the sign of zeros: -(X - Y) --> (Y - X)
  if (I.hasNoSignedZeros() &&
      match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
    return BinaryOperator::CreateFSubFMF(Y, X, &I);

  if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
    return R;

  return nullptr;
}

Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
  if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // Subtraction from -0.0 is the canonical form of fneg.
  // fsub nsz 0, X ==> fsub nsz -0.0, X
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
    return BinaryOperator::CreateFNegFMF(Op1, &I);

  if (Instruction *X = foldFNegIntoConstant(I))
    return X;

  if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
    return R;

  Value *X, *Y;
  Constant *C;

  // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
  // Canonicalize to fadd to make analysis easier.
  // This can also help codegen because fadd is commutative.
  // Note that if this fsub was really an fneg, the fadd with -0.0 will get
  // killed later. We still limit that particular transform with 'hasOneUse'
  // because an fneg is assumed better/cheaper than a generic fsub.
  if (I.hasNoSignedZeros() || CannotBeNegativeZero(Op0, SQ.TLI)) {
    if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
      Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
      return BinaryOperator::CreateFAddFMF(Op0, NewSub, &I);
    }
  }

  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *NV = FoldOpIntoSelect(I, SI))
        return NV;

  // X - C --> X + (-C)
  // But don't transform constant expressions because there's an inverse fold
  // for X + (-Y) --> X - Y.
  if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
    return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);

  // X - (-Y) --> X + Y
  if (match(Op1, m_FNeg(m_Value(Y))))
    return BinaryOperator::CreateFAddFMF(Op0, Y, &I);

  // Similar to above, but look through a cast of the negated value:
  // X - (fptrunc(-Y)) --> X + fptrunc(Y)
  Type *Ty = I.getType();
  if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y))))))
    return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPTrunc(Y, Ty), &I);

  // X - (fpext(-Y)) --> X + fpext(Y)
  if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y))))))
    return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I);

  // Similar to above, but look through fmul/fdiv of the negated value:
  // Op0 - (-X * Y) --> Op0 + (X * Y)
  // Op0 - (Y * -X) --> Op0 + (X * Y)
  if (match(Op1, m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))))) {
    Value *FMul = Builder.CreateFMulFMF(X, Y, &I);
    return BinaryOperator::CreateFAddFMF(Op0, FMul, &I);
  }

  // Op0 - (-X / Y) --> Op0 + (X / Y)
  // Op0 - (X / -Y) --> Op0 + (X / Y)
  if (match(Op1, m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y)))) ||
      match(Op1, m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))))) {
    Value *FDiv = Builder.CreateFDivFMF(X, Y, &I);
    return BinaryOperator::CreateFAddFMF(Op0, FDiv, &I);
  }

  // Handle special cases for FSub with selects feeding the operation
  if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
    return replaceInstUsesWith(I, V);

  if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
    // (Y - X) - Y --> -X
    if (match(Op0, m_FSub(m_Specific(Op1), m_Value(X))))
      return BinaryOperator::CreateFNegFMF(X, &I);

    // Y - (X + Y) --> -X
    // Y - (Y + X) --> -X
    if (match(Op1, m_c_FAdd(m_Specific(Op0), m_Value(X))))
      return BinaryOperator::CreateFNegFMF(X, &I);

    // (X * C) - X --> X * (C - 1.0)
    if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
      Constant *CSubOne = ConstantExpr::getFSub(C, ConstantFP::get(Ty, 1.0));
      return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
    }
    // X - (X * C) --> X * (1.0 - C)
    if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
      Constant *OneSubC = ConstantExpr::getFSub(ConstantFP::get(Ty, 1.0), C);
      return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
    }

    if (Instruction *F = factorizeFAddFSub(I, Builder))
      return F;

    // TODO: This performs reassociative folds for FP ops. Some fraction of the
    // functionality has been subsumed by simple pattern matching here and in
    // InstSimplify. We should let a dedicated reassociation pass handle more
    // complex pattern matching and remove this from InstCombine.
    if (Value *V = FAddCombine(Builder).simplify(&I))
      return replaceInstUsesWith(I, V);