1 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains logic for simplifying instructions based on information
11 // about how they are used.
13 //===----------------------------------------------------------------------===//
16 #include "InstCombine.h"
17 #include "llvm/Target/TargetData.h"
18 #include "llvm/IntrinsicInst.h"
23 /// ShrinkDemandedConstant - Check to see if the specified operand of the
24 /// specified instruction is a constant integer. If so, check to see if there
25 /// are any bits set in the constant that are not demanded. If so, shrink the
26 /// constant and return true.
27 static bool ShrinkDemandedConstant(Instruction
*I
, unsigned OpNo
,
29 assert(I
&& "No instruction?");
30 assert(OpNo
< I
->getNumOperands() && "Operand index too large");
32 // If the operand is not a constant integer, nothing to do.
33 ConstantInt
*OpC
= dyn_cast
<ConstantInt
>(I
->getOperand(OpNo
));
34 if (!OpC
) return false;
36 // If there are no bits set that aren't demanded, nothing to do.
37 Demanded
= Demanded
.zextOrTrunc(OpC
->getValue().getBitWidth());
38 if ((~Demanded
& OpC
->getValue()) == 0)
41 // This instruction is producing bits that are not demanded. Shrink the RHS.
42 Demanded
&= OpC
->getValue();
43 I
->setOperand(OpNo
, ConstantInt::get(OpC
->getType(), Demanded
));
49 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
50 /// SimplifyDemandedBits knows about. See if the instruction has any
51 /// properties that allow us to simplify its operands.
52 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction
&Inst
) {
53 unsigned BitWidth
= Inst
.getType()->getScalarSizeInBits();
54 APInt
KnownZero(BitWidth
, 0), KnownOne(BitWidth
, 0);
55 APInt
DemandedMask(APInt::getAllOnesValue(BitWidth
));
57 Value
*V
= SimplifyDemandedUseBits(&Inst
, DemandedMask
,
58 KnownZero
, KnownOne
, 0);
59 if (V
== 0) return false;
60 if (V
== &Inst
) return true;
61 ReplaceInstUsesWith(Inst
, V
);
65 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
66 /// specified instruction operand if possible, updating it in place. It returns
67 /// true if it made any change and false otherwise.
68 bool InstCombiner::SimplifyDemandedBits(Use
&U
, APInt DemandedMask
,
69 APInt
&KnownZero
, APInt
&KnownOne
,
71 Value
*NewVal
= SimplifyDemandedUseBits(U
.get(), DemandedMask
,
72 KnownZero
, KnownOne
, Depth
);
73 if (NewVal
== 0) return false;
79 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
80 /// value based on the demanded bits. When this function is called, it is known
81 /// that only the bits set in DemandedMask of the result of V are ever used
82 /// downstream. Consequently, depending on the mask and V, it may be possible
83 /// to replace V with a constant or one of its operands. In such cases, this
84 /// function does the replacement and returns true. In all other cases, it
85 /// returns false after analyzing the expression and setting KnownOne and known
86 /// to be one in the expression. KnownZero contains all the bits that are known
87 /// to be zero in the expression. These are provided to potentially allow the
88 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
89 /// the expression. KnownOne and KnownZero always follow the invariant that
90 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
91 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
92 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
93 /// and KnownOne must all be the same.
95 /// This returns null if it did not change anything and it permits no
96 /// simplification. This returns V itself if it did some simplification of V's
97 /// operands based on the information about what bits are demanded. This returns
98 /// some other non-null value if it found out that V is equal to another value
99 /// in the context where the specified bits are demanded, but not for all users.
100 Value
*InstCombiner::SimplifyDemandedUseBits(Value
*V
, APInt DemandedMask
,
101 APInt
&KnownZero
, APInt
&KnownOne
,
103 assert(V
!= 0 && "Null pointer of Value???");
104 assert(Depth
<= 6 && "Limit Search Depth");
105 uint32_t BitWidth
= DemandedMask
.getBitWidth();
106 const Type
*VTy
= V
->getType();
107 assert((TD
|| !VTy
->isPointerTy()) &&
108 "SimplifyDemandedBits needs to know bit widths!");
109 assert((!TD
|| TD
->getTypeSizeInBits(VTy
->getScalarType()) == BitWidth
) &&
110 (!VTy
->isIntOrIntVectorTy() ||
111 VTy
->getScalarSizeInBits() == BitWidth
) &&
112 KnownZero
.getBitWidth() == BitWidth
&&
113 KnownOne
.getBitWidth() == BitWidth
&&
114 "Value *V, DemandedMask, KnownZero and KnownOne "
115 "must have same BitWidth");
116 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(V
)) {
117 // We know all of the bits for a constant!
118 KnownOne
= CI
->getValue() & DemandedMask
;
119 KnownZero
= ~KnownOne
& DemandedMask
;
122 if (isa
<ConstantPointerNull
>(V
)) {
123 // We know all of the bits for a constant!
124 KnownOne
.clearAllBits();
125 KnownZero
= DemandedMask
;
129 KnownZero
.clearAllBits();
130 KnownOne
.clearAllBits();
131 if (DemandedMask
== 0) { // Not demanding any bits from V.
132 if (isa
<UndefValue
>(V
))
134 return UndefValue::get(VTy
);
137 if (Depth
== 6) // Limit search depth.
140 APInt
LHSKnownZero(BitWidth
, 0), LHSKnownOne(BitWidth
, 0);
141 APInt
RHSKnownZero(BitWidth
, 0), RHSKnownOne(BitWidth
, 0);
143 Instruction
*I
= dyn_cast
<Instruction
>(V
);
145 ComputeMaskedBits(V
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
146 return 0; // Only analyze instructions.
149 // If there are multiple uses of this value and we aren't at the root, then
150 // we can't do any simplifications of the operands, because DemandedMask
151 // only reflects the bits demanded by *one* of the users.
152 if (Depth
!= 0 && !I
->hasOneUse()) {
153 // Despite the fact that we can't simplify this instruction in all User's
154 // context, we can at least compute the knownzero/knownone bits, and we can
155 // do simplifications that apply to *just* the one user if we know that
156 // this instruction has a simpler value in that context.
157 if (I
->getOpcode() == Instruction::And
) {
158 // If either the LHS or the RHS are Zero, the result is zero.
159 ComputeMaskedBits(I
->getOperand(1), DemandedMask
,
160 RHSKnownZero
, RHSKnownOne
, Depth
+1);
161 ComputeMaskedBits(I
->getOperand(0), DemandedMask
& ~RHSKnownZero
,
162 LHSKnownZero
, LHSKnownOne
, Depth
+1);
164 // If all of the demanded bits are known 1 on one side, return the other.
165 // These bits cannot contribute to the result of the 'and' in this
167 if ((DemandedMask
& ~LHSKnownZero
& RHSKnownOne
) ==
168 (DemandedMask
& ~LHSKnownZero
))
169 return I
->getOperand(0);
170 if ((DemandedMask
& ~RHSKnownZero
& LHSKnownOne
) ==
171 (DemandedMask
& ~RHSKnownZero
))
172 return I
->getOperand(1);
174 // If all of the demanded bits in the inputs are known zeros, return zero.
175 if ((DemandedMask
& (RHSKnownZero
|LHSKnownZero
)) == DemandedMask
)
176 return Constant::getNullValue(VTy
);
178 } else if (I
->getOpcode() == Instruction::Or
) {
179 // We can simplify (X|Y) -> X or Y in the user's context if we know that
180 // only bits from X or Y are demanded.
182 // If either the LHS or the RHS are One, the result is One.
183 ComputeMaskedBits(I
->getOperand(1), DemandedMask
,
184 RHSKnownZero
, RHSKnownOne
, Depth
+1);
185 ComputeMaskedBits(I
->getOperand(0), DemandedMask
& ~RHSKnownOne
,
186 LHSKnownZero
, LHSKnownOne
, Depth
+1);
188 // If all of the demanded bits are known zero on one side, return the
189 // other. These bits cannot contribute to the result of the 'or' in this
191 if ((DemandedMask
& ~LHSKnownOne
& RHSKnownZero
) ==
192 (DemandedMask
& ~LHSKnownOne
))
193 return I
->getOperand(0);
194 if ((DemandedMask
& ~RHSKnownOne
& LHSKnownZero
) ==
195 (DemandedMask
& ~RHSKnownOne
))
196 return I
->getOperand(1);
198 // If all of the potentially set bits on one side are known to be set on
199 // the other side, just use the 'other' side.
200 if ((DemandedMask
& (~RHSKnownZero
) & LHSKnownOne
) ==
201 (DemandedMask
& (~RHSKnownZero
)))
202 return I
->getOperand(0);
203 if ((DemandedMask
& (~LHSKnownZero
) & RHSKnownOne
) ==
204 (DemandedMask
& (~LHSKnownZero
)))
205 return I
->getOperand(1);
208 // Compute the KnownZero/KnownOne bits to simplify things downstream.
209 ComputeMaskedBits(I
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
213 // If this is the root being simplified, allow it to have multiple uses,
214 // just set the DemandedMask to all bits so that we can try to simplify the
215 // operands. This allows visitTruncInst (for example) to simplify the
216 // operand of a trunc without duplicating all the logic below.
217 if (Depth
== 0 && !V
->hasOneUse())
218 DemandedMask
= APInt::getAllOnesValue(BitWidth
);
220 switch (I
->getOpcode()) {
222 ComputeMaskedBits(I
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
224 case Instruction::And
:
225 // If either the LHS or the RHS are Zero, the result is zero.
226 if (SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
227 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
228 SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
& ~RHSKnownZero
,
229 LHSKnownZero
, LHSKnownOne
, Depth
+1))
231 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
232 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
234 // If all of the demanded bits are known 1 on one side, return the other.
235 // These bits cannot contribute to the result of the 'and'.
236 if ((DemandedMask
& ~LHSKnownZero
& RHSKnownOne
) ==
237 (DemandedMask
& ~LHSKnownZero
))
238 return I
->getOperand(0);
239 if ((DemandedMask
& ~RHSKnownZero
& LHSKnownOne
) ==
240 (DemandedMask
& ~RHSKnownZero
))
241 return I
->getOperand(1);
243 // If all of the demanded bits in the inputs are known zeros, return zero.
244 if ((DemandedMask
& (RHSKnownZero
|LHSKnownZero
)) == DemandedMask
)
245 return Constant::getNullValue(VTy
);
247 // If the RHS is a constant, see if we can simplify it.
248 if (ShrinkDemandedConstant(I
, 1, DemandedMask
& ~LHSKnownZero
))
251 // Output known-1 bits are only known if set in both the LHS & RHS.
252 KnownOne
= RHSKnownOne
& LHSKnownOne
;
253 // Output known-0 are known to be clear if zero in either the LHS | RHS.
254 KnownZero
= RHSKnownZero
| LHSKnownZero
;
256 case Instruction::Or
:
257 // If either the LHS or the RHS are One, the result is One.
258 if (SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
259 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
260 SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
& ~RHSKnownOne
,
261 LHSKnownZero
, LHSKnownOne
, Depth
+1))
263 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
264 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
266 // If all of the demanded bits are known zero on one side, return the other.
267 // These bits cannot contribute to the result of the 'or'.
268 if ((DemandedMask
& ~LHSKnownOne
& RHSKnownZero
) ==
269 (DemandedMask
& ~LHSKnownOne
))
270 return I
->getOperand(0);
271 if ((DemandedMask
& ~RHSKnownOne
& LHSKnownZero
) ==
272 (DemandedMask
& ~RHSKnownOne
))
273 return I
->getOperand(1);
275 // If all of the potentially set bits on one side are known to be set on
276 // the other side, just use the 'other' side.
277 if ((DemandedMask
& (~RHSKnownZero
) & LHSKnownOne
) ==
278 (DemandedMask
& (~RHSKnownZero
)))
279 return I
->getOperand(0);
280 if ((DemandedMask
& (~LHSKnownZero
) & RHSKnownOne
) ==
281 (DemandedMask
& (~LHSKnownZero
)))
282 return I
->getOperand(1);
284 // If the RHS is a constant, see if we can simplify it.
285 if (ShrinkDemandedConstant(I
, 1, DemandedMask
))
288 // Output known-0 bits are only known if clear in both the LHS & RHS.
289 KnownZero
= RHSKnownZero
& LHSKnownZero
;
290 // Output known-1 are known to be set if set in either the LHS | RHS.
291 KnownOne
= RHSKnownOne
| LHSKnownOne
;
293 case Instruction::Xor
: {
294 if (SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
295 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
296 SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
297 LHSKnownZero
, LHSKnownOne
, Depth
+1))
299 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
300 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
302 // If all of the demanded bits are known zero on one side, return the other.
303 // These bits cannot contribute to the result of the 'xor'.
304 if ((DemandedMask
& RHSKnownZero
) == DemandedMask
)
305 return I
->getOperand(0);
306 if ((DemandedMask
& LHSKnownZero
) == DemandedMask
)
307 return I
->getOperand(1);
309 // If all of the demanded bits are known to be zero on one side or the
310 // other, turn this into an *inclusive* or.
311 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
312 if ((DemandedMask
& ~RHSKnownZero
& ~LHSKnownZero
) == 0) {
314 BinaryOperator::CreateOr(I
->getOperand(0), I
->getOperand(1),
316 return InsertNewInstBefore(Or
, *I
);
319 // If all of the demanded bits on one side are known, and all of the set
320 // bits on that side are also known to be set on the other side, turn this
321 // into an AND, as we know the bits will be cleared.
322 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
323 if ((DemandedMask
& (RHSKnownZero
|RHSKnownOne
)) == DemandedMask
) {
325 if ((RHSKnownOne
& LHSKnownOne
) == RHSKnownOne
) {
326 Constant
*AndC
= Constant::getIntegerValue(VTy
,
327 ~RHSKnownOne
& DemandedMask
);
329 BinaryOperator::CreateAnd(I
->getOperand(0), AndC
, "tmp");
330 return InsertNewInstBefore(And
, *I
);
334 // If the RHS is a constant, see if we can simplify it.
335 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
336 if (ShrinkDemandedConstant(I
, 1, DemandedMask
))
339 // If our LHS is an 'and' and if it has one use, and if any of the bits we
340 // are flipping are known to be set, then the xor is just resetting those
341 // bits to zero. We can just knock out bits from the 'and' and the 'xor',
342 // simplifying both of them.
343 if (Instruction
*LHSInst
= dyn_cast
<Instruction
>(I
->getOperand(0)))
344 if (LHSInst
->getOpcode() == Instruction::And
&& LHSInst
->hasOneUse() &&
345 isa
<ConstantInt
>(I
->getOperand(1)) &&
346 isa
<ConstantInt
>(LHSInst
->getOperand(1)) &&
347 (LHSKnownOne
& RHSKnownOne
& DemandedMask
) != 0) {
348 ConstantInt
*AndRHS
= cast
<ConstantInt
>(LHSInst
->getOperand(1));
349 ConstantInt
*XorRHS
= cast
<ConstantInt
>(I
->getOperand(1));
350 APInt NewMask
= ~(LHSKnownOne
& RHSKnownOne
& DemandedMask
);
353 ConstantInt::get(I
->getType(), NewMask
& AndRHS
->getValue());
354 Instruction
*NewAnd
=
355 BinaryOperator::CreateAnd(I
->getOperand(0), AndC
, "tmp");
356 InsertNewInstBefore(NewAnd
, *I
);
359 ConstantInt::get(I
->getType(), NewMask
& XorRHS
->getValue());
360 Instruction
*NewXor
=
361 BinaryOperator::CreateXor(NewAnd
, XorC
, "tmp");
362 return InsertNewInstBefore(NewXor
, *I
);
365 // Output known-0 bits are known if clear or set in both the LHS & RHS.
366 KnownZero
= (RHSKnownZero
& LHSKnownZero
) | (RHSKnownOne
& LHSKnownOne
);
367 // Output known-1 are known to be set if set in only one of the LHS, RHS.
368 KnownOne
= (RHSKnownZero
& LHSKnownOne
) | (RHSKnownOne
& LHSKnownZero
);
371 case Instruction::Select
:
372 if (SimplifyDemandedBits(I
->getOperandUse(2), DemandedMask
,
373 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
374 SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
375 LHSKnownZero
, LHSKnownOne
, Depth
+1))
377 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
378 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
380 // If the operands are constants, see if we can simplify them.
381 if (ShrinkDemandedConstant(I
, 1, DemandedMask
) ||
382 ShrinkDemandedConstant(I
, 2, DemandedMask
))
385 // Only known if known in both the LHS and RHS.
386 KnownOne
= RHSKnownOne
& LHSKnownOne
;
387 KnownZero
= RHSKnownZero
& LHSKnownZero
;
389 case Instruction::Trunc
: {
390 unsigned truncBf
= I
->getOperand(0)->getType()->getScalarSizeInBits();
391 DemandedMask
= DemandedMask
.zext(truncBf
);
392 KnownZero
= KnownZero
.zext(truncBf
);
393 KnownOne
= KnownOne
.zext(truncBf
);
394 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
395 KnownZero
, KnownOne
, Depth
+1))
397 DemandedMask
= DemandedMask
.trunc(BitWidth
);
398 KnownZero
= KnownZero
.trunc(BitWidth
);
399 KnownOne
= KnownOne
.trunc(BitWidth
);
400 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
403 case Instruction::BitCast
:
404 if (!I
->getOperand(0)->getType()->isIntOrIntVectorTy())
405 return 0; // vector->int or fp->int?
407 if (const VectorType
*DstVTy
= dyn_cast
<VectorType
>(I
->getType())) {
408 if (const VectorType
*SrcVTy
=
409 dyn_cast
<VectorType
>(I
->getOperand(0)->getType())) {
410 if (DstVTy
->getNumElements() != SrcVTy
->getNumElements())
411 // Don't touch a bitcast between vectors of different element counts.
414 // Don't touch a scalar-to-vector bitcast.
416 } else if (I
->getOperand(0)->getType()->isVectorTy())
417 // Don't touch a vector-to-scalar bitcast.
420 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
421 KnownZero
, KnownOne
, Depth
+1))
423 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
425 case Instruction::ZExt
: {
426 // Compute the bits in the result that are not present in the input.
427 unsigned SrcBitWidth
=I
->getOperand(0)->getType()->getScalarSizeInBits();
429 DemandedMask
= DemandedMask
.trunc(SrcBitWidth
);
430 KnownZero
= KnownZero
.trunc(SrcBitWidth
);
431 KnownOne
= KnownOne
.trunc(SrcBitWidth
);
432 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
433 KnownZero
, KnownOne
, Depth
+1))
435 DemandedMask
= DemandedMask
.zext(BitWidth
);
436 KnownZero
= KnownZero
.zext(BitWidth
);
437 KnownOne
= KnownOne
.zext(BitWidth
);
438 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
439 // The top bits are known to be zero.
440 KnownZero
|= APInt::getHighBitsSet(BitWidth
, BitWidth
- SrcBitWidth
);
443 case Instruction::SExt
: {
444 // Compute the bits in the result that are not present in the input.
445 unsigned SrcBitWidth
=I
->getOperand(0)->getType()->getScalarSizeInBits();
447 APInt InputDemandedBits
= DemandedMask
&
448 APInt::getLowBitsSet(BitWidth
, SrcBitWidth
);
450 APInt
NewBits(APInt::getHighBitsSet(BitWidth
, BitWidth
- SrcBitWidth
));
451 // If any of the sign extended bits are demanded, we know that the sign
453 if ((NewBits
& DemandedMask
) != 0)
454 InputDemandedBits
.setBit(SrcBitWidth
-1);
456 InputDemandedBits
= InputDemandedBits
.trunc(SrcBitWidth
);
457 KnownZero
= KnownZero
.trunc(SrcBitWidth
);
458 KnownOne
= KnownOne
.trunc(SrcBitWidth
);
459 if (SimplifyDemandedBits(I
->getOperandUse(0), InputDemandedBits
,
460 KnownZero
, KnownOne
, Depth
+1))
462 InputDemandedBits
= InputDemandedBits
.zext(BitWidth
);
463 KnownZero
= KnownZero
.zext(BitWidth
);
464 KnownOne
= KnownOne
.zext(BitWidth
);
465 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
467 // If the sign bit of the input is known set or clear, then we know the
468 // top bits of the result.
470 // If the input sign bit is known zero, or if the NewBits are not demanded
471 // convert this into a zero extension.
472 if (KnownZero
[SrcBitWidth
-1] || (NewBits
& ~DemandedMask
) == NewBits
) {
473 // Convert to ZExt cast
474 CastInst
*NewCast
= new ZExtInst(I
->getOperand(0), VTy
, I
->getName());
475 return InsertNewInstBefore(NewCast
, *I
);
476 } else if (KnownOne
[SrcBitWidth
-1]) { // Input sign bit known set
481 case Instruction::Add
: {
482 // Figure out what the input bits are. If the top bits of the and result
483 // are not demanded, then the add doesn't demand them from its input
485 unsigned NLZ
= DemandedMask
.countLeadingZeros();
487 // If there is a constant on the RHS, there are a variety of xformations
489 if (ConstantInt
*RHS
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
490 // If null, this should be simplified elsewhere. Some of the xforms here
491 // won't work if the RHS is zero.
495 // If the top bit of the output is demanded, demand everything from the
496 // input. Otherwise, we demand all the input bits except NLZ top bits.
497 APInt
InDemandedBits(APInt::getLowBitsSet(BitWidth
, BitWidth
- NLZ
));
499 // Find information about known zero/one bits in the input.
500 if (SimplifyDemandedBits(I
->getOperandUse(0), InDemandedBits
,
501 LHSKnownZero
, LHSKnownOne
, Depth
+1))
504 // If the RHS of the add has bits set that can't affect the input, reduce
506 if (ShrinkDemandedConstant(I
, 1, InDemandedBits
))
509 // Avoid excess work.
510 if (LHSKnownZero
== 0 && LHSKnownOne
== 0)
513 // Turn it into OR if input bits are zero.
514 if ((LHSKnownZero
& RHS
->getValue()) == RHS
->getValue()) {
516 BinaryOperator::CreateOr(I
->getOperand(0), I
->getOperand(1),
518 return InsertNewInstBefore(Or
, *I
);
521 // We can say something about the output known-zero and known-one bits,
522 // depending on potential carries from the input constant and the
523 // unknowns. For example if the LHS is known to have at most the 0x0F0F0
524 // bits set and the RHS constant is 0x01001, then we know we have a known
525 // one mask of 0x00001 and a known zero mask of 0xE0F0E.
527 // To compute this, we first compute the potential carry bits. These are
528 // the bits which may be modified. I'm not aware of a better way to do
530 const APInt
&RHSVal
= RHS
->getValue();
531 APInt
CarryBits((~LHSKnownZero
+ RHSVal
) ^ (~LHSKnownZero
^ RHSVal
));
533 // Now that we know which bits have carries, compute the known-1/0 sets.
535 // Bits are known one if they are known zero in one operand and one in the
536 // other, and there is no input carry.
537 KnownOne
= ((LHSKnownZero
& RHSVal
) |
538 (LHSKnownOne
& ~RHSVal
)) & ~CarryBits
;
540 // Bits are known zero if they are known zero in both operands and there
541 // is no input carry.
542 KnownZero
= LHSKnownZero
& ~RHSVal
& ~CarryBits
;
544 // If the high-bits of this ADD are not demanded, then it does not demand
545 // the high bits of its LHS or RHS.
546 if (DemandedMask
[BitWidth
-1] == 0) {
547 // Right fill the mask of bits for this ADD to demand the most
548 // significant bit and all those below it.
549 APInt
DemandedFromOps(APInt::getLowBitsSet(BitWidth
, BitWidth
-NLZ
));
550 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedFromOps
,
551 LHSKnownZero
, LHSKnownOne
, Depth
+1) ||
552 SimplifyDemandedBits(I
->getOperandUse(1), DemandedFromOps
,
553 LHSKnownZero
, LHSKnownOne
, Depth
+1))
559 case Instruction::Sub
:
560 // If the high-bits of this SUB are not demanded, then it does not demand
561 // the high bits of its LHS or RHS.
562 if (DemandedMask
[BitWidth
-1] == 0) {
563 // Right fill the mask of bits for this SUB to demand the most
564 // significant bit and all those below it.
565 uint32_t NLZ
= DemandedMask
.countLeadingZeros();
566 APInt
DemandedFromOps(APInt::getLowBitsSet(BitWidth
, BitWidth
-NLZ
));
567 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedFromOps
,
568 LHSKnownZero
, LHSKnownOne
, Depth
+1) ||
569 SimplifyDemandedBits(I
->getOperandUse(1), DemandedFromOps
,
570 LHSKnownZero
, LHSKnownOne
, Depth
+1))
573 // Otherwise just hand the sub off to ComputeMaskedBits to fill in
574 // the known zeros and ones.
575 ComputeMaskedBits(V
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
577 case Instruction::Shl
:
578 if (ConstantInt
*SA
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
579 uint64_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
580 APInt
DemandedMaskIn(DemandedMask
.lshr(ShiftAmt
));
582 // If the shift is NUW/NSW, then it does demand the high bits.
583 ShlOperator
*IOp
= cast
<ShlOperator
>(I
);
584 if (IOp
->hasNoSignedWrap())
585 DemandedMaskIn
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
+1);
586 else if (IOp
->hasNoUnsignedWrap())
587 DemandedMaskIn
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
);
589 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMaskIn
,
590 KnownZero
, KnownOne
, Depth
+1))
592 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
593 KnownZero
<<= ShiftAmt
;
594 KnownOne
<<= ShiftAmt
;
595 // low bits known zero.
597 KnownZero
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
600 case Instruction::LShr
:
601 // For a logical shift right
602 if (ConstantInt
*SA
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
603 uint64_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
605 // Unsigned shift right.
606 APInt
DemandedMaskIn(DemandedMask
.shl(ShiftAmt
));
608 // If the shift is exact, then it does demand the low bits (and knows that
610 if (cast
<LShrOperator
>(I
)->isExact())
611 DemandedMaskIn
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
613 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMaskIn
,
614 KnownZero
, KnownOne
, Depth
+1))
616 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
617 KnownZero
= APIntOps::lshr(KnownZero
, ShiftAmt
);
618 KnownOne
= APIntOps::lshr(KnownOne
, ShiftAmt
);
620 // Compute the new bits that are at the top now.
621 APInt
HighBits(APInt::getHighBitsSet(BitWidth
, ShiftAmt
));
622 KnownZero
|= HighBits
; // high bits known zero.
626 case Instruction::AShr
:
627 // If this is an arithmetic shift right and only the low-bit is set, we can
628 // always convert this into a logical shr, even if the shift amount is
629 // variable. The low bit of the shift cannot be an input sign bit unless
630 // the shift amount is >= the size of the datatype, which is undefined.
631 if (DemandedMask
== 1) {
632 // Perform the logical shift right.
633 Instruction
*NewVal
= BinaryOperator::CreateLShr(
634 I
->getOperand(0), I
->getOperand(1), I
->getName());
635 return InsertNewInstBefore(NewVal
, *I
);
638 // If the sign bit is the only bit demanded by this ashr, then there is no
639 // need to do it, the shift doesn't change the high bit.
640 if (DemandedMask
.isSignBit())
641 return I
->getOperand(0);
643 if (ConstantInt
*SA
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
644 uint32_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
646 // Signed shift right.
647 APInt
DemandedMaskIn(DemandedMask
.shl(ShiftAmt
));
648 // If any of the "high bits" are demanded, we should set the sign bit as
650 if (DemandedMask
.countLeadingZeros() <= ShiftAmt
)
651 DemandedMaskIn
.setBit(BitWidth
-1);
653 // If the shift is exact, then it does demand the low bits (and knows that
655 if (cast
<AShrOperator
>(I
)->isExact())
656 DemandedMaskIn
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
658 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMaskIn
,
659 KnownZero
, KnownOne
, Depth
+1))
661 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
662 // Compute the new bits that are at the top now.
663 APInt
HighBits(APInt::getHighBitsSet(BitWidth
, ShiftAmt
));
664 KnownZero
= APIntOps::lshr(KnownZero
, ShiftAmt
);
665 KnownOne
= APIntOps::lshr(KnownOne
, ShiftAmt
);
667 // Handle the sign bits.
668 APInt
SignBit(APInt::getSignBit(BitWidth
));
669 // Adjust to where it is now in the mask.
670 SignBit
= APIntOps::lshr(SignBit
, ShiftAmt
);
672 // If the input sign bit is known to be zero, or if none of the top bits
673 // are demanded, turn this into an unsigned shift right.
674 if (BitWidth
<= ShiftAmt
|| KnownZero
[BitWidth
-ShiftAmt
-1] ||
675 (HighBits
& ~DemandedMask
) == HighBits
) {
676 // Perform the logical shift right.
677 Instruction
*NewVal
= BinaryOperator::CreateLShr(
678 I
->getOperand(0), SA
, I
->getName());
679 return InsertNewInstBefore(NewVal
, *I
);
680 } else if ((KnownOne
& SignBit
) != 0) { // New bits are known one.
681 KnownOne
|= HighBits
;
685 case Instruction::SRem
:
686 if (ConstantInt
*Rem
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
687 // X % -1 demands all the bits because we don't want to introduce
688 // INT_MIN % -1 (== undef) by accident.
689 if (Rem
->isAllOnesValue())
691 APInt RA
= Rem
->getValue().abs();
692 if (RA
.isPowerOf2()) {
693 if (DemandedMask
.ult(RA
)) // srem won't affect demanded bits
694 return I
->getOperand(0);
696 APInt LowBits
= RA
- 1;
697 APInt Mask2
= LowBits
| APInt::getSignBit(BitWidth
);
698 if (SimplifyDemandedBits(I
->getOperandUse(0), Mask2
,
699 LHSKnownZero
, LHSKnownOne
, Depth
+1))
702 // The low bits of LHS are unchanged by the srem.
703 KnownZero
= LHSKnownZero
& LowBits
;
704 KnownOne
= LHSKnownOne
& LowBits
;
706 // If LHS is non-negative or has all low bits zero, then the upper bits
708 if (LHSKnownZero
[BitWidth
-1] || ((LHSKnownZero
& LowBits
) == LowBits
))
709 KnownZero
|= ~LowBits
;
711 // If LHS is negative and not all low bits are zero, then the upper bits
713 if (LHSKnownOne
[BitWidth
-1] && ((LHSKnownOne
& LowBits
) != 0))
714 KnownOne
|= ~LowBits
;
716 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
720 // The sign bit is the LHS's sign bit, except when the result of the
721 // remainder is zero.
722 if (DemandedMask
.isNegative() && KnownZero
.isNonNegative()) {
723 APInt Mask2
= APInt::getSignBit(BitWidth
);
724 APInt
LHSKnownZero(BitWidth
, 0), LHSKnownOne(BitWidth
, 0);
725 ComputeMaskedBits(I
->getOperand(0), Mask2
, LHSKnownZero
, LHSKnownOne
,
727 // If it's known zero, our sign bit is also zero.
728 if (LHSKnownZero
.isNegative())
729 KnownZero
|= LHSKnownZero
;
732 case Instruction::URem
: {
733 APInt
KnownZero2(BitWidth
, 0), KnownOne2(BitWidth
, 0);
734 APInt AllOnes
= APInt::getAllOnesValue(BitWidth
);
735 if (SimplifyDemandedBits(I
->getOperandUse(0), AllOnes
,
736 KnownZero2
, KnownOne2
, Depth
+1) ||
737 SimplifyDemandedBits(I
->getOperandUse(1), AllOnes
,
738 KnownZero2
, KnownOne2
, Depth
+1))
741 unsigned Leaders
= KnownZero2
.countLeadingOnes();
742 Leaders
= std::max(Leaders
,
743 KnownZero2
.countLeadingOnes());
744 KnownZero
= APInt::getHighBitsSet(BitWidth
, Leaders
) & DemandedMask
;
747 case Instruction::Call
:
748 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
)) {
749 switch (II
->getIntrinsicID()) {
751 case Intrinsic::bswap
: {
752 // If the only bits demanded come from one byte of the bswap result,
753 // just shift the input byte into position to eliminate the bswap.
754 unsigned NLZ
= DemandedMask
.countLeadingZeros();
755 unsigned NTZ
= DemandedMask
.countTrailingZeros();
757 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
758 // we need all the bits down to bit 8. Likewise, round NLZ. If we
759 // have 14 leading zeros, round to 8.
762 // If we need exactly one byte, we can do this transformation.
763 if (BitWidth
-NLZ
-NTZ
== 8) {
764 unsigned ResultBit
= NTZ
;
765 unsigned InputBit
= BitWidth
-NTZ
-8;
767 // Replace this with either a left or right shift to get the byte into
770 if (InputBit
> ResultBit
)
771 NewVal
= BinaryOperator::CreateLShr(II
->getArgOperand(0),
772 ConstantInt::get(I
->getType(), InputBit
-ResultBit
));
774 NewVal
= BinaryOperator::CreateShl(II
->getArgOperand(0),
775 ConstantInt::get(I
->getType(), ResultBit
-InputBit
));
777 return InsertNewInstBefore(NewVal
, *I
);
780 // TODO: Could compute known zero/one bits based on the input.
785 ComputeMaskedBits(V
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
789 // If the client is only demanding bits that we know, return the known
791 if ((DemandedMask
& (KnownZero
|KnownOne
)) == DemandedMask
)
792 return Constant::getIntegerValue(VTy
, KnownOne
);
797 /// SimplifyDemandedVectorElts - The specified value produces a vector with
798 /// any number of elements. DemandedElts contains the set of elements that are
799 /// actually used by the caller. This method analyzes which elements of the
800 /// operand are undef and returns that information in UndefElts.
802 /// If the information about demanded elements can be used to simplify the
803 /// operation, the operation is simplified, then the resultant value is
804 /// returned. This returns null if no change was made.
805 Value
*InstCombiner::SimplifyDemandedVectorElts(Value
*V
, APInt DemandedElts
,
808 unsigned VWidth
= cast
<VectorType
>(V
->getType())->getNumElements();
809 APInt
EltMask(APInt::getAllOnesValue(VWidth
));
810 assert((DemandedElts
& ~EltMask
) == 0 && "Invalid DemandedElts!");
812 if (isa
<UndefValue
>(V
)) {
813 // If the entire vector is undefined, just return this info.
818 if (DemandedElts
== 0) { // If nothing is demanded, provide undef.
820 return UndefValue::get(V
->getType());
824 if (ConstantVector
*CV
= dyn_cast
<ConstantVector
>(V
)) {
825 const Type
*EltTy
= cast
<VectorType
>(V
->getType())->getElementType();
826 Constant
*Undef
= UndefValue::get(EltTy
);
828 std::vector
<Constant
*> Elts
;
829 for (unsigned i
= 0; i
!= VWidth
; ++i
)
830 if (!DemandedElts
[i
]) { // If not demanded, set to undef.
831 Elts
.push_back(Undef
);
833 } else if (isa
<UndefValue
>(CV
->getOperand(i
))) { // Already undef.
834 Elts
.push_back(Undef
);
836 } else { // Otherwise, defined.
837 Elts
.push_back(CV
->getOperand(i
));
840 // If we changed the constant, return it.
841 Constant
*NewCP
= ConstantVector::get(Elts
);
842 return NewCP
!= CV
? NewCP
: 0;
845 if (isa
<ConstantAggregateZero
>(V
)) {
846 // Simplify the CAZ to a ConstantVector where the non-demanded elements are
849 // Check if this is identity. If so, return 0 since we are not simplifying
851 if (DemandedElts
.isAllOnesValue())
854 const Type
*EltTy
= cast
<VectorType
>(V
->getType())->getElementType();
855 Constant
*Zero
= Constant::getNullValue(EltTy
);
856 Constant
*Undef
= UndefValue::get(EltTy
);
857 std::vector
<Constant
*> Elts
;
858 for (unsigned i
= 0; i
!= VWidth
; ++i
) {
859 Constant
*Elt
= DemandedElts
[i
] ? Zero
: Undef
;
862 UndefElts
= DemandedElts
^ EltMask
;
863 return ConstantVector::get(Elts
);
866 // Limit search depth.
870 // If multiple users are using the root value, procede with
871 // simplification conservatively assuming that all elements
873 if (!V
->hasOneUse()) {
874 // Quit if we find multiple users of a non-root value though.
875 // They'll be handled when it's their turn to be visited by
876 // the main instcombine process.
878 // TODO: Just compute the UndefElts information recursively.
881 // Conservatively assume that all elements are needed.
882 DemandedElts
= EltMask
;
885 Instruction
*I
= dyn_cast
<Instruction
>(V
);
886 if (!I
) return 0; // Only analyze instructions.
888 bool MadeChange
= false;
889 APInt
UndefElts2(VWidth
, 0);
891 switch (I
->getOpcode()) {
894 case Instruction::InsertElement
: {
895 // If this is a variable index, we don't know which element it overwrites.
896 // demand exactly the same input as we produce.
897 ConstantInt
*Idx
= dyn_cast
<ConstantInt
>(I
->getOperand(2));
899 // Note that we can't propagate undef elt info, because we don't know
900 // which elt is getting updated.
901 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), DemandedElts
,
902 UndefElts2
, Depth
+1);
903 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
907 // If this is inserting an element that isn't demanded, remove this
909 unsigned IdxNo
= Idx
->getZExtValue();
910 if (IdxNo
>= VWidth
|| !DemandedElts
[IdxNo
]) {
912 return I
->getOperand(0);
915 // Otherwise, the element inserted overwrites whatever was there, so the
916 // input demanded set is simpler than the output set.
917 APInt DemandedElts2
= DemandedElts
;
918 DemandedElts2
.clearBit(IdxNo
);
919 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), DemandedElts2
,
921 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
923 // The inserted element is defined.
924 UndefElts
.clearBit(IdxNo
);
927 case Instruction::ShuffleVector
: {
928 ShuffleVectorInst
*Shuffle
= cast
<ShuffleVectorInst
>(I
);
930 cast
<VectorType
>(Shuffle
->getOperand(0)->getType())->getNumElements();
931 APInt
LeftDemanded(LHSVWidth
, 0), RightDemanded(LHSVWidth
, 0);
932 for (unsigned i
= 0; i
< VWidth
; i
++) {
933 if (DemandedElts
[i
]) {
934 unsigned MaskVal
= Shuffle
->getMaskValue(i
);
935 if (MaskVal
!= -1u) {
936 assert(MaskVal
< LHSVWidth
* 2 &&
937 "shufflevector mask index out of range!");
938 if (MaskVal
< LHSVWidth
)
939 LeftDemanded
.setBit(MaskVal
);
941 RightDemanded
.setBit(MaskVal
- LHSVWidth
);
946 APInt
UndefElts4(LHSVWidth
, 0);
947 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), LeftDemanded
,
948 UndefElts4
, Depth
+1);
949 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
951 APInt
UndefElts3(LHSVWidth
, 0);
952 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(1), RightDemanded
,
953 UndefElts3
, Depth
+1);
954 if (TmpV
) { I
->setOperand(1, TmpV
); MadeChange
= true; }
956 bool NewUndefElts
= false;
957 for (unsigned i
= 0; i
< VWidth
; i
++) {
958 unsigned MaskVal
= Shuffle
->getMaskValue(i
);
959 if (MaskVal
== -1u) {
961 } else if (MaskVal
< LHSVWidth
) {
962 if (UndefElts4
[MaskVal
]) {
967 if (UndefElts3
[MaskVal
- LHSVWidth
]) {
975 // Add additional discovered undefs.
976 std::vector
<Constant
*> Elts
;
977 for (unsigned i
= 0; i
< VWidth
; ++i
) {
979 Elts
.push_back(UndefValue::get(Type::getInt32Ty(I
->getContext())));
981 Elts
.push_back(ConstantInt::get(Type::getInt32Ty(I
->getContext()),
982 Shuffle
->getMaskValue(i
)));
984 I
->setOperand(2, ConstantVector::get(Elts
));
989 case Instruction::BitCast
: {
990 // Vector->vector casts only.
991 const VectorType
*VTy
= dyn_cast
<VectorType
>(I
->getOperand(0)->getType());
993 unsigned InVWidth
= VTy
->getNumElements();
994 APInt
InputDemandedElts(InVWidth
, 0);
997 if (VWidth
== InVWidth
) {
998 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
999 // elements as are demanded of us.
1001 InputDemandedElts
= DemandedElts
;
1002 } else if (VWidth
> InVWidth
) {
1006 // If there are more elements in the result than there are in the source,
1007 // then an input element is live if any of the corresponding output
1008 // elements are live.
1009 Ratio
= VWidth
/InVWidth
;
1010 for (unsigned OutIdx
= 0; OutIdx
!= VWidth
; ++OutIdx
) {
1011 if (DemandedElts
[OutIdx
])
1012 InputDemandedElts
.setBit(OutIdx
/Ratio
);
1018 // If there are more elements in the source than there are in the result,
1019 // then an input element is live if the corresponding output element is
1021 Ratio
= InVWidth
/VWidth
;
1022 for (unsigned InIdx
= 0; InIdx
!= InVWidth
; ++InIdx
)
1023 if (DemandedElts
[InIdx
/Ratio
])
1024 InputDemandedElts
.setBit(InIdx
);
1027 // div/rem demand all inputs, because they don't want divide by zero.
1028 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), InputDemandedElts
,
1029 UndefElts2
, Depth
+1);
1031 I
->setOperand(0, TmpV
);
1035 UndefElts
= UndefElts2
;
1036 if (VWidth
> InVWidth
) {
1037 llvm_unreachable("Unimp");
1038 // If there are more elements in the result than there are in the source,
1039 // then an output element is undef if the corresponding input element is
1041 for (unsigned OutIdx
= 0; OutIdx
!= VWidth
; ++OutIdx
)
1042 if (UndefElts2
[OutIdx
/Ratio
])
1043 UndefElts
.setBit(OutIdx
);
1044 } else if (VWidth
< InVWidth
) {
1045 llvm_unreachable("Unimp");
1046 // If there are more elements in the source than there are in the result,
1047 // then a result element is undef if all of the corresponding input
1048 // elements are undef.
1049 UndefElts
= ~0ULL >> (64-VWidth
); // Start out all undef.
1050 for (unsigned InIdx
= 0; InIdx
!= InVWidth
; ++InIdx
)
1051 if (!UndefElts2
[InIdx
]) // Not undef?
1052 UndefElts
.clearBit(InIdx
/Ratio
); // Clear undef bit.
1056 case Instruction::And
:
1057 case Instruction::Or
:
1058 case Instruction::Xor
:
1059 case Instruction::Add
:
1060 case Instruction::Sub
:
1061 case Instruction::Mul
:
1062 // div/rem demand all inputs, because they don't want divide by zero.
1063 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), DemandedElts
,
1064 UndefElts
, Depth
+1);
1065 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
1066 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(1), DemandedElts
,
1067 UndefElts2
, Depth
+1);
1068 if (TmpV
) { I
->setOperand(1, TmpV
); MadeChange
= true; }
1070 // Output elements are undefined if both are undefined. Consider things
1071 // like undef&0. The result is known zero, not undef.
1072 UndefElts
&= UndefElts2
;
1075 case Instruction::Call
: {
1076 IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
);
1078 switch (II
->getIntrinsicID()) {
1081 // Binary vector operations that work column-wise. A dest element is a
1082 // function of the corresponding input elements from the two inputs.
1083 case Intrinsic::x86_sse_sub_ss
:
1084 case Intrinsic::x86_sse_mul_ss
:
1085 case Intrinsic::x86_sse_min_ss
:
1086 case Intrinsic::x86_sse_max_ss
:
1087 case Intrinsic::x86_sse2_sub_sd
:
1088 case Intrinsic::x86_sse2_mul_sd
:
1089 case Intrinsic::x86_sse2_min_sd
:
1090 case Intrinsic::x86_sse2_max_sd
:
1091 TmpV
= SimplifyDemandedVectorElts(II
->getArgOperand(0), DemandedElts
,
1092 UndefElts
, Depth
+1);
1093 if (TmpV
) { II
->setArgOperand(0, TmpV
); MadeChange
= true; }
1094 TmpV
= SimplifyDemandedVectorElts(II
->getArgOperand(1), DemandedElts
,
1095 UndefElts2
, Depth
+1);
1096 if (TmpV
) { II
->setArgOperand(1, TmpV
); MadeChange
= true; }
1098 // If only the low elt is demanded and this is a scalarizable intrinsic,
1099 // scalarize it now.
1100 if (DemandedElts
== 1) {
1101 switch (II
->getIntrinsicID()) {
1103 case Intrinsic::x86_sse_sub_ss
:
1104 case Intrinsic::x86_sse_mul_ss
:
1105 case Intrinsic::x86_sse2_sub_sd
:
1106 case Intrinsic::x86_sse2_mul_sd
:
1107 // TODO: Lower MIN/MAX/ABS/etc
1108 Value
*LHS
= II
->getArgOperand(0);
1109 Value
*RHS
= II
->getArgOperand(1);
1110 // Extract the element as scalars.
1111 LHS
= InsertNewInstBefore(ExtractElementInst::Create(LHS
,
1112 ConstantInt::get(Type::getInt32Ty(I
->getContext()), 0U)), *II
);
1113 RHS
= InsertNewInstBefore(ExtractElementInst::Create(RHS
,
1114 ConstantInt::get(Type::getInt32Ty(I
->getContext()), 0U)), *II
);
1116 switch (II
->getIntrinsicID()) {
1117 default: llvm_unreachable("Case stmts out of sync!");
1118 case Intrinsic::x86_sse_sub_ss
:
1119 case Intrinsic::x86_sse2_sub_sd
:
1120 TmpV
= InsertNewInstBefore(BinaryOperator::CreateFSub(LHS
, RHS
,
1121 II
->getName()), *II
);
1123 case Intrinsic::x86_sse_mul_ss
:
1124 case Intrinsic::x86_sse2_mul_sd
:
1125 TmpV
= InsertNewInstBefore(BinaryOperator::CreateFMul(LHS
, RHS
,
1126 II
->getName()), *II
);
1131 InsertElementInst::Create(
1132 UndefValue::get(II
->getType()), TmpV
,
1133 ConstantInt::get(Type::getInt32Ty(I
->getContext()), 0U, false),
1135 InsertNewInstBefore(New
, *II
);
1140 // Output elements are undefined if both are undefined. Consider things
1141 // like undef&0. The result is known zero, not undef.
1142 UndefElts
&= UndefElts2
;
1148 return MadeChange
? I
: 0;