1 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains logic for simplifying instructions based on information
11 // about how they are used.
13 //===----------------------------------------------------------------------===//
16 #include "InstCombine.h"
17 #include "llvm/Target/TargetData.h"
18 #include "llvm/IntrinsicInst.h"
23 /// ShrinkDemandedConstant - Check to see if the specified operand of the
24 /// specified instruction is a constant integer. If so, check to see if there
25 /// are any bits set in the constant that are not demanded. If so, shrink the
26 /// constant and return true.
27 static bool ShrinkDemandedConstant(Instruction
*I
, unsigned OpNo
,
29 assert(I
&& "No instruction?");
30 assert(OpNo
< I
->getNumOperands() && "Operand index too large");
32 // If the operand is not a constant integer, nothing to do.
33 ConstantInt
*OpC
= dyn_cast
<ConstantInt
>(I
->getOperand(OpNo
));
34 if (!OpC
) return false;
36 // If there are no bits set that aren't demanded, nothing to do.
37 Demanded
= Demanded
.zextOrTrunc(OpC
->getValue().getBitWidth());
38 if ((~Demanded
& OpC
->getValue()) == 0)
41 // This instruction is producing bits that are not demanded. Shrink the RHS.
42 Demanded
&= OpC
->getValue();
43 I
->setOperand(OpNo
, ConstantInt::get(OpC
->getType(), Demanded
));
49 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
50 /// SimplifyDemandedBits knows about. See if the instruction has any
51 /// properties that allow us to simplify its operands.
52 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction
&Inst
) {
53 unsigned BitWidth
= Inst
.getType()->getScalarSizeInBits();
54 APInt
KnownZero(BitWidth
, 0), KnownOne(BitWidth
, 0);
55 APInt
DemandedMask(APInt::getAllOnesValue(BitWidth
));
57 Value
*V
= SimplifyDemandedUseBits(&Inst
, DemandedMask
,
58 KnownZero
, KnownOne
, 0);
59 if (V
== 0) return false;
60 if (V
== &Inst
) return true;
61 ReplaceInstUsesWith(Inst
, V
);
65 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
66 /// specified instruction operand if possible, updating it in place. It returns
67 /// true if it made any change and false otherwise.
68 bool InstCombiner::SimplifyDemandedBits(Use
&U
, APInt DemandedMask
,
69 APInt
&KnownZero
, APInt
&KnownOne
,
71 Value
*NewVal
= SimplifyDemandedUseBits(U
.get(), DemandedMask
,
72 KnownZero
, KnownOne
, Depth
);
73 if (NewVal
== 0) return false;
79 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
80 /// value based on the demanded bits. When this function is called, it is known
81 /// that only the bits set in DemandedMask of the result of V are ever used
82 /// downstream. Consequently, depending on the mask and V, it may be possible
83 /// to replace V with a constant or one of its operands. In such cases, this
84 /// function does the replacement and returns true. In all other cases, it
85 /// returns false after analyzing the expression and setting KnownOne and known
86 /// to be one in the expression. KnownZero contains all the bits that are known
87 /// to be zero in the expression. These are provided to potentially allow the
88 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
89 /// the expression. KnownOne and KnownZero always follow the invariant that
90 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
91 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
92 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
93 /// and KnownOne must all be the same.
95 /// This returns null if it did not change anything and it permits no
96 /// simplification. This returns V itself if it did some simplification of V's
97 /// operands based on the information about what bits are demanded. This returns
98 /// some other non-null value if it found out that V is equal to another value
99 /// in the context where the specified bits are demanded, but not for all users.
100 Value
*InstCombiner::SimplifyDemandedUseBits(Value
*V
, APInt DemandedMask
,
101 APInt
&KnownZero
, APInt
&KnownOne
,
103 assert(V
!= 0 && "Null pointer of Value???");
104 assert(Depth
<= 6 && "Limit Search Depth");
105 uint32_t BitWidth
= DemandedMask
.getBitWidth();
106 const Type
*VTy
= V
->getType();
107 assert((TD
|| !VTy
->isPointerTy()) &&
108 "SimplifyDemandedBits needs to know bit widths!");
109 assert((!TD
|| TD
->getTypeSizeInBits(VTy
->getScalarType()) == BitWidth
) &&
110 (!VTy
->isIntOrIntVectorTy() ||
111 VTy
->getScalarSizeInBits() == BitWidth
) &&
112 KnownZero
.getBitWidth() == BitWidth
&&
113 KnownOne
.getBitWidth() == BitWidth
&&
114 "Value *V, DemandedMask, KnownZero and KnownOne "
115 "must have same BitWidth");
116 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(V
)) {
117 // We know all of the bits for a constant!
118 KnownOne
= CI
->getValue() & DemandedMask
;
119 KnownZero
= ~KnownOne
& DemandedMask
;
122 if (isa
<ConstantPointerNull
>(V
)) {
123 // We know all of the bits for a constant!
124 KnownOne
.clearAllBits();
125 KnownZero
= DemandedMask
;
129 KnownZero
.clearAllBits();
130 KnownOne
.clearAllBits();
131 if (DemandedMask
== 0) { // Not demanding any bits from V.
132 if (isa
<UndefValue
>(V
))
134 return UndefValue::get(VTy
);
137 if (Depth
== 6) // Limit search depth.
140 APInt
LHSKnownZero(BitWidth
, 0), LHSKnownOne(BitWidth
, 0);
141 APInt
RHSKnownZero(BitWidth
, 0), RHSKnownOne(BitWidth
, 0);
143 Instruction
*I
= dyn_cast
<Instruction
>(V
);
145 ComputeMaskedBits(V
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
146 return 0; // Only analyze instructions.
149 // If there are multiple uses of this value and we aren't at the root, then
150 // we can't do any simplifications of the operands, because DemandedMask
151 // only reflects the bits demanded by *one* of the users.
152 if (Depth
!= 0 && !I
->hasOneUse()) {
153 // Despite the fact that we can't simplify this instruction in all User's
154 // context, we can at least compute the knownzero/knownone bits, and we can
155 // do simplifications that apply to *just* the one user if we know that
156 // this instruction has a simpler value in that context.
157 if (I
->getOpcode() == Instruction::And
) {
158 // If either the LHS or the RHS are Zero, the result is zero.
159 ComputeMaskedBits(I
->getOperand(1), DemandedMask
,
160 RHSKnownZero
, RHSKnownOne
, Depth
+1);
161 ComputeMaskedBits(I
->getOperand(0), DemandedMask
& ~RHSKnownZero
,
162 LHSKnownZero
, LHSKnownOne
, Depth
+1);
164 // If all of the demanded bits are known 1 on one side, return the other.
165 // These bits cannot contribute to the result of the 'and' in this
167 if ((DemandedMask
& ~LHSKnownZero
& RHSKnownOne
) ==
168 (DemandedMask
& ~LHSKnownZero
))
169 return I
->getOperand(0);
170 if ((DemandedMask
& ~RHSKnownZero
& LHSKnownOne
) ==
171 (DemandedMask
& ~RHSKnownZero
))
172 return I
->getOperand(1);
174 // If all of the demanded bits in the inputs are known zeros, return zero.
175 if ((DemandedMask
& (RHSKnownZero
|LHSKnownZero
)) == DemandedMask
)
176 return Constant::getNullValue(VTy
);
178 } else if (I
->getOpcode() == Instruction::Or
) {
179 // We can simplify (X|Y) -> X or Y in the user's context if we know that
180 // only bits from X or Y are demanded.
182 // If either the LHS or the RHS are One, the result is One.
183 ComputeMaskedBits(I
->getOperand(1), DemandedMask
,
184 RHSKnownZero
, RHSKnownOne
, Depth
+1);
185 ComputeMaskedBits(I
->getOperand(0), DemandedMask
& ~RHSKnownOne
,
186 LHSKnownZero
, LHSKnownOne
, Depth
+1);
188 // If all of the demanded bits are known zero on one side, return the
189 // other. These bits cannot contribute to the result of the 'or' in this
191 if ((DemandedMask
& ~LHSKnownOne
& RHSKnownZero
) ==
192 (DemandedMask
& ~LHSKnownOne
))
193 return I
->getOperand(0);
194 if ((DemandedMask
& ~RHSKnownOne
& LHSKnownZero
) ==
195 (DemandedMask
& ~RHSKnownOne
))
196 return I
->getOperand(1);
198 // If all of the potentially set bits on one side are known to be set on
199 // the other side, just use the 'other' side.
200 if ((DemandedMask
& (~RHSKnownZero
) & LHSKnownOne
) ==
201 (DemandedMask
& (~RHSKnownZero
)))
202 return I
->getOperand(0);
203 if ((DemandedMask
& (~LHSKnownZero
) & RHSKnownOne
) ==
204 (DemandedMask
& (~LHSKnownZero
)))
205 return I
->getOperand(1);
208 // Compute the KnownZero/KnownOne bits to simplify things downstream.
209 ComputeMaskedBits(I
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
213 // If this is the root being simplified, allow it to have multiple uses,
214 // just set the DemandedMask to all bits so that we can try to simplify the
215 // operands. This allows visitTruncInst (for example) to simplify the
216 // operand of a trunc without duplicating all the logic below.
217 if (Depth
== 0 && !V
->hasOneUse())
218 DemandedMask
= APInt::getAllOnesValue(BitWidth
);
220 switch (I
->getOpcode()) {
222 ComputeMaskedBits(I
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
224 case Instruction::And
:
225 // If either the LHS or the RHS are Zero, the result is zero.
226 if (SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
227 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
228 SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
& ~RHSKnownZero
,
229 LHSKnownZero
, LHSKnownOne
, Depth
+1))
231 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
232 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
234 // If all of the demanded bits are known 1 on one side, return the other.
235 // These bits cannot contribute to the result of the 'and'.
236 if ((DemandedMask
& ~LHSKnownZero
& RHSKnownOne
) ==
237 (DemandedMask
& ~LHSKnownZero
))
238 return I
->getOperand(0);
239 if ((DemandedMask
& ~RHSKnownZero
& LHSKnownOne
) ==
240 (DemandedMask
& ~RHSKnownZero
))
241 return I
->getOperand(1);
243 // If all of the demanded bits in the inputs are known zeros, return zero.
244 if ((DemandedMask
& (RHSKnownZero
|LHSKnownZero
)) == DemandedMask
)
245 return Constant::getNullValue(VTy
);
247 // If the RHS is a constant, see if we can simplify it.
248 if (ShrinkDemandedConstant(I
, 1, DemandedMask
& ~LHSKnownZero
))
251 // Output known-1 bits are only known if set in both the LHS & RHS.
252 KnownOne
= RHSKnownOne
& LHSKnownOne
;
253 // Output known-0 are known to be clear if zero in either the LHS | RHS.
254 KnownZero
= RHSKnownZero
| LHSKnownZero
;
256 case Instruction::Or
:
257 // If either the LHS or the RHS are One, the result is One.
258 if (SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
259 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
260 SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
& ~RHSKnownOne
,
261 LHSKnownZero
, LHSKnownOne
, Depth
+1))
263 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
264 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
266 // If all of the demanded bits are known zero on one side, return the other.
267 // These bits cannot contribute to the result of the 'or'.
268 if ((DemandedMask
& ~LHSKnownOne
& RHSKnownZero
) ==
269 (DemandedMask
& ~LHSKnownOne
))
270 return I
->getOperand(0);
271 if ((DemandedMask
& ~RHSKnownOne
& LHSKnownZero
) ==
272 (DemandedMask
& ~RHSKnownOne
))
273 return I
->getOperand(1);
275 // If all of the potentially set bits on one side are known to be set on
276 // the other side, just use the 'other' side.
277 if ((DemandedMask
& (~RHSKnownZero
) & LHSKnownOne
) ==
278 (DemandedMask
& (~RHSKnownZero
)))
279 return I
->getOperand(0);
280 if ((DemandedMask
& (~LHSKnownZero
) & RHSKnownOne
) ==
281 (DemandedMask
& (~LHSKnownZero
)))
282 return I
->getOperand(1);
284 // If the RHS is a constant, see if we can simplify it.
285 if (ShrinkDemandedConstant(I
, 1, DemandedMask
))
288 // Output known-0 bits are only known if clear in both the LHS & RHS.
289 KnownZero
= RHSKnownZero
& LHSKnownZero
;
290 // Output known-1 are known to be set if set in either the LHS | RHS.
291 KnownOne
= RHSKnownOne
| LHSKnownOne
;
293 case Instruction::Xor
: {
294 if (SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
295 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
296 SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
297 LHSKnownZero
, LHSKnownOne
, Depth
+1))
299 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
300 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
302 // If all of the demanded bits are known zero on one side, return the other.
303 // These bits cannot contribute to the result of the 'xor'.
304 if ((DemandedMask
& RHSKnownZero
) == DemandedMask
)
305 return I
->getOperand(0);
306 if ((DemandedMask
& LHSKnownZero
) == DemandedMask
)
307 return I
->getOperand(1);
309 // If all of the demanded bits are known to be zero on one side or the
310 // other, turn this into an *inclusive* or.
311 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
312 if ((DemandedMask
& ~RHSKnownZero
& ~LHSKnownZero
) == 0) {
314 BinaryOperator::CreateOr(I
->getOperand(0), I
->getOperand(1),
316 return InsertNewInstWith(Or
, *I
);
319 // If all of the demanded bits on one side are known, and all of the set
320 // bits on that side are also known to be set on the other side, turn this
321 // into an AND, as we know the bits will be cleared.
322 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
323 if ((DemandedMask
& (RHSKnownZero
|RHSKnownOne
)) == DemandedMask
) {
325 if ((RHSKnownOne
& LHSKnownOne
) == RHSKnownOne
) {
326 Constant
*AndC
= Constant::getIntegerValue(VTy
,
327 ~RHSKnownOne
& DemandedMask
);
329 BinaryOperator::CreateAnd(I
->getOperand(0), AndC
, "tmp");
330 return InsertNewInstWith(And
, *I
);
334 // If the RHS is a constant, see if we can simplify it.
335 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
336 if (ShrinkDemandedConstant(I
, 1, DemandedMask
))
339 // If our LHS is an 'and' and if it has one use, and if any of the bits we
340 // are flipping are known to be set, then the xor is just resetting those
341 // bits to zero. We can just knock out bits from the 'and' and the 'xor',
342 // simplifying both of them.
343 if (Instruction
*LHSInst
= dyn_cast
<Instruction
>(I
->getOperand(0)))
344 if (LHSInst
->getOpcode() == Instruction::And
&& LHSInst
->hasOneUse() &&
345 isa
<ConstantInt
>(I
->getOperand(1)) &&
346 isa
<ConstantInt
>(LHSInst
->getOperand(1)) &&
347 (LHSKnownOne
& RHSKnownOne
& DemandedMask
) != 0) {
348 ConstantInt
*AndRHS
= cast
<ConstantInt
>(LHSInst
->getOperand(1));
349 ConstantInt
*XorRHS
= cast
<ConstantInt
>(I
->getOperand(1));
350 APInt NewMask
= ~(LHSKnownOne
& RHSKnownOne
& DemandedMask
);
353 ConstantInt::get(I
->getType(), NewMask
& AndRHS
->getValue());
354 Instruction
*NewAnd
=
355 BinaryOperator::CreateAnd(I
->getOperand(0), AndC
, "tmp");
356 InsertNewInstWith(NewAnd
, *I
);
359 ConstantInt::get(I
->getType(), NewMask
& XorRHS
->getValue());
360 Instruction
*NewXor
=
361 BinaryOperator::CreateXor(NewAnd
, XorC
, "tmp");
362 return InsertNewInstWith(NewXor
, *I
);
365 // Output known-0 bits are known if clear or set in both the LHS & RHS.
366 KnownZero
= (RHSKnownZero
& LHSKnownZero
) | (RHSKnownOne
& LHSKnownOne
);
367 // Output known-1 are known to be set if set in only one of the LHS, RHS.
368 KnownOne
= (RHSKnownZero
& LHSKnownOne
) | (RHSKnownOne
& LHSKnownZero
);
371 case Instruction::Select
:
372 if (SimplifyDemandedBits(I
->getOperandUse(2), DemandedMask
,
373 RHSKnownZero
, RHSKnownOne
, Depth
+1) ||
374 SimplifyDemandedBits(I
->getOperandUse(1), DemandedMask
,
375 LHSKnownZero
, LHSKnownOne
, Depth
+1))
377 assert(!(RHSKnownZero
& RHSKnownOne
) && "Bits known to be one AND zero?");
378 assert(!(LHSKnownZero
& LHSKnownOne
) && "Bits known to be one AND zero?");
380 // If the operands are constants, see if we can simplify them.
381 if (ShrinkDemandedConstant(I
, 1, DemandedMask
) ||
382 ShrinkDemandedConstant(I
, 2, DemandedMask
))
385 // Only known if known in both the LHS and RHS.
386 KnownOne
= RHSKnownOne
& LHSKnownOne
;
387 KnownZero
= RHSKnownZero
& LHSKnownZero
;
389 case Instruction::Trunc
: {
390 unsigned truncBf
= I
->getOperand(0)->getType()->getScalarSizeInBits();
391 DemandedMask
= DemandedMask
.zext(truncBf
);
392 KnownZero
= KnownZero
.zext(truncBf
);
393 KnownOne
= KnownOne
.zext(truncBf
);
394 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
395 KnownZero
, KnownOne
, Depth
+1))
397 DemandedMask
= DemandedMask
.trunc(BitWidth
);
398 KnownZero
= KnownZero
.trunc(BitWidth
);
399 KnownOne
= KnownOne
.trunc(BitWidth
);
400 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
403 case Instruction::BitCast
:
404 if (!I
->getOperand(0)->getType()->isIntOrIntVectorTy())
405 return 0; // vector->int or fp->int?
407 if (const VectorType
*DstVTy
= dyn_cast
<VectorType
>(I
->getType())) {
408 if (const VectorType
*SrcVTy
=
409 dyn_cast
<VectorType
>(I
->getOperand(0)->getType())) {
410 if (DstVTy
->getNumElements() != SrcVTy
->getNumElements())
411 // Don't touch a bitcast between vectors of different element counts.
414 // Don't touch a scalar-to-vector bitcast.
416 } else if (I
->getOperand(0)->getType()->isVectorTy())
417 // Don't touch a vector-to-scalar bitcast.
420 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
421 KnownZero
, KnownOne
, Depth
+1))
423 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
425 case Instruction::ZExt
: {
426 // Compute the bits in the result that are not present in the input.
427 unsigned SrcBitWidth
=I
->getOperand(0)->getType()->getScalarSizeInBits();
429 DemandedMask
= DemandedMask
.trunc(SrcBitWidth
);
430 KnownZero
= KnownZero
.trunc(SrcBitWidth
);
431 KnownOne
= KnownOne
.trunc(SrcBitWidth
);
432 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMask
,
433 KnownZero
, KnownOne
, Depth
+1))
435 DemandedMask
= DemandedMask
.zext(BitWidth
);
436 KnownZero
= KnownZero
.zext(BitWidth
);
437 KnownOne
= KnownOne
.zext(BitWidth
);
438 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
439 // The top bits are known to be zero.
440 KnownZero
|= APInt::getHighBitsSet(BitWidth
, BitWidth
- SrcBitWidth
);
443 case Instruction::SExt
: {
444 // Compute the bits in the result that are not present in the input.
445 unsigned SrcBitWidth
=I
->getOperand(0)->getType()->getScalarSizeInBits();
447 APInt InputDemandedBits
= DemandedMask
&
448 APInt::getLowBitsSet(BitWidth
, SrcBitWidth
);
450 APInt
NewBits(APInt::getHighBitsSet(BitWidth
, BitWidth
- SrcBitWidth
));
451 // If any of the sign extended bits are demanded, we know that the sign
453 if ((NewBits
& DemandedMask
) != 0)
454 InputDemandedBits
.setBit(SrcBitWidth
-1);
456 InputDemandedBits
= InputDemandedBits
.trunc(SrcBitWidth
);
457 KnownZero
= KnownZero
.trunc(SrcBitWidth
);
458 KnownOne
= KnownOne
.trunc(SrcBitWidth
);
459 if (SimplifyDemandedBits(I
->getOperandUse(0), InputDemandedBits
,
460 KnownZero
, KnownOne
, Depth
+1))
462 InputDemandedBits
= InputDemandedBits
.zext(BitWidth
);
463 KnownZero
= KnownZero
.zext(BitWidth
);
464 KnownOne
= KnownOne
.zext(BitWidth
);
465 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
467 // If the sign bit of the input is known set or clear, then we know the
468 // top bits of the result.
470 // If the input sign bit is known zero, or if the NewBits are not demanded
471 // convert this into a zero extension.
472 if (KnownZero
[SrcBitWidth
-1] || (NewBits
& ~DemandedMask
) == NewBits
) {
473 // Convert to ZExt cast
474 CastInst
*NewCast
= new ZExtInst(I
->getOperand(0), VTy
, I
->getName());
475 return InsertNewInstWith(NewCast
, *I
);
476 } else if (KnownOne
[SrcBitWidth
-1]) { // Input sign bit known set
481 case Instruction::Add
: {
482 // Figure out what the input bits are. If the top bits of the and result
483 // are not demanded, then the add doesn't demand them from its input
485 unsigned NLZ
= DemandedMask
.countLeadingZeros();
487 // If there is a constant on the RHS, there are a variety of xformations
489 if (ConstantInt
*RHS
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
490 // If null, this should be simplified elsewhere. Some of the xforms here
491 // won't work if the RHS is zero.
495 // If the top bit of the output is demanded, demand everything from the
496 // input. Otherwise, we demand all the input bits except NLZ top bits.
497 APInt
InDemandedBits(APInt::getLowBitsSet(BitWidth
, BitWidth
- NLZ
));
499 // Find information about known zero/one bits in the input.
500 if (SimplifyDemandedBits(I
->getOperandUse(0), InDemandedBits
,
501 LHSKnownZero
, LHSKnownOne
, Depth
+1))
504 // If the RHS of the add has bits set that can't affect the input, reduce
506 if (ShrinkDemandedConstant(I
, 1, InDemandedBits
))
509 // Avoid excess work.
510 if (LHSKnownZero
== 0 && LHSKnownOne
== 0)
513 // Turn it into OR if input bits are zero.
514 if ((LHSKnownZero
& RHS
->getValue()) == RHS
->getValue()) {
516 BinaryOperator::CreateOr(I
->getOperand(0), I
->getOperand(1),
518 return InsertNewInstWith(Or
, *I
);
521 // We can say something about the output known-zero and known-one bits,
522 // depending on potential carries from the input constant and the
523 // unknowns. For example if the LHS is known to have at most the 0x0F0F0
524 // bits set and the RHS constant is 0x01001, then we know we have a known
525 // one mask of 0x00001 and a known zero mask of 0xE0F0E.
527 // To compute this, we first compute the potential carry bits. These are
528 // the bits which may be modified. I'm not aware of a better way to do
530 const APInt
&RHSVal
= RHS
->getValue();
531 APInt
CarryBits((~LHSKnownZero
+ RHSVal
) ^ (~LHSKnownZero
^ RHSVal
));
533 // Now that we know which bits have carries, compute the known-1/0 sets.
535 // Bits are known one if they are known zero in one operand and one in the
536 // other, and there is no input carry.
537 KnownOne
= ((LHSKnownZero
& RHSVal
) |
538 (LHSKnownOne
& ~RHSVal
)) & ~CarryBits
;
540 // Bits are known zero if they are known zero in both operands and there
541 // is no input carry.
542 KnownZero
= LHSKnownZero
& ~RHSVal
& ~CarryBits
;
544 // If the high-bits of this ADD are not demanded, then it does not demand
545 // the high bits of its LHS or RHS.
546 if (DemandedMask
[BitWidth
-1] == 0) {
547 // Right fill the mask of bits for this ADD to demand the most
548 // significant bit and all those below it.
549 APInt
DemandedFromOps(APInt::getLowBitsSet(BitWidth
, BitWidth
-NLZ
));
550 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedFromOps
,
551 LHSKnownZero
, LHSKnownOne
, Depth
+1) ||
552 SimplifyDemandedBits(I
->getOperandUse(1), DemandedFromOps
,
553 LHSKnownZero
, LHSKnownOne
, Depth
+1))
559 case Instruction::Sub
:
560 // If the high-bits of this SUB are not demanded, then it does not demand
561 // the high bits of its LHS or RHS.
562 if (DemandedMask
[BitWidth
-1] == 0) {
563 // Right fill the mask of bits for this SUB to demand the most
564 // significant bit and all those below it.
565 uint32_t NLZ
= DemandedMask
.countLeadingZeros();
566 APInt
DemandedFromOps(APInt::getLowBitsSet(BitWidth
, BitWidth
-NLZ
));
567 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedFromOps
,
568 LHSKnownZero
, LHSKnownOne
, Depth
+1) ||
569 SimplifyDemandedBits(I
->getOperandUse(1), DemandedFromOps
,
570 LHSKnownZero
, LHSKnownOne
, Depth
+1))
573 // Otherwise just hand the sub off to ComputeMaskedBits to fill in
574 // the known zeros and ones.
575 ComputeMaskedBits(V
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
577 case Instruction::Shl
:
578 if (ConstantInt
*SA
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
579 uint64_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
580 APInt
DemandedMaskIn(DemandedMask
.lshr(ShiftAmt
));
582 // If the shift is NUW/NSW, then it does demand the high bits.
583 ShlOperator
*IOp
= cast
<ShlOperator
>(I
);
584 if (IOp
->hasNoSignedWrap())
585 DemandedMaskIn
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
+1);
586 else if (IOp
->hasNoUnsignedWrap())
587 DemandedMaskIn
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
);
589 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMaskIn
,
590 KnownZero
, KnownOne
, Depth
+1))
592 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
593 KnownZero
<<= ShiftAmt
;
594 KnownOne
<<= ShiftAmt
;
595 // low bits known zero.
597 KnownZero
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
600 case Instruction::LShr
:
601 // For a logical shift right
602 if (ConstantInt
*SA
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
603 uint64_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
605 // Unsigned shift right.
606 APInt
DemandedMaskIn(DemandedMask
.shl(ShiftAmt
));
608 // If the shift is exact, then it does demand the low bits (and knows that
610 if (cast
<LShrOperator
>(I
)->isExact())
611 DemandedMaskIn
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
613 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMaskIn
,
614 KnownZero
, KnownOne
, Depth
+1))
616 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
617 KnownZero
= APIntOps::lshr(KnownZero
, ShiftAmt
);
618 KnownOne
= APIntOps::lshr(KnownOne
, ShiftAmt
);
620 // Compute the new bits that are at the top now.
621 APInt
HighBits(APInt::getHighBitsSet(BitWidth
, ShiftAmt
));
622 KnownZero
|= HighBits
; // high bits known zero.
626 case Instruction::AShr
:
627 // If this is an arithmetic shift right and only the low-bit is set, we can
628 // always convert this into a logical shr, even if the shift amount is
629 // variable. The low bit of the shift cannot be an input sign bit unless
630 // the shift amount is >= the size of the datatype, which is undefined.
631 if (DemandedMask
== 1) {
632 // Perform the logical shift right.
633 Instruction
*NewVal
= BinaryOperator::CreateLShr(
634 I
->getOperand(0), I
->getOperand(1), I
->getName());
635 return InsertNewInstWith(NewVal
, *I
);
638 // If the sign bit is the only bit demanded by this ashr, then there is no
639 // need to do it, the shift doesn't change the high bit.
640 if (DemandedMask
.isSignBit())
641 return I
->getOperand(0);
643 if (ConstantInt
*SA
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
644 uint32_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
646 // Signed shift right.
647 APInt
DemandedMaskIn(DemandedMask
.shl(ShiftAmt
));
648 // If any of the "high bits" are demanded, we should set the sign bit as
650 if (DemandedMask
.countLeadingZeros() <= ShiftAmt
)
651 DemandedMaskIn
.setBit(BitWidth
-1);
653 // If the shift is exact, then it does demand the low bits (and knows that
655 if (cast
<AShrOperator
>(I
)->isExact())
656 DemandedMaskIn
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
658 if (SimplifyDemandedBits(I
->getOperandUse(0), DemandedMaskIn
,
659 KnownZero
, KnownOne
, Depth
+1))
661 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
662 // Compute the new bits that are at the top now.
663 APInt
HighBits(APInt::getHighBitsSet(BitWidth
, ShiftAmt
));
664 KnownZero
= APIntOps::lshr(KnownZero
, ShiftAmt
);
665 KnownOne
= APIntOps::lshr(KnownOne
, ShiftAmt
);
667 // Handle the sign bits.
668 APInt
SignBit(APInt::getSignBit(BitWidth
));
669 // Adjust to where it is now in the mask.
670 SignBit
= APIntOps::lshr(SignBit
, ShiftAmt
);
672 // If the input sign bit is known to be zero, or if none of the top bits
673 // are demanded, turn this into an unsigned shift right.
674 if (BitWidth
<= ShiftAmt
|| KnownZero
[BitWidth
-ShiftAmt
-1] ||
675 (HighBits
& ~DemandedMask
) == HighBits
) {
676 // Perform the logical shift right.
677 Instruction
*NewVal
= BinaryOperator::CreateLShr(
678 I
->getOperand(0), SA
, I
->getName());
679 return InsertNewInstWith(NewVal
, *I
);
680 } else if ((KnownOne
& SignBit
) != 0) { // New bits are known one.
681 KnownOne
|= HighBits
;
685 case Instruction::SRem
:
686 if (ConstantInt
*Rem
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
687 // X % -1 demands all the bits because we don't want to introduce
688 // INT_MIN % -1 (== undef) by accident.
689 if (Rem
->isAllOnesValue())
691 APInt RA
= Rem
->getValue().abs();
692 if (RA
.isPowerOf2()) {
693 if (DemandedMask
.ult(RA
)) // srem won't affect demanded bits
694 return I
->getOperand(0);
696 APInt LowBits
= RA
- 1;
697 APInt Mask2
= LowBits
| APInt::getSignBit(BitWidth
);
698 if (SimplifyDemandedBits(I
->getOperandUse(0), Mask2
,
699 LHSKnownZero
, LHSKnownOne
, Depth
+1))
702 // The low bits of LHS are unchanged by the srem.
703 KnownZero
= LHSKnownZero
& LowBits
;
704 KnownOne
= LHSKnownOne
& LowBits
;
706 // If LHS is non-negative or has all low bits zero, then the upper bits
708 if (LHSKnownZero
[BitWidth
-1] || ((LHSKnownZero
& LowBits
) == LowBits
))
709 KnownZero
|= ~LowBits
;
711 // If LHS is negative and not all low bits are zero, then the upper bits
713 if (LHSKnownOne
[BitWidth
-1] && ((LHSKnownOne
& LowBits
) != 0))
714 KnownOne
|= ~LowBits
;
716 assert(!(KnownZero
& KnownOne
) && "Bits known to be one AND zero?");
720 // The sign bit is the LHS's sign bit, except when the result of the
721 // remainder is zero.
722 if (DemandedMask
.isNegative() && KnownZero
.isNonNegative()) {
723 APInt Mask2
= APInt::getSignBit(BitWidth
);
724 APInt
LHSKnownZero(BitWidth
, 0), LHSKnownOne(BitWidth
, 0);
725 ComputeMaskedBits(I
->getOperand(0), Mask2
, LHSKnownZero
, LHSKnownOne
,
727 // If it's known zero, our sign bit is also zero.
728 if (LHSKnownZero
.isNegative())
729 KnownZero
|= LHSKnownZero
;
732 case Instruction::URem
: {
733 APInt
KnownZero2(BitWidth
, 0), KnownOne2(BitWidth
, 0);
734 APInt AllOnes
= APInt::getAllOnesValue(BitWidth
);
735 if (SimplifyDemandedBits(I
->getOperandUse(0), AllOnes
,
736 KnownZero2
, KnownOne2
, Depth
+1) ||
737 SimplifyDemandedBits(I
->getOperandUse(1), AllOnes
,
738 KnownZero2
, KnownOne2
, Depth
+1))
741 unsigned Leaders
= KnownZero2
.countLeadingOnes();
742 Leaders
= std::max(Leaders
,
743 KnownZero2
.countLeadingOnes());
744 KnownZero
= APInt::getHighBitsSet(BitWidth
, Leaders
) & DemandedMask
;
747 case Instruction::Call
:
748 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
)) {
749 switch (II
->getIntrinsicID()) {
751 case Intrinsic::bswap
: {
752 // If the only bits demanded come from one byte of the bswap result,
753 // just shift the input byte into position to eliminate the bswap.
754 unsigned NLZ
= DemandedMask
.countLeadingZeros();
755 unsigned NTZ
= DemandedMask
.countTrailingZeros();
757 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
758 // we need all the bits down to bit 8. Likewise, round NLZ. If we
759 // have 14 leading zeros, round to 8.
762 // If we need exactly one byte, we can do this transformation.
763 if (BitWidth
-NLZ
-NTZ
== 8) {
764 unsigned ResultBit
= NTZ
;
765 unsigned InputBit
= BitWidth
-NTZ
-8;
767 // Replace this with either a left or right shift to get the byte into
770 if (InputBit
> ResultBit
)
771 NewVal
= BinaryOperator::CreateLShr(II
->getArgOperand(0),
772 ConstantInt::get(I
->getType(), InputBit
-ResultBit
));
774 NewVal
= BinaryOperator::CreateShl(II
->getArgOperand(0),
775 ConstantInt::get(I
->getType(), ResultBit
-InputBit
));
777 return InsertNewInstWith(NewVal
, *I
);
780 // TODO: Could compute known zero/one bits based on the input.
783 case Intrinsic::x86_sse42_crc32_64_8
:
784 case Intrinsic::x86_sse42_crc32_64_64
:
785 KnownZero
= APInt::getHighBitsSet(64, 32);
789 ComputeMaskedBits(V
, DemandedMask
, KnownZero
, KnownOne
, Depth
);
793 // If the client is only demanding bits that we know, return the known
795 if ((DemandedMask
& (KnownZero
|KnownOne
)) == DemandedMask
)
796 return Constant::getIntegerValue(VTy
, KnownOne
);
801 /// SimplifyDemandedVectorElts - The specified value produces a vector with
802 /// any number of elements. DemandedElts contains the set of elements that are
803 /// actually used by the caller. This method analyzes which elements of the
804 /// operand are undef and returns that information in UndefElts.
806 /// If the information about demanded elements can be used to simplify the
807 /// operation, the operation is simplified, then the resultant value is
808 /// returned. This returns null if no change was made.
809 Value
*InstCombiner::SimplifyDemandedVectorElts(Value
*V
, APInt DemandedElts
,
812 unsigned VWidth
= cast
<VectorType
>(V
->getType())->getNumElements();
813 APInt
EltMask(APInt::getAllOnesValue(VWidth
));
814 assert((DemandedElts
& ~EltMask
) == 0 && "Invalid DemandedElts!");
816 if (isa
<UndefValue
>(V
)) {
817 // If the entire vector is undefined, just return this info.
822 if (DemandedElts
== 0) { // If nothing is demanded, provide undef.
824 return UndefValue::get(V
->getType());
828 if (ConstantVector
*CV
= dyn_cast
<ConstantVector
>(V
)) {
829 const Type
*EltTy
= cast
<VectorType
>(V
->getType())->getElementType();
830 Constant
*Undef
= UndefValue::get(EltTy
);
832 std::vector
<Constant
*> Elts
;
833 for (unsigned i
= 0; i
!= VWidth
; ++i
)
834 if (!DemandedElts
[i
]) { // If not demanded, set to undef.
835 Elts
.push_back(Undef
);
837 } else if (isa
<UndefValue
>(CV
->getOperand(i
))) { // Already undef.
838 Elts
.push_back(Undef
);
840 } else { // Otherwise, defined.
841 Elts
.push_back(CV
->getOperand(i
));
844 // If we changed the constant, return it.
845 Constant
*NewCP
= ConstantVector::get(Elts
);
846 return NewCP
!= CV
? NewCP
: 0;
849 if (isa
<ConstantAggregateZero
>(V
)) {
850 // Simplify the CAZ to a ConstantVector where the non-demanded elements are
853 // Check if this is identity. If so, return 0 since we are not simplifying
855 if (DemandedElts
.isAllOnesValue())
858 const Type
*EltTy
= cast
<VectorType
>(V
->getType())->getElementType();
859 Constant
*Zero
= Constant::getNullValue(EltTy
);
860 Constant
*Undef
= UndefValue::get(EltTy
);
861 std::vector
<Constant
*> Elts
;
862 for (unsigned i
= 0; i
!= VWidth
; ++i
) {
863 Constant
*Elt
= DemandedElts
[i
] ? Zero
: Undef
;
866 UndefElts
= DemandedElts
^ EltMask
;
867 return ConstantVector::get(Elts
);
870 // Limit search depth.
874 // If multiple users are using the root value, proceed with
875 // simplification conservatively assuming that all elements
877 if (!V
->hasOneUse()) {
878 // Quit if we find multiple users of a non-root value though.
879 // They'll be handled when it's their turn to be visited by
880 // the main instcombine process.
882 // TODO: Just compute the UndefElts information recursively.
885 // Conservatively assume that all elements are needed.
886 DemandedElts
= EltMask
;
889 Instruction
*I
= dyn_cast
<Instruction
>(V
);
890 if (!I
) return 0; // Only analyze instructions.
892 bool MadeChange
= false;
893 APInt
UndefElts2(VWidth
, 0);
895 switch (I
->getOpcode()) {
898 case Instruction::InsertElement
: {
899 // If this is a variable index, we don't know which element it overwrites.
900 // demand exactly the same input as we produce.
901 ConstantInt
*Idx
= dyn_cast
<ConstantInt
>(I
->getOperand(2));
903 // Note that we can't propagate undef elt info, because we don't know
904 // which elt is getting updated.
905 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), DemandedElts
,
906 UndefElts2
, Depth
+1);
907 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
911 // If this is inserting an element that isn't demanded, remove this
913 unsigned IdxNo
= Idx
->getZExtValue();
914 if (IdxNo
>= VWidth
|| !DemandedElts
[IdxNo
]) {
916 return I
->getOperand(0);
919 // Otherwise, the element inserted overwrites whatever was there, so the
920 // input demanded set is simpler than the output set.
921 APInt DemandedElts2
= DemandedElts
;
922 DemandedElts2
.clearBit(IdxNo
);
923 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), DemandedElts2
,
925 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
927 // The inserted element is defined.
928 UndefElts
.clearBit(IdxNo
);
931 case Instruction::ShuffleVector
: {
932 ShuffleVectorInst
*Shuffle
= cast
<ShuffleVectorInst
>(I
);
934 cast
<VectorType
>(Shuffle
->getOperand(0)->getType())->getNumElements();
935 APInt
LeftDemanded(LHSVWidth
, 0), RightDemanded(LHSVWidth
, 0);
936 for (unsigned i
= 0; i
< VWidth
; i
++) {
937 if (DemandedElts
[i
]) {
938 unsigned MaskVal
= Shuffle
->getMaskValue(i
);
939 if (MaskVal
!= -1u) {
940 assert(MaskVal
< LHSVWidth
* 2 &&
941 "shufflevector mask index out of range!");
942 if (MaskVal
< LHSVWidth
)
943 LeftDemanded
.setBit(MaskVal
);
945 RightDemanded
.setBit(MaskVal
- LHSVWidth
);
950 APInt
UndefElts4(LHSVWidth
, 0);
951 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), LeftDemanded
,
952 UndefElts4
, Depth
+1);
953 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
955 APInt
UndefElts3(LHSVWidth
, 0);
956 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(1), RightDemanded
,
957 UndefElts3
, Depth
+1);
958 if (TmpV
) { I
->setOperand(1, TmpV
); MadeChange
= true; }
960 bool NewUndefElts
= false;
961 for (unsigned i
= 0; i
< VWidth
; i
++) {
962 unsigned MaskVal
= Shuffle
->getMaskValue(i
);
963 if (MaskVal
== -1u) {
965 } else if (MaskVal
< LHSVWidth
) {
966 if (UndefElts4
[MaskVal
]) {
971 if (UndefElts3
[MaskVal
- LHSVWidth
]) {
979 // Add additional discovered undefs.
980 std::vector
<Constant
*> Elts
;
981 for (unsigned i
= 0; i
< VWidth
; ++i
) {
983 Elts
.push_back(UndefValue::get(Type::getInt32Ty(I
->getContext())));
985 Elts
.push_back(ConstantInt::get(Type::getInt32Ty(I
->getContext()),
986 Shuffle
->getMaskValue(i
)));
988 I
->setOperand(2, ConstantVector::get(Elts
));
993 case Instruction::BitCast
: {
994 // Vector->vector casts only.
995 const VectorType
*VTy
= dyn_cast
<VectorType
>(I
->getOperand(0)->getType());
997 unsigned InVWidth
= VTy
->getNumElements();
998 APInt
InputDemandedElts(InVWidth
, 0);
1001 if (VWidth
== InVWidth
) {
1002 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1003 // elements as are demanded of us.
1005 InputDemandedElts
= DemandedElts
;
1006 } else if (VWidth
> InVWidth
) {
1010 // If there are more elements in the result than there are in the source,
1011 // then an input element is live if any of the corresponding output
1012 // elements are live.
1013 Ratio
= VWidth
/InVWidth
;
1014 for (unsigned OutIdx
= 0; OutIdx
!= VWidth
; ++OutIdx
) {
1015 if (DemandedElts
[OutIdx
])
1016 InputDemandedElts
.setBit(OutIdx
/Ratio
);
1022 // If there are more elements in the source than there are in the result,
1023 // then an input element is live if the corresponding output element is
1025 Ratio
= InVWidth
/VWidth
;
1026 for (unsigned InIdx
= 0; InIdx
!= InVWidth
; ++InIdx
)
1027 if (DemandedElts
[InIdx
/Ratio
])
1028 InputDemandedElts
.setBit(InIdx
);
1031 // div/rem demand all inputs, because they don't want divide by zero.
1032 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), InputDemandedElts
,
1033 UndefElts2
, Depth
+1);
1035 I
->setOperand(0, TmpV
);
1039 UndefElts
= UndefElts2
;
1040 if (VWidth
> InVWidth
) {
1041 llvm_unreachable("Unimp");
1042 // If there are more elements in the result than there are in the source,
1043 // then an output element is undef if the corresponding input element is
1045 for (unsigned OutIdx
= 0; OutIdx
!= VWidth
; ++OutIdx
)
1046 if (UndefElts2
[OutIdx
/Ratio
])
1047 UndefElts
.setBit(OutIdx
);
1048 } else if (VWidth
< InVWidth
) {
1049 llvm_unreachable("Unimp");
1050 // If there are more elements in the source than there are in the result,
1051 // then a result element is undef if all of the corresponding input
1052 // elements are undef.
1053 UndefElts
= ~0ULL >> (64-VWidth
); // Start out all undef.
1054 for (unsigned InIdx
= 0; InIdx
!= InVWidth
; ++InIdx
)
1055 if (!UndefElts2
[InIdx
]) // Not undef?
1056 UndefElts
.clearBit(InIdx
/Ratio
); // Clear undef bit.
1060 case Instruction::And
:
1061 case Instruction::Or
:
1062 case Instruction::Xor
:
1063 case Instruction::Add
:
1064 case Instruction::Sub
:
1065 case Instruction::Mul
:
1066 // div/rem demand all inputs, because they don't want divide by zero.
1067 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(0), DemandedElts
,
1068 UndefElts
, Depth
+1);
1069 if (TmpV
) { I
->setOperand(0, TmpV
); MadeChange
= true; }
1070 TmpV
= SimplifyDemandedVectorElts(I
->getOperand(1), DemandedElts
,
1071 UndefElts2
, Depth
+1);
1072 if (TmpV
) { I
->setOperand(1, TmpV
); MadeChange
= true; }
1074 // Output elements are undefined if both are undefined. Consider things
1075 // like undef&0. The result is known zero, not undef.
1076 UndefElts
&= UndefElts2
;
1079 case Instruction::Call
: {
1080 IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
);
1082 switch (II
->getIntrinsicID()) {
1085 // Binary vector operations that work column-wise. A dest element is a
1086 // function of the corresponding input elements from the two inputs.
1087 case Intrinsic::x86_sse_sub_ss
:
1088 case Intrinsic::x86_sse_mul_ss
:
1089 case Intrinsic::x86_sse_min_ss
:
1090 case Intrinsic::x86_sse_max_ss
:
1091 case Intrinsic::x86_sse2_sub_sd
:
1092 case Intrinsic::x86_sse2_mul_sd
:
1093 case Intrinsic::x86_sse2_min_sd
:
1094 case Intrinsic::x86_sse2_max_sd
:
1095 TmpV
= SimplifyDemandedVectorElts(II
->getArgOperand(0), DemandedElts
,
1096 UndefElts
, Depth
+1);
1097 if (TmpV
) { II
->setArgOperand(0, TmpV
); MadeChange
= true; }
1098 TmpV
= SimplifyDemandedVectorElts(II
->getArgOperand(1), DemandedElts
,
1099 UndefElts2
, Depth
+1);
1100 if (TmpV
) { II
->setArgOperand(1, TmpV
); MadeChange
= true; }
1102 // If only the low elt is demanded and this is a scalarizable intrinsic,
1103 // scalarize it now.
1104 if (DemandedElts
== 1) {
1105 switch (II
->getIntrinsicID()) {
1107 case Intrinsic::x86_sse_sub_ss
:
1108 case Intrinsic::x86_sse_mul_ss
:
1109 case Intrinsic::x86_sse2_sub_sd
:
1110 case Intrinsic::x86_sse2_mul_sd
:
1111 // TODO: Lower MIN/MAX/ABS/etc
1112 Value
*LHS
= II
->getArgOperand(0);
1113 Value
*RHS
= II
->getArgOperand(1);
1114 // Extract the element as scalars.
1115 LHS
= InsertNewInstWith(ExtractElementInst::Create(LHS
,
1116 ConstantInt::get(Type::getInt32Ty(I
->getContext()), 0U)), *II
);
1117 RHS
= InsertNewInstWith(ExtractElementInst::Create(RHS
,
1118 ConstantInt::get(Type::getInt32Ty(I
->getContext()), 0U)), *II
);
1120 switch (II
->getIntrinsicID()) {
1121 default: llvm_unreachable("Case stmts out of sync!");
1122 case Intrinsic::x86_sse_sub_ss
:
1123 case Intrinsic::x86_sse2_sub_sd
:
1124 TmpV
= InsertNewInstWith(BinaryOperator::CreateFSub(LHS
, RHS
,
1125 II
->getName()), *II
);
1127 case Intrinsic::x86_sse_mul_ss
:
1128 case Intrinsic::x86_sse2_mul_sd
:
1129 TmpV
= InsertNewInstWith(BinaryOperator::CreateFMul(LHS
, RHS
,
1130 II
->getName()), *II
);
1135 InsertElementInst::Create(
1136 UndefValue::get(II
->getType()), TmpV
,
1137 ConstantInt::get(Type::getInt32Ty(I
->getContext()), 0U, false),
1139 InsertNewInstWith(New
, *II
);
1144 // Output elements are undefined if both are undefined. Consider things
1145 // like undef&0. The result is known zero, not undef.
1146 UndefElts
&= UndefElts2
;
1152 return MadeChange
? I
: 0;