[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Transforms / InstCombine / InstCombineVectorOps.cpp
blob7a4a80d7f440eece0823cf7eee829d8c19c4489b
1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements instcombine for ExtractElement, InsertElement and
10 // ShuffleVector.
12 //===----------------------------------------------------------------------===//
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallBitVector.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/InstructionSimplify.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Operator.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/User.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
39 #include "llvm/Transforms/InstCombine/InstCombiner.h"
40 #include <cassert>
41 #include <cstdint>
42 #include <iterator>
43 #include <utility>
45 using namespace llvm;
46 using namespace PatternMatch;
48 #define DEBUG_TYPE "instcombine"
50 STATISTIC(NumAggregateReconstructionsSimplified,
51 "Number of aggregate reconstructions turned into reuse of the "
52 "original aggregate");
54 /// Return true if the value is cheaper to scalarize than it is to leave as a
55 /// vector operation. If the extract index \p EI is a constant integer then
56 /// some operations may be cheap to scalarize.
57 ///
58 /// FIXME: It's possible to create more instructions than previously existed.
59 static bool cheapToScalarize(Value *V, Value *EI) {
60 ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
62 // If we can pick a scalar constant value out of a vector, that is free.
63 if (auto *C = dyn_cast<Constant>(V))
64 return CEI || C->getSplatValue();
66 if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
67 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
68 // Index needs to be lower than the minimum size of the vector, because
69 // for scalable vector, the vector size is known at run time.
70 return CEI->getValue().ult(EC.getKnownMinValue());
73 // An insertelement to the same constant index as our extract will simplify
74 // to the scalar inserted element. An insertelement to a different constant
75 // index is irrelevant to our extract.
76 if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
77 return CEI;
79 if (match(V, m_OneUse(m_Load(m_Value()))))
80 return true;
82 if (match(V, m_OneUse(m_UnOp())))
83 return true;
85 Value *V0, *V1;
86 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
87 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
88 return true;
90 CmpInst::Predicate UnusedPred;
91 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
92 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
93 return true;
95 return false;
98 // If we have a PHI node with a vector type that is only used to feed
99 // itself and be an operand of extractelement at a constant location,
100 // try to replace the PHI of the vector type with a PHI of a scalar type.
101 Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
102 PHINode *PN) {
103 SmallVector<Instruction *, 2> Extracts;
104 // The users we want the PHI to have are:
105 // 1) The EI ExtractElement (we already know this)
106 // 2) Possibly more ExtractElements with the same index.
107 // 3) Another operand, which will feed back into the PHI.
108 Instruction *PHIUser = nullptr;
109 for (auto U : PN->users()) {
110 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
111 if (EI.getIndexOperand() == EU->getIndexOperand())
112 Extracts.push_back(EU);
113 else
114 return nullptr;
115 } else if (!PHIUser) {
116 PHIUser = cast<Instruction>(U);
117 } else {
118 return nullptr;
122 if (!PHIUser)
123 return nullptr;
125 // Verify that this PHI user has one use, which is the PHI itself,
126 // and that it is a binary operation which is cheap to scalarize.
127 // otherwise return nullptr.
128 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
129 !(isa<BinaryOperator>(PHIUser)) ||
130 !cheapToScalarize(PHIUser, EI.getIndexOperand()))
131 return nullptr;
133 // Create a scalar PHI node that will replace the vector PHI node
134 // just before the current PHI node.
135 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
136 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
137 // Scalarize each PHI operand.
138 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
139 Value *PHIInVal = PN->getIncomingValue(i);
140 BasicBlock *inBB = PN->getIncomingBlock(i);
141 Value *Elt = EI.getIndexOperand();
142 // If the operand is the PHI induction variable:
143 if (PHIInVal == PHIUser) {
144 // Scalarize the binary operation. Its first operand is the
145 // scalar PHI, and the second operand is extracted from the other
146 // vector operand.
147 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
148 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
149 Value *Op = InsertNewInstWith(
150 ExtractElementInst::Create(B0->getOperand(opId), Elt,
151 B0->getOperand(opId)->getName() + ".Elt"),
152 *B0);
153 Value *newPHIUser = InsertNewInstWith(
154 BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
155 scalarPHI, Op, B0), *B0);
156 scalarPHI->addIncoming(newPHIUser, inBB);
157 } else {
158 // Scalarize PHI input:
159 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
160 // Insert the new instruction into the predecessor basic block.
161 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
162 BasicBlock::iterator InsertPos;
163 if (pos && !isa<PHINode>(pos)) {
164 InsertPos = ++pos->getIterator();
165 } else {
166 InsertPos = inBB->getFirstInsertionPt();
169 InsertNewInstWith(newEI, *InsertPos);
171 scalarPHI->addIncoming(newEI, inBB);
175 for (auto E : Extracts)
176 replaceInstUsesWith(*E, scalarPHI);
178 return &EI;
181 static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
182 InstCombiner::BuilderTy &Builder,
183 bool IsBigEndian) {
184 Value *X;
185 uint64_t ExtIndexC;
186 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
187 !X->getType()->isVectorTy() ||
188 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
189 return nullptr;
191 // If this extractelement is using a bitcast from a vector of the same number
192 // of elements, see if we can find the source element from the source vector:
193 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
194 auto *SrcTy = cast<VectorType>(X->getType());
195 Type *DestTy = Ext.getType();
196 ElementCount NumSrcElts = SrcTy->getElementCount();
197 ElementCount NumElts =
198 cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
199 if (NumSrcElts == NumElts)
200 if (Value *Elt = findScalarElement(X, ExtIndexC))
201 return new BitCastInst(Elt, DestTy);
203 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
204 "Src and Dst must be the same sort of vector type");
206 // If the source elements are wider than the destination, try to shift and
207 // truncate a subset of scalar bits of an insert op.
208 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
209 Value *Scalar;
210 uint64_t InsIndexC;
211 if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
212 m_ConstantInt(InsIndexC))))
213 return nullptr;
215 // The extract must be from the subset of vector elements that we inserted
216 // into. Example: if we inserted element 1 of a <2 x i64> and we are
217 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
218 // of elements 4-7 of the bitcasted vector.
219 unsigned NarrowingRatio =
220 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
221 if (ExtIndexC / NarrowingRatio != InsIndexC)
222 return nullptr;
224 // We are extracting part of the original scalar. How that scalar is
225 // inserted into the vector depends on the endian-ness. Example:
226 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
227 // +--+--+--+--+--+--+--+--+
228 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
229 // extelt <4 x i16> V', 3: | |S2|S3|
230 // +--+--+--+--+--+--+--+--+
231 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
232 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
233 // In this example, we must right-shift little-endian. Big-endian is just a
234 // truncate.
235 unsigned Chunk = ExtIndexC % NarrowingRatio;
236 if (IsBigEndian)
237 Chunk = NarrowingRatio - 1 - Chunk;
239 // Bail out if this is an FP vector to FP vector sequence. That would take
240 // more instructions than we started with unless there is no shift, and it
241 // may not be handled as well in the backend.
242 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
243 bool NeedDestBitcast = DestTy->isFloatingPointTy();
244 if (NeedSrcBitcast && NeedDestBitcast)
245 return nullptr;
247 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
248 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
249 unsigned ShAmt = Chunk * DestWidth;
251 // TODO: This limitation is more strict than necessary. We could sum the
252 // number of new instructions and subtract the number eliminated to know if
253 // we can proceed.
254 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
255 if (NeedSrcBitcast || NeedDestBitcast)
256 return nullptr;
258 if (NeedSrcBitcast) {
259 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
260 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
263 if (ShAmt) {
264 // Bail out if we could end with more instructions than we started with.
265 if (!Ext.getVectorOperand()->hasOneUse())
266 return nullptr;
267 Scalar = Builder.CreateLShr(Scalar, ShAmt);
270 if (NeedDestBitcast) {
271 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
272 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
274 return new TruncInst(Scalar, DestTy);
277 return nullptr;
280 /// Find elements of V demanded by UserInstr.
281 static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
282 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
284 // Conservatively assume that all elements are needed.
285 APInt UsedElts(APInt::getAllOnesValue(VWidth));
287 switch (UserInstr->getOpcode()) {
288 case Instruction::ExtractElement: {
289 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
290 assert(EEI->getVectorOperand() == V);
291 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
292 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
293 UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
295 break;
297 case Instruction::ShuffleVector: {
298 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
299 unsigned MaskNumElts =
300 cast<FixedVectorType>(UserInstr->getType())->getNumElements();
302 UsedElts = APInt(VWidth, 0);
303 for (unsigned i = 0; i < MaskNumElts; i++) {
304 unsigned MaskVal = Shuffle->getMaskValue(i);
305 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
306 continue;
307 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
308 UsedElts.setBit(MaskVal);
309 if (Shuffle->getOperand(1) == V &&
310 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
311 UsedElts.setBit(MaskVal - VWidth);
313 break;
315 default:
316 break;
318 return UsedElts;
321 /// Find union of elements of V demanded by all its users.
322 /// If it is known by querying findDemandedEltsBySingleUser that
323 /// no user demands an element of V, then the corresponding bit
324 /// remains unset in the returned value.
325 static APInt findDemandedEltsByAllUsers(Value *V) {
326 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
328 APInt UnionUsedElts(VWidth, 0);
329 for (const Use &U : V->uses()) {
330 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
331 UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
332 } else {
333 UnionUsedElts = APInt::getAllOnesValue(VWidth);
334 break;
337 if (UnionUsedElts.isAllOnesValue())
338 break;
341 return UnionUsedElts;
344 Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
345 Value *SrcVec = EI.getVectorOperand();
346 Value *Index = EI.getIndexOperand();
347 if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
348 SQ.getWithInstruction(&EI)))
349 return replaceInstUsesWith(EI, V);
351 // If extracting a specified index from the vector, see if we can recursively
352 // find a previously computed scalar that was inserted into the vector.
353 auto *IndexC = dyn_cast<ConstantInt>(Index);
354 if (IndexC) {
355 ElementCount EC = EI.getVectorOperandType()->getElementCount();
356 unsigned NumElts = EC.getKnownMinValue();
358 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
359 Intrinsic::ID IID = II->getIntrinsicID();
360 // Index needs to be lower than the minimum size of the vector, because
361 // for scalable vector, the vector size is known at run time.
362 if (IID == Intrinsic::experimental_stepvector &&
363 IndexC->getValue().ult(NumElts)) {
364 Type *Ty = EI.getType();
365 unsigned BitWidth = Ty->getIntegerBitWidth();
366 Value *Idx;
367 // Return index when its value does not exceed the allowed limit
368 // for the element type of the vector, otherwise return undefined.
369 if (IndexC->getValue().getActiveBits() <= BitWidth)
370 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
371 else
372 Idx = UndefValue::get(Ty);
373 return replaceInstUsesWith(EI, Idx);
377 // InstSimplify should handle cases where the index is invalid.
378 // For fixed-length vector, it's invalid to extract out-of-range element.
379 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
380 return nullptr;
382 // This instruction only demands the single element from the input vector.
383 // Skip for scalable type, the number of elements is unknown at
384 // compile-time.
385 if (!EC.isScalable() && NumElts != 1) {
386 // If the input vector has a single use, simplify it based on this use
387 // property.
388 if (SrcVec->hasOneUse()) {
389 APInt UndefElts(NumElts, 0);
390 APInt DemandedElts(NumElts, 0);
391 DemandedElts.setBit(IndexC->getZExtValue());
392 if (Value *V =
393 SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
394 return replaceOperand(EI, 0, V);
395 } else {
396 // If the input vector has multiple uses, simplify it based on a union
397 // of all elements used.
398 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
399 if (!DemandedElts.isAllOnesValue()) {
400 APInt UndefElts(NumElts, 0);
401 if (Value *V = SimplifyDemandedVectorElts(
402 SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
403 true /* AllowMultipleUsers */)) {
404 if (V != SrcVec) {
405 SrcVec->replaceAllUsesWith(V);
406 return &EI;
413 if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
414 return I;
416 // If there's a vector PHI feeding a scalar use through this extractelement
417 // instruction, try to scalarize the PHI.
418 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
419 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
420 return ScalarPHI;
423 // TODO come up with a n-ary matcher that subsumes both unary and
424 // binary matchers.
425 UnaryOperator *UO;
426 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
427 // extelt (unop X), Index --> unop (extelt X, Index)
428 Value *X = UO->getOperand(0);
429 Value *E = Builder.CreateExtractElement(X, Index);
430 return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
433 BinaryOperator *BO;
434 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
435 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
436 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
437 Value *E0 = Builder.CreateExtractElement(X, Index);
438 Value *E1 = Builder.CreateExtractElement(Y, Index);
439 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
442 Value *X, *Y;
443 CmpInst::Predicate Pred;
444 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
445 cheapToScalarize(SrcVec, Index)) {
446 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
447 Value *E0 = Builder.CreateExtractElement(X, Index);
448 Value *E1 = Builder.CreateExtractElement(Y, Index);
449 return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
452 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
453 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
454 // Extracting the inserted element?
455 if (IE->getOperand(2) == Index)
456 return replaceInstUsesWith(EI, IE->getOperand(1));
457 // If the inserted and extracted elements are constants, they must not
458 // be the same value, extract from the pre-inserted value instead.
459 if (isa<Constant>(IE->getOperand(2)) && IndexC)
460 return replaceOperand(EI, 0, IE->getOperand(0));
461 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
462 auto *VecType = cast<VectorType>(GEP->getType());
463 ElementCount EC = VecType->getElementCount();
464 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
465 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
466 // Find out why we have a vector result - these are a few examples:
467 // 1. We have a scalar pointer and a vector of indices, or
468 // 2. We have a vector of pointers and a scalar index, or
469 // 3. We have a vector of pointers and a vector of indices, etc.
470 // Here we only consider combining when there is exactly one vector
471 // operand, since the optimization is less obviously a win due to
472 // needing more than one extractelements.
474 unsigned VectorOps =
475 llvm::count_if(GEP->operands(), [](const Value *V) {
476 return isa<VectorType>(V->getType());
478 if (VectorOps > 1)
479 return nullptr;
480 assert(VectorOps == 1 && "Expected exactly one vector GEP operand!");
482 Value *NewPtr = GEP->getPointerOperand();
483 if (isa<VectorType>(NewPtr->getType()))
484 NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
486 SmallVector<Value *> NewOps;
487 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
488 Value *Op = GEP->getOperand(I);
489 if (isa<VectorType>(Op->getType()))
490 NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
491 else
492 NewOps.push_back(Op);
495 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
496 cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr,
497 NewOps);
498 NewGEP->setIsInBounds(GEP->isInBounds());
499 return NewGEP;
501 return nullptr;
502 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
503 // If this is extracting an element from a shufflevector, figure out where
504 // it came from and extract from the appropriate input element instead.
505 // Restrict the following transformation to fixed-length vector.
506 if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
507 int SrcIdx =
508 SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
509 Value *Src;
510 unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
511 ->getNumElements();
513 if (SrcIdx < 0)
514 return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
515 if (SrcIdx < (int)LHSWidth)
516 Src = SVI->getOperand(0);
517 else {
518 SrcIdx -= LHSWidth;
519 Src = SVI->getOperand(1);
521 Type *Int32Ty = Type::getInt32Ty(EI.getContext());
522 return ExtractElementInst::Create(
523 Src, ConstantInt::get(Int32Ty, SrcIdx, false));
525 } else if (auto *CI = dyn_cast<CastInst>(I)) {
526 // Canonicalize extractelement(cast) -> cast(extractelement).
527 // Bitcasts can change the number of vector elements, and they cost
528 // nothing.
529 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
530 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
531 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
535 return nullptr;
538 /// If V is a shuffle of values that ONLY returns elements from either LHS or
539 /// RHS, return the shuffle mask and true. Otherwise, return false.
540 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
541 SmallVectorImpl<int> &Mask) {
542 assert(LHS->getType() == RHS->getType() &&
543 "Invalid CollectSingleShuffleElements");
544 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
546 if (match(V, m_Undef())) {
547 Mask.assign(NumElts, -1);
548 return true;
551 if (V == LHS) {
552 for (unsigned i = 0; i != NumElts; ++i)
553 Mask.push_back(i);
554 return true;
557 if (V == RHS) {
558 for (unsigned i = 0; i != NumElts; ++i)
559 Mask.push_back(i + NumElts);
560 return true;
563 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
564 // If this is an insert of an extract from some other vector, include it.
565 Value *VecOp = IEI->getOperand(0);
566 Value *ScalarOp = IEI->getOperand(1);
567 Value *IdxOp = IEI->getOperand(2);
569 if (!isa<ConstantInt>(IdxOp))
570 return false;
571 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
573 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
574 // We can handle this if the vector we are inserting into is
575 // transitively ok.
576 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
577 // If so, update the mask to reflect the inserted undef.
578 Mask[InsertedIdx] = -1;
579 return true;
581 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
582 if (isa<ConstantInt>(EI->getOperand(1))) {
583 unsigned ExtractedIdx =
584 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
585 unsigned NumLHSElts =
586 cast<FixedVectorType>(LHS->getType())->getNumElements();
588 // This must be extracting from either LHS or RHS.
589 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
590 // We can handle this if the vector we are inserting into is
591 // transitively ok.
592 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
593 // If so, update the mask to reflect the inserted value.
594 if (EI->getOperand(0) == LHS) {
595 Mask[InsertedIdx % NumElts] = ExtractedIdx;
596 } else {
597 assert(EI->getOperand(0) == RHS);
598 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
600 return true;
607 return false;
610 /// If we have insertion into a vector that is wider than the vector that we
611 /// are extracting from, try to widen the source vector to allow a single
612 /// shufflevector to replace one or more insert/extract pairs.
613 static void replaceExtractElements(InsertElementInst *InsElt,
614 ExtractElementInst *ExtElt,
615 InstCombinerImpl &IC) {
616 auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
617 auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
618 unsigned NumInsElts = InsVecType->getNumElements();
619 unsigned NumExtElts = ExtVecType->getNumElements();
621 // The inserted-to vector must be wider than the extracted-from vector.
622 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
623 NumExtElts >= NumInsElts)
624 return;
626 // Create a shuffle mask to widen the extended-from vector using poison
627 // values. The mask selects all of the values of the original vector followed
628 // by as many poison values as needed to create a vector of the same length
629 // as the inserted-to vector.
630 SmallVector<int, 16> ExtendMask;
631 for (unsigned i = 0; i < NumExtElts; ++i)
632 ExtendMask.push_back(i);
633 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
634 ExtendMask.push_back(-1);
636 Value *ExtVecOp = ExtElt->getVectorOperand();
637 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
638 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
639 ? ExtVecOpInst->getParent()
640 : ExtElt->getParent();
642 // TODO: This restriction matches the basic block check below when creating
643 // new extractelement instructions. If that limitation is removed, this one
644 // could also be removed. But for now, we just bail out to ensure that we
645 // will replace the extractelement instruction that is feeding our
646 // insertelement instruction. This allows the insertelement to then be
647 // replaced by a shufflevector. If the insertelement is not replaced, we can
648 // induce infinite looping because there's an optimization for extractelement
649 // that will delete our widening shuffle. This would trigger another attempt
650 // here to create that shuffle, and we spin forever.
651 if (InsertionBlock != InsElt->getParent())
652 return;
654 // TODO: This restriction matches the check in visitInsertElementInst() and
655 // prevents an infinite loop caused by not turning the extract/insert pair
656 // into a shuffle. We really should not need either check, but we're lacking
657 // folds for shufflevectors because we're afraid to generate shuffle masks
658 // that the backend can't handle.
659 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
660 return;
662 auto *WideVec =
663 new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask);
665 // Insert the new shuffle after the vector operand of the extract is defined
666 // (as long as it's not a PHI) or at the start of the basic block of the
667 // extract, so any subsequent extracts in the same basic block can use it.
668 // TODO: Insert before the earliest ExtractElementInst that is replaced.
669 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
670 WideVec->insertAfter(ExtVecOpInst);
671 else
672 IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
674 // Replace extracts from the original narrow vector with extracts from the new
675 // wide vector.
676 for (User *U : ExtVecOp->users()) {
677 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
678 if (!OldExt || OldExt->getParent() != WideVec->getParent())
679 continue;
680 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
681 NewExt->insertAfter(OldExt);
682 IC.replaceInstUsesWith(*OldExt, NewExt);
686 /// We are building a shuffle to create V, which is a sequence of insertelement,
687 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
688 /// not rely on the second vector source. Return a std::pair containing the
689 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
690 /// parameter as required.
692 /// Note: we intentionally don't try to fold earlier shuffles since they have
693 /// often been chosen carefully to be efficiently implementable on the target.
694 using ShuffleOps = std::pair<Value *, Value *>;
696 static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
697 Value *PermittedRHS,
698 InstCombinerImpl &IC) {
699 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
700 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
702 if (match(V, m_Undef())) {
703 Mask.assign(NumElts, -1);
704 return std::make_pair(
705 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
708 if (isa<ConstantAggregateZero>(V)) {
709 Mask.assign(NumElts, 0);
710 return std::make_pair(V, nullptr);
713 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
714 // If this is an insert of an extract from some other vector, include it.
715 Value *VecOp = IEI->getOperand(0);
716 Value *ScalarOp = IEI->getOperand(1);
717 Value *IdxOp = IEI->getOperand(2);
719 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
720 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
721 unsigned ExtractedIdx =
722 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
723 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
725 // Either the extracted from or inserted into vector must be RHSVec,
726 // otherwise we'd end up with a shuffle of three inputs.
727 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
728 Value *RHS = EI->getOperand(0);
729 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
730 assert(LR.second == nullptr || LR.second == RHS);
732 if (LR.first->getType() != RHS->getType()) {
733 // Although we are giving up for now, see if we can create extracts
734 // that match the inserts for another round of combining.
735 replaceExtractElements(IEI, EI, IC);
737 // We tried our best, but we can't find anything compatible with RHS
738 // further up the chain. Return a trivial shuffle.
739 for (unsigned i = 0; i < NumElts; ++i)
740 Mask[i] = i;
741 return std::make_pair(V, nullptr);
744 unsigned NumLHSElts =
745 cast<FixedVectorType>(RHS->getType())->getNumElements();
746 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
747 return std::make_pair(LR.first, RHS);
750 if (VecOp == PermittedRHS) {
751 // We've gone as far as we can: anything on the other side of the
752 // extractelement will already have been converted into a shuffle.
753 unsigned NumLHSElts =
754 cast<FixedVectorType>(EI->getOperand(0)->getType())
755 ->getNumElements();
756 for (unsigned i = 0; i != NumElts; ++i)
757 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
758 return std::make_pair(EI->getOperand(0), PermittedRHS);
761 // If this insertelement is a chain that comes from exactly these two
762 // vectors, return the vector and the effective shuffle.
763 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
764 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
765 Mask))
766 return std::make_pair(EI->getOperand(0), PermittedRHS);
771 // Otherwise, we can't do anything fancy. Return an identity vector.
772 for (unsigned i = 0; i != NumElts; ++i)
773 Mask.push_back(i);
774 return std::make_pair(V, nullptr);
777 /// Look for chain of insertvalue's that fully define an aggregate, and trace
778 /// back the values inserted, see if they are all were extractvalue'd from
779 /// the same source aggregate from the exact same element indexes.
780 /// If they were, just reuse the source aggregate.
781 /// This potentially deals with PHI indirections.
782 Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
783 InsertValueInst &OrigIVI) {
784 Type *AggTy = OrigIVI.getType();
785 unsigned NumAggElts;
786 switch (AggTy->getTypeID()) {
787 case Type::StructTyID:
788 NumAggElts = AggTy->getStructNumElements();
789 break;
790 case Type::ArrayTyID:
791 NumAggElts = AggTy->getArrayNumElements();
792 break;
793 default:
794 llvm_unreachable("Unhandled aggregate type?");
797 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
798 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
799 // FIXME: any interesting patterns to be caught with larger limit?
800 assert(NumAggElts > 0 && "Aggregate should have elements.");
801 if (NumAggElts > 2)
802 return nullptr;
804 static constexpr auto NotFound = None;
805 static constexpr auto FoundMismatch = nullptr;
807 // Try to find a value of each element of an aggregate.
808 // FIXME: deal with more complex, not one-dimensional, aggregate types
809 SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
811 // Do we know values for each element of the aggregate?
812 auto KnowAllElts = [&AggElts]() {
813 return all_of(AggElts,
814 [](Optional<Instruction *> Elt) { return Elt != NotFound; });
817 int Depth = 0;
819 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
820 // every element being overwritten twice, which should never happen.
821 static const int DepthLimit = 2 * NumAggElts;
823 // Recurse up the chain of `insertvalue` aggregate operands until either we've
824 // reconstructed full initializer or can't visit any more `insertvalue`'s.
825 for (InsertValueInst *CurrIVI = &OrigIVI;
826 Depth < DepthLimit && CurrIVI && !KnowAllElts();
827 CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
828 ++Depth) {
829 auto *InsertedValue =
830 dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
831 if (!InsertedValue)
832 return nullptr; // Inserted value must be produced by an instruction.
834 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
836 // Don't bother with more than single-level aggregates.
837 if (Indices.size() != 1)
838 return nullptr; // FIXME: deal with more complex aggregates?
840 // Now, we may have already previously recorded the value for this element
841 // of an aggregate. If we did, that means the CurrIVI will later be
842 // overwritten with the already-recorded value. But if not, let's record it!
843 Optional<Instruction *> &Elt = AggElts[Indices.front()];
844 Elt = Elt.getValueOr(InsertedValue);
846 // FIXME: should we handle chain-terminating undef base operand?
849 // Was that sufficient to deduce the full initializer for the aggregate?
850 if (!KnowAllElts())
851 return nullptr; // Give up then.
853 // We now want to find the source[s] of the aggregate elements we've found.
854 // And with "source" we mean the original aggregate[s] from which
855 // the inserted elements were extracted. This may require PHI translation.
857 enum class AggregateDescription {
858 /// When analyzing the value that was inserted into an aggregate, we did
859 /// not manage to find defining `extractvalue` instruction to analyze.
860 NotFound,
861 /// When analyzing the value that was inserted into an aggregate, we did
862 /// manage to find defining `extractvalue` instruction[s], and everything
863 /// matched perfectly - aggregate type, element insertion/extraction index.
864 Found,
865 /// When analyzing the value that was inserted into an aggregate, we did
866 /// manage to find defining `extractvalue` instruction, but there was
867 /// a mismatch: either the source type from which the extraction was didn't
868 /// match the aggregate type into which the insertion was,
869 /// or the extraction/insertion channels mismatched,
870 /// or different elements had different source aggregates.
871 FoundMismatch
873 auto Describe = [](Optional<Value *> SourceAggregate) {
874 if (SourceAggregate == NotFound)
875 return AggregateDescription::NotFound;
876 if (*SourceAggregate == FoundMismatch)
877 return AggregateDescription::FoundMismatch;
878 return AggregateDescription::Found;
881 // Given the value \p Elt that was being inserted into element \p EltIdx of an
882 // aggregate AggTy, see if \p Elt was originally defined by an
883 // appropriate extractvalue (same element index, same aggregate type).
884 // If found, return the source aggregate from which the extraction was.
885 // If \p PredBB is provided, does PHI translation of an \p Elt first.
886 auto FindSourceAggregate =
887 [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
888 Optional<BasicBlock *> PredBB) -> Optional<Value *> {
889 // For now(?), only deal with, at most, a single level of PHI indirection.
890 if (UseBB && PredBB)
891 Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
892 // FIXME: deal with multiple levels of PHI indirection?
894 // Did we find an extraction?
895 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
896 if (!EVI)
897 return NotFound;
899 Value *SourceAggregate = EVI->getAggregateOperand();
901 // Is the extraction from the same type into which the insertion was?
902 if (SourceAggregate->getType() != AggTy)
903 return FoundMismatch;
904 // And the element index doesn't change between extraction and insertion?
905 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
906 return FoundMismatch;
908 return SourceAggregate; // AggregateDescription::Found
911 // Given elements AggElts that were constructing an aggregate OrigIVI,
912 // see if we can find appropriate source aggregate for each of the elements,
913 // and see it's the same aggregate for each element. If so, return it.
914 auto FindCommonSourceAggregate =
915 [&](Optional<BasicBlock *> UseBB,
916 Optional<BasicBlock *> PredBB) -> Optional<Value *> {
917 Optional<Value *> SourceAggregate;
919 for (auto I : enumerate(AggElts)) {
920 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
921 "We don't store nullptr in SourceAggregate!");
922 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
923 (I.index() != 0) &&
924 "SourceAggregate should be valid after the the first element,");
926 // For this element, is there a plausible source aggregate?
927 // FIXME: we could special-case undef element, IFF we know that in the
928 // source aggregate said element isn't poison.
929 Optional<Value *> SourceAggregateForElement =
930 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
932 // Okay, what have we found? Does that correlate with previous findings?
934 // Regardless of whether or not we have previously found source
935 // aggregate for previous elements (if any), if we didn't find one for
936 // this element, passthrough whatever we have just found.
937 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
938 return SourceAggregateForElement;
940 // Okay, we have found source aggregate for this element.
941 // Let's see what we already know from previous elements, if any.
942 switch (Describe(SourceAggregate)) {
943 case AggregateDescription::NotFound:
944 // This is apparently the first element that we have examined.
945 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
946 continue; // Great, now look at next element.
947 case AggregateDescription::Found:
948 // We have previously already successfully examined other elements.
949 // Is this the same source aggregate we've found for other elements?
950 if (*SourceAggregateForElement != *SourceAggregate)
951 return FoundMismatch;
952 continue; // Still the same aggregate, look at next element.
953 case AggregateDescription::FoundMismatch:
954 llvm_unreachable("Can't happen. We would have early-exited then.");
958 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
959 "Must be a valid Value");
960 return *SourceAggregate;
963 Optional<Value *> SourceAggregate;
965 // Can we find the source aggregate without looking at predecessors?
966 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
967 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
968 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
969 return nullptr; // Conflicting source aggregates!
970 ++NumAggregateReconstructionsSimplified;
971 return replaceInstUsesWith(OrigIVI, *SourceAggregate);
974 // Okay, apparently we need to look at predecessors.
976 // We should be smart about picking the "use" basic block, which will be the
977 // merge point for aggregate, where we'll insert the final PHI that will be
978 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
979 // We should look in which blocks each of the AggElts is being defined,
980 // they all should be defined in the same basic block.
981 BasicBlock *UseBB = nullptr;
983 for (const Optional<Instruction *> &I : AggElts) {
984 BasicBlock *BB = (*I)->getParent();
985 // If it's the first instruction we've encountered, record the basic block.
986 if (!UseBB) {
987 UseBB = BB;
988 continue;
990 // Otherwise, this must be the same basic block we've seen previously.
991 if (UseBB != BB)
992 return nullptr;
995 // If *all* of the elements are basic-block-independent, meaning they are
996 // either function arguments, or constant expressions, then if we didn't
997 // handle them without predecessor-aware handling, we won't handle them now.
998 if (!UseBB)
999 return nullptr;
1001 // If we didn't manage to find source aggregate without looking at
1002 // predecessors, and there are no predecessors to look at, then we're done.
1003 if (pred_empty(UseBB))
1004 return nullptr;
1006 // Arbitrary predecessor count limit.
1007 static const int PredCountLimit = 64;
1009 // Cache the (non-uniqified!) list of predecessors in a vector,
1010 // checking the limit at the same time for efficiency.
1011 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1012 for (BasicBlock *Pred : predecessors(UseBB)) {
1013 // Don't bother if there are too many predecessors.
1014 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1015 return nullptr;
1016 Preds.emplace_back(Pred);
1019 // For each predecessor, what is the source aggregate,
1020 // from which all the elements were originally extracted from?
1021 // Note that we want for the map to have stable iteration order!
1022 SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
1023 for (BasicBlock *Pred : Preds) {
1024 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1025 SourceAggregates.insert({Pred, nullptr});
1026 // Did we already evaluate this predecessor?
1027 if (!IV.second)
1028 continue;
1030 // Let's hope that when coming from predecessor Pred, all elements of the
1031 // aggregate produced by OrigIVI must have been originally extracted from
1032 // the same aggregate. Is that so? Can we find said original aggregate?
1033 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1034 if (Describe(SourceAggregate) != AggregateDescription::Found)
1035 return nullptr; // Give up.
1036 IV.first->second = *SourceAggregate;
1039 // All good! Now we just need to thread the source aggregates here.
1040 // Note that we have to insert the new PHI here, ourselves, because we can't
1041 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1042 // Note that the same block can be a predecessor more than once,
1043 // and we need to preserve that invariant for the PHI node.
1044 BuilderTy::InsertPointGuard Guard(Builder);
1045 Builder.SetInsertPoint(UseBB->getFirstNonPHI());
1046 auto *PHI =
1047 Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1048 for (BasicBlock *Pred : Preds)
1049 PHI->addIncoming(SourceAggregates[Pred], Pred);
1051 ++NumAggregateReconstructionsSimplified;
1052 return replaceInstUsesWith(OrigIVI, PHI);
1055 /// Try to find redundant insertvalue instructions, like the following ones:
1056 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1057 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1058 /// Here the second instruction inserts values at the same indices, as the
1059 /// first one, making the first one redundant.
1060 /// It should be transformed to:
1061 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1062 Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1063 bool IsRedundant = false;
1064 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1066 // If there is a chain of insertvalue instructions (each of them except the
1067 // last one has only one use and it's another insertvalue insn from this
1068 // chain), check if any of the 'children' uses the same indices as the first
1069 // instruction. In this case, the first one is redundant.
1070 Value *V = &I;
1071 unsigned Depth = 0;
1072 while (V->hasOneUse() && Depth < 10) {
1073 User *U = V->user_back();
1074 auto UserInsInst = dyn_cast<InsertValueInst>(U);
1075 if (!UserInsInst || U->getOperand(0) != V)
1076 break;
1077 if (UserInsInst->getIndices() == FirstIndices) {
1078 IsRedundant = true;
1079 break;
1081 V = UserInsInst;
1082 Depth++;
1085 if (IsRedundant)
1086 return replaceInstUsesWith(I, I.getOperand(0));
1088 if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1089 return NewI;
1091 return nullptr;
1094 static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1095 // Can not analyze scalable type, the number of elements is not a compile-time
1096 // constant.
1097 if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
1098 return false;
1100 int MaskSize = Shuf.getShuffleMask().size();
1101 int VecSize =
1102 cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1104 // A vector select does not change the size of the operands.
1105 if (MaskSize != VecSize)
1106 return false;
1108 // Each mask element must be undefined or choose a vector element from one of
1109 // the source operands without crossing vector lanes.
1110 for (int i = 0; i != MaskSize; ++i) {
1111 int Elt = Shuf.getMaskValue(i);
1112 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1113 return false;
1116 return true;
1119 /// Turn a chain of inserts that splats a value into an insert + shuffle:
1120 /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1121 /// shufflevector(insertelt(X, %k, 0), poison, zero)
1122 static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1123 // We are interested in the last insert in a chain. So if this insert has a
1124 // single user and that user is an insert, bail.
1125 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1126 return nullptr;
1128 VectorType *VecTy = InsElt.getType();
1129 // Can not handle scalable type, the number of elements is not a compile-time
1130 // constant.
1131 if (isa<ScalableVectorType>(VecTy))
1132 return nullptr;
1133 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1135 // Do not try to do this for a one-element vector, since that's a nop,
1136 // and will cause an inf-loop.
1137 if (NumElements == 1)
1138 return nullptr;
1140 Value *SplatVal = InsElt.getOperand(1);
1141 InsertElementInst *CurrIE = &InsElt;
1142 SmallBitVector ElementPresent(NumElements, false);
1143 InsertElementInst *FirstIE = nullptr;
1145 // Walk the chain backwards, keeping track of which indices we inserted into,
1146 // until we hit something that isn't an insert of the splatted value.
1147 while (CurrIE) {
1148 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1149 if (!Idx || CurrIE->getOperand(1) != SplatVal)
1150 return nullptr;
1152 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1153 // Check none of the intermediate steps have any additional uses, except
1154 // for the root insertelement instruction, which can be re-used, if it
1155 // inserts at position 0.
1156 if (CurrIE != &InsElt &&
1157 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1158 return nullptr;
1160 ElementPresent[Idx->getZExtValue()] = true;
1161 FirstIE = CurrIE;
1162 CurrIE = NextIE;
1165 // If this is just a single insertelement (not a sequence), we are done.
1166 if (FirstIE == &InsElt)
1167 return nullptr;
1169 // If we are not inserting into an undef vector, make sure we've seen an
1170 // insert into every element.
1171 // TODO: If the base vector is not undef, it might be better to create a splat
1172 // and then a select-shuffle (blend) with the base vector.
1173 if (!match(FirstIE->getOperand(0), m_Undef()))
1174 if (!ElementPresent.all())
1175 return nullptr;
1177 // Create the insert + shuffle.
1178 Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
1179 PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1180 Constant *Zero = ConstantInt::get(Int32Ty, 0);
1181 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1182 FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);
1184 // Splat from element 0, but replace absent elements with undef in the mask.
1185 SmallVector<int, 16> Mask(NumElements, 0);
1186 for (unsigned i = 0; i != NumElements; ++i)
1187 if (!ElementPresent[i])
1188 Mask[i] = -1;
1190 return new ShuffleVectorInst(FirstIE, PoisonVec, Mask);
1193 /// Try to fold an insert element into an existing splat shuffle by changing
1194 /// the shuffle's mask to include the index of this insert element.
1195 static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1196 // Check if the vector operand of this insert is a canonical splat shuffle.
1197 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1198 if (!Shuf || !Shuf->isZeroEltSplat())
1199 return nullptr;
1201 // Bail out early if shuffle is scalable type. The number of elements in
1202 // shuffle mask is unknown at compile-time.
1203 if (isa<ScalableVectorType>(Shuf->getType()))
1204 return nullptr;
1206 // Check for a constant insertion index.
1207 uint64_t IdxC;
1208 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1209 return nullptr;
1211 // Check if the splat shuffle's input is the same as this insert's scalar op.
1212 Value *X = InsElt.getOperand(1);
1213 Value *Op0 = Shuf->getOperand(0);
1214 if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1215 return nullptr;
1217 // Replace the shuffle mask element at the index of this insert with a zero.
1218 // For example:
1219 // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
1220 // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
1221 unsigned NumMaskElts =
1222 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1223 SmallVector<int, 16> NewMask(NumMaskElts);
1224 for (unsigned i = 0; i != NumMaskElts; ++i)
1225 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1227 return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
1230 /// Try to fold an extract+insert element into an existing identity shuffle by
1231 /// changing the shuffle's mask to include the index of this insert element.
1232 static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1233 // Check if the vector operand of this insert is an identity shuffle.
1234 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1235 if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
1236 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1237 return nullptr;
1239 // Bail out early if shuffle is scalable type. The number of elements in
1240 // shuffle mask is unknown at compile-time.
1241 if (isa<ScalableVectorType>(Shuf->getType()))
1242 return nullptr;
1244 // Check for a constant insertion index.
1245 uint64_t IdxC;
1246 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1247 return nullptr;
1249 // Check if this insert's scalar op is extracted from the identity shuffle's
1250 // input vector.
1251 Value *Scalar = InsElt.getOperand(1);
1252 Value *X = Shuf->getOperand(0);
1253 if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1254 return nullptr;
1256 // Replace the shuffle mask element at the index of this extract+insert with
1257 // that same index value.
1258 // For example:
1259 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1260 unsigned NumMaskElts =
1261 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1262 SmallVector<int, 16> NewMask(NumMaskElts);
1263 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1264 for (unsigned i = 0; i != NumMaskElts; ++i) {
1265 if (i != IdxC) {
1266 // All mask elements besides the inserted element remain the same.
1267 NewMask[i] = OldMask[i];
1268 } else if (OldMask[i] == (int)IdxC) {
1269 // If the mask element was already set, there's nothing to do
1270 // (demanded elements analysis may unset it later).
1271 return nullptr;
1272 } else {
1273 assert(OldMask[i] == UndefMaskElem &&
1274 "Unexpected shuffle mask element for identity shuffle");
1275 NewMask[i] = IdxC;
1279 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1282 /// If we have an insertelement instruction feeding into another insertelement
1283 /// and the 2nd is inserting a constant into the vector, canonicalize that
1284 /// constant insertion before the insertion of a variable:
1286 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1287 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1289 /// This has the potential of eliminating the 2nd insertelement instruction
1290 /// via constant folding of the scalar constant into a vector constant.
1291 static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1292 InstCombiner::BuilderTy &Builder) {
1293 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1294 if (!InsElt1 || !InsElt1->hasOneUse())
1295 return nullptr;
1297 Value *X, *Y;
1298 Constant *ScalarC;
1299 ConstantInt *IdxC1, *IdxC2;
1300 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1301 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1302 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1303 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1304 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1305 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1306 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1309 return nullptr;
1312 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1313 /// --> shufflevector X, CVec', Mask'
1314 static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1315 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1316 // Bail out if the parent has more than one use. In that case, we'd be
1317 // replacing the insertelt with a shuffle, and that's not a clear win.
1318 if (!Inst || !Inst->hasOneUse())
1319 return nullptr;
1320 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1321 // The shuffle must have a constant vector operand. The insertelt must have
1322 // a constant scalar being inserted at a constant position in the vector.
1323 Constant *ShufConstVec, *InsEltScalar;
1324 uint64_t InsEltIndex;
1325 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1326 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1327 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1328 return nullptr;
1330 // Adding an element to an arbitrary shuffle could be expensive, but a
1331 // shuffle that selects elements from vectors without crossing lanes is
1332 // assumed cheap.
1333 // If we're just adding a constant into that shuffle, it will still be
1334 // cheap.
1335 if (!isShuffleEquivalentToSelect(*Shuf))
1336 return nullptr;
1338 // From the above 'select' check, we know that the mask has the same number
1339 // of elements as the vector input operands. We also know that each constant
1340 // input element is used in its lane and can not be used more than once by
1341 // the shuffle. Therefore, replace the constant in the shuffle's constant
1342 // vector with the insertelt constant. Replace the constant in the shuffle's
1343 // mask vector with the insertelt index plus the length of the vector
1344 // (because the constant vector operand of a shuffle is always the 2nd
1345 // operand).
1346 ArrayRef<int> Mask = Shuf->getShuffleMask();
1347 unsigned NumElts = Mask.size();
1348 SmallVector<Constant *, 16> NewShufElts(NumElts);
1349 SmallVector<int, 16> NewMaskElts(NumElts);
1350 for (unsigned I = 0; I != NumElts; ++I) {
1351 if (I == InsEltIndex) {
1352 NewShufElts[I] = InsEltScalar;
1353 NewMaskElts[I] = InsEltIndex + NumElts;
1354 } else {
1355 // Copy over the existing values.
1356 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1357 NewMaskElts[I] = Mask[I];
1361 // Create new operands for a shuffle that includes the constant of the
1362 // original insertelt. The old shuffle will be dead now.
1363 return new ShuffleVectorInst(Shuf->getOperand(0),
1364 ConstantVector::get(NewShufElts), NewMaskElts);
1365 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1366 // Transform sequences of insertelements ops with constant data/indexes into
1367 // a single shuffle op.
1368 // Can not handle scalable type, the number of elements needed to create
1369 // shuffle mask is not a compile-time constant.
1370 if (isa<ScalableVectorType>(InsElt.getType()))
1371 return nullptr;
1372 unsigned NumElts =
1373 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1375 uint64_t InsertIdx[2];
1376 Constant *Val[2];
1377 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1378 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1379 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1380 !match(IEI->getOperand(1), m_Constant(Val[1])))
1381 return nullptr;
1382 SmallVector<Constant *, 16> Values(NumElts);
1383 SmallVector<int, 16> Mask(NumElts);
1384 auto ValI = std::begin(Val);
1385 // Generate new constant vector and mask.
1386 // We have 2 values/masks from the insertelements instructions. Insert them
1387 // into new value/mask vectors.
1388 for (uint64_t I : InsertIdx) {
1389 if (!Values[I]) {
1390 Values[I] = *ValI;
1391 Mask[I] = NumElts + I;
1393 ++ValI;
1395 // Remaining values are filled with 'undef' values.
1396 for (unsigned I = 0; I < NumElts; ++I) {
1397 if (!Values[I]) {
1398 Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1399 Mask[I] = I;
1402 // Create new operands for a shuffle that includes the constant of the
1403 // original insertelt.
1404 return new ShuffleVectorInst(IEI->getOperand(0),
1405 ConstantVector::get(Values), Mask);
1407 return nullptr;
1410 Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1411 Value *VecOp = IE.getOperand(0);
1412 Value *ScalarOp = IE.getOperand(1);
1413 Value *IdxOp = IE.getOperand(2);
1415 if (auto *V = SimplifyInsertElementInst(
1416 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1417 return replaceInstUsesWith(IE, V);
1419 // If the scalar is bitcast and inserted into undef, do the insert in the
1420 // source type followed by bitcast.
1421 // TODO: Generalize for insert into any constant, not just undef?
1422 Value *ScalarSrc;
1423 if (match(VecOp, m_Undef()) &&
1424 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1425 (ScalarSrc->getType()->isIntegerTy() ||
1426 ScalarSrc->getType()->isFloatingPointTy())) {
1427 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1428 // bitcast (inselt undef, ScalarSrc, IdxOp)
1429 Type *ScalarTy = ScalarSrc->getType();
1430 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1431 UndefValue *NewUndef = UndefValue::get(VecTy);
1432 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1433 return new BitCastInst(NewInsElt, IE.getType());
1436 // If the vector and scalar are both bitcast from the same element type, do
1437 // the insert in that source type followed by bitcast.
1438 Value *VecSrc;
1439 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1440 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1441 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1442 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1443 cast<VectorType>(VecSrc->getType())->getElementType() ==
1444 ScalarSrc->getType()) {
1445 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1446 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1447 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1448 return new BitCastInst(NewInsElt, IE.getType());
1451 // If the inserted element was extracted from some other fixed-length vector
1452 // and both indexes are valid constants, try to turn this into a shuffle.
1453 // Can not handle scalable vector type, the number of elements needed to
1454 // create shuffle mask is not a compile-time constant.
1455 uint64_t InsertedIdx, ExtractedIdx;
1456 Value *ExtVecOp;
1457 if (isa<FixedVectorType>(IE.getType()) &&
1458 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1459 match(ScalarOp,
1460 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1461 isa<FixedVectorType>(ExtVecOp->getType()) &&
1462 ExtractedIdx <
1463 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1464 // TODO: Looking at the user(s) to determine if this insert is a
1465 // fold-to-shuffle opportunity does not match the usual instcombine
1466 // constraints. We should decide if the transform is worthy based only
1467 // on this instruction and its operands, but that may not work currently.
1469 // Here, we are trying to avoid creating shuffles before reaching
1470 // the end of a chain of extract-insert pairs. This is complicated because
1471 // we do not generally form arbitrary shuffle masks in instcombine
1472 // (because those may codegen poorly), but collectShuffleElements() does
1473 // exactly that.
1475 // The rules for determining what is an acceptable target-independent
1476 // shuffle mask are fuzzy because they evolve based on the backend's
1477 // capabilities and real-world impact.
1478 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1479 if (!Insert.hasOneUse())
1480 return true;
1481 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1482 if (!InsertUser)
1483 return true;
1484 return false;
1487 // Try to form a shuffle from a chain of extract-insert ops.
1488 if (isShuffleRootCandidate(IE)) {
1489 SmallVector<int, 16> Mask;
1490 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1492 // The proposed shuffle may be trivial, in which case we shouldn't
1493 // perform the combine.
1494 if (LR.first != &IE && LR.second != &IE) {
1495 // We now have a shuffle of LHS, RHS, Mask.
1496 if (LR.second == nullptr)
1497 LR.second = UndefValue::get(LR.first->getType());
1498 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1503 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1504 unsigned VWidth = VecTy->getNumElements();
1505 APInt UndefElts(VWidth, 0);
1506 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1507 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1508 if (V != &IE)
1509 return replaceInstUsesWith(IE, V);
1510 return &IE;
1514 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1515 return Shuf;
1517 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1518 return NewInsElt;
1520 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1521 return Broadcast;
1523 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1524 return Splat;
1526 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1527 return IdentityShuf;
1529 return nullptr;
1532 /// Return true if we can evaluate the specified expression tree if the vector
1533 /// elements were shuffled in a different order.
1534 static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1535 unsigned Depth = 5) {
1536 // We can always reorder the elements of a constant.
1537 if (isa<Constant>(V))
1538 return true;
1540 // We won't reorder vector arguments. No IPO here.
1541 Instruction *I = dyn_cast<Instruction>(V);
1542 if (!I) return false;
1544 // Two users may expect different orders of the elements. Don't try it.
1545 if (!I->hasOneUse())
1546 return false;
1548 if (Depth == 0) return false;
1550 switch (I->getOpcode()) {
1551 case Instruction::UDiv:
1552 case Instruction::SDiv:
1553 case Instruction::URem:
1554 case Instruction::SRem:
1555 // Propagating an undefined shuffle mask element to integer div/rem is not
1556 // allowed because those opcodes can create immediate undefined behavior
1557 // from an undefined element in an operand.
1558 if (llvm::is_contained(Mask, -1))
1559 return false;
1560 LLVM_FALLTHROUGH;
1561 case Instruction::Add:
1562 case Instruction::FAdd:
1563 case Instruction::Sub:
1564 case Instruction::FSub:
1565 case Instruction::Mul:
1566 case Instruction::FMul:
1567 case Instruction::FDiv:
1568 case Instruction::FRem:
1569 case Instruction::Shl:
1570 case Instruction::LShr:
1571 case Instruction::AShr:
1572 case Instruction::And:
1573 case Instruction::Or:
1574 case Instruction::Xor:
1575 case Instruction::ICmp:
1576 case Instruction::FCmp:
1577 case Instruction::Trunc:
1578 case Instruction::ZExt:
1579 case Instruction::SExt:
1580 case Instruction::FPToUI:
1581 case Instruction::FPToSI:
1582 case Instruction::UIToFP:
1583 case Instruction::SIToFP:
1584 case Instruction::FPTrunc:
1585 case Instruction::FPExt:
1586 case Instruction::GetElementPtr: {
1587 // Bail out if we would create longer vector ops. We could allow creating
1588 // longer vector ops, but that may result in more expensive codegen.
1589 Type *ITy = I->getType();
1590 if (ITy->isVectorTy() &&
1591 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1592 return false;
1593 for (Value *Operand : I->operands()) {
1594 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1595 return false;
1597 return true;
1599 case Instruction::InsertElement: {
1600 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1601 if (!CI) return false;
1602 int ElementNumber = CI->getLimitedValue();
1604 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1605 // can't put an element into multiple indices.
1606 bool SeenOnce = false;
1607 for (int i = 0, e = Mask.size(); i != e; ++i) {
1608 if (Mask[i] == ElementNumber) {
1609 if (SeenOnce)
1610 return false;
1611 SeenOnce = true;
1614 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1617 return false;
1620 /// Rebuild a new instruction just like 'I' but with the new operands given.
1621 /// In the event of type mismatch, the type of the operands is correct.
1622 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1623 // We don't want to use the IRBuilder here because we want the replacement
1624 // instructions to appear next to 'I', not the builder's insertion point.
1625 switch (I->getOpcode()) {
1626 case Instruction::Add:
1627 case Instruction::FAdd:
1628 case Instruction::Sub:
1629 case Instruction::FSub:
1630 case Instruction::Mul:
1631 case Instruction::FMul:
1632 case Instruction::UDiv:
1633 case Instruction::SDiv:
1634 case Instruction::FDiv:
1635 case Instruction::URem:
1636 case Instruction::SRem:
1637 case Instruction::FRem:
1638 case Instruction::Shl:
1639 case Instruction::LShr:
1640 case Instruction::AShr:
1641 case Instruction::And:
1642 case Instruction::Or:
1643 case Instruction::Xor: {
1644 BinaryOperator *BO = cast<BinaryOperator>(I);
1645 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1646 BinaryOperator *New =
1647 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1648 NewOps[0], NewOps[1], "", BO);
1649 if (isa<OverflowingBinaryOperator>(BO)) {
1650 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1651 New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1653 if (isa<PossiblyExactOperator>(BO)) {
1654 New->setIsExact(BO->isExact());
1656 if (isa<FPMathOperator>(BO))
1657 New->copyFastMathFlags(I);
1658 return New;
1660 case Instruction::ICmp:
1661 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1662 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1663 NewOps[0], NewOps[1]);
1664 case Instruction::FCmp:
1665 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1666 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1667 NewOps[0], NewOps[1]);
1668 case Instruction::Trunc:
1669 case Instruction::ZExt:
1670 case Instruction::SExt:
1671 case Instruction::FPToUI:
1672 case Instruction::FPToSI:
1673 case Instruction::UIToFP:
1674 case Instruction::SIToFP:
1675 case Instruction::FPTrunc:
1676 case Instruction::FPExt: {
1677 // It's possible that the mask has a different number of elements from
1678 // the original cast. We recompute the destination type to match the mask.
1679 Type *DestTy = VectorType::get(
1680 I->getType()->getScalarType(),
1681 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1682 assert(NewOps.size() == 1 && "cast with #ops != 1");
1683 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1684 "", I);
1686 case Instruction::GetElementPtr: {
1687 Value *Ptr = NewOps[0];
1688 ArrayRef<Value*> Idx = NewOps.slice(1);
1689 GetElementPtrInst *GEP = GetElementPtrInst::Create(
1690 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1691 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1692 return GEP;
1695 llvm_unreachable("failed to rebuild vector instructions");
1698 static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1699 // Mask.size() does not need to be equal to the number of vector elements.
1701 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1702 Type *EltTy = V->getType()->getScalarType();
1703 Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1704 if (match(V, m_Undef()))
1705 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1707 if (isa<ConstantAggregateZero>(V))
1708 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1710 if (Constant *C = dyn_cast<Constant>(V))
1711 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1712 Mask);
1714 Instruction *I = cast<Instruction>(V);
1715 switch (I->getOpcode()) {
1716 case Instruction::Add:
1717 case Instruction::FAdd:
1718 case Instruction::Sub:
1719 case Instruction::FSub:
1720 case Instruction::Mul:
1721 case Instruction::FMul:
1722 case Instruction::UDiv:
1723 case Instruction::SDiv:
1724 case Instruction::FDiv:
1725 case Instruction::URem:
1726 case Instruction::SRem:
1727 case Instruction::FRem:
1728 case Instruction::Shl:
1729 case Instruction::LShr:
1730 case Instruction::AShr:
1731 case Instruction::And:
1732 case Instruction::Or:
1733 case Instruction::Xor:
1734 case Instruction::ICmp:
1735 case Instruction::FCmp:
1736 case Instruction::Trunc:
1737 case Instruction::ZExt:
1738 case Instruction::SExt:
1739 case Instruction::FPToUI:
1740 case Instruction::FPToSI:
1741 case Instruction::UIToFP:
1742 case Instruction::SIToFP:
1743 case Instruction::FPTrunc:
1744 case Instruction::FPExt:
1745 case Instruction::Select:
1746 case Instruction::GetElementPtr: {
1747 SmallVector<Value*, 8> NewOps;
1748 bool NeedsRebuild =
1749 (Mask.size() !=
1750 cast<FixedVectorType>(I->getType())->getNumElements());
1751 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1752 Value *V;
1753 // Recursively call evaluateInDifferentElementOrder on vector arguments
1754 // as well. E.g. GetElementPtr may have scalar operands even if the
1755 // return value is a vector, so we need to examine the operand type.
1756 if (I->getOperand(i)->getType()->isVectorTy())
1757 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1758 else
1759 V = I->getOperand(i);
1760 NewOps.push_back(V);
1761 NeedsRebuild |= (V != I->getOperand(i));
1763 if (NeedsRebuild) {
1764 return buildNew(I, NewOps);
1766 return I;
1768 case Instruction::InsertElement: {
1769 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1771 // The insertelement was inserting at Element. Figure out which element
1772 // that becomes after shuffling. The answer is guaranteed to be unique
1773 // by CanEvaluateShuffled.
1774 bool Found = false;
1775 int Index = 0;
1776 for (int e = Mask.size(); Index != e; ++Index) {
1777 if (Mask[Index] == Element) {
1778 Found = true;
1779 break;
1783 // If element is not in Mask, no need to handle the operand 1 (element to
1784 // be inserted). Just evaluate values in operand 0 according to Mask.
1785 if (!Found)
1786 return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1788 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1789 return InsertElementInst::Create(V, I->getOperand(1),
1790 ConstantInt::get(I32Ty, Index), "", I);
1793 llvm_unreachable("failed to reorder elements of vector instruction!");
1796 // Returns true if the shuffle is extracting a contiguous range of values from
1797 // LHS, for example:
1798 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1799 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1800 // Shuffles to: |EE|FF|GG|HH|
1801 // +--+--+--+--+
1802 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1803 ArrayRef<int> Mask) {
1804 unsigned LHSElems =
1805 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
1806 unsigned MaskElems = Mask.size();
1807 unsigned BegIdx = Mask.front();
1808 unsigned EndIdx = Mask.back();
1809 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1810 return false;
1811 for (unsigned I = 0; I != MaskElems; ++I)
1812 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1813 return false;
1814 return true;
1817 /// These are the ingredients in an alternate form binary operator as described
1818 /// below.
1819 struct BinopElts {
1820 BinaryOperator::BinaryOps Opcode;
1821 Value *Op0;
1822 Value *Op1;
1823 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1824 Value *V0 = nullptr, Value *V1 = nullptr) :
1825 Opcode(Opc), Op0(V0), Op1(V1) {}
1826 operator bool() const { return Opcode != 0; }
1829 /// Binops may be transformed into binops with different opcodes and operands.
1830 /// Reverse the usual canonicalization to enable folds with the non-canonical
1831 /// form of the binop. If a transform is possible, return the elements of the
1832 /// new binop. If not, return invalid elements.
1833 static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1834 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1835 Type *Ty = BO->getType();
1836 switch (BO->getOpcode()) {
1837 case Instruction::Shl: {
1838 // shl X, C --> mul X, (1 << C)
1839 Constant *C;
1840 if (match(BO1, m_Constant(C))) {
1841 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1842 return { Instruction::Mul, BO0, ShlOne };
1844 break;
1846 case Instruction::Or: {
1847 // or X, C --> add X, C (when X and C have no common bits set)
1848 const APInt *C;
1849 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1850 return { Instruction::Add, BO0, BO1 };
1851 break;
1853 default:
1854 break;
1856 return {};
1859 static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1860 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1862 // Are we shuffling together some value and that same value after it has been
1863 // modified by a binop with a constant?
1864 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1865 Constant *C;
1866 bool Op0IsBinop;
1867 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1868 Op0IsBinop = true;
1869 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1870 Op0IsBinop = false;
1871 else
1872 return nullptr;
1874 // The identity constant for a binop leaves a variable operand unchanged. For
1875 // a vector, this is a splat of something like 0, -1, or 1.
1876 // If there's no identity constant for this binop, we're done.
1877 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1878 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1879 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1880 if (!IdC)
1881 return nullptr;
1883 // Shuffle identity constants into the lanes that return the original value.
1884 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1885 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1886 // The existing binop constant vector remains in the same operand position.
1887 ArrayRef<int> Mask = Shuf.getShuffleMask();
1888 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1889 ConstantExpr::getShuffleVector(IdC, C, Mask);
1891 bool MightCreatePoisonOrUB =
1892 is_contained(Mask, UndefMaskElem) &&
1893 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1894 if (MightCreatePoisonOrUB)
1895 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
1897 // shuf (bop X, C), X, M --> bop X, C'
1898 // shuf X, (bop X, C), M --> bop X, C'
1899 Value *X = Op0IsBinop ? Op1 : Op0;
1900 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1901 NewBO->copyIRFlags(BO);
1903 // An undef shuffle mask element may propagate as an undef constant element in
1904 // the new binop. That would produce poison where the original code might not.
1905 // If we already made a safe constant, then there's no danger.
1906 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
1907 NewBO->dropPoisonGeneratingFlags();
1908 return NewBO;
1911 /// If we have an insert of a scalar to a non-zero element of an undefined
1912 /// vector and then shuffle that value, that's the same as inserting to the zero
1913 /// element and shuffling. Splatting from the zero element is recognized as the
1914 /// canonical form of splat.
1915 static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1916 InstCombiner::BuilderTy &Builder) {
1917 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1918 ArrayRef<int> Mask = Shuf.getShuffleMask();
1919 Value *X;
1920 uint64_t IndexC;
1922 // Match a shuffle that is a splat to a non-zero element.
1923 if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
1924 m_ConstantInt(IndexC)))) ||
1925 !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
1926 return nullptr;
1928 // Insert into element 0 of an undef vector.
1929 UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1930 Constant *Zero = Builder.getInt32(0);
1931 Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1933 // Splat from element 0. Any mask element that is undefined remains undefined.
1934 // For example:
1935 // shuf (inselt undef, X, 2), undef, <2,2,undef>
1936 // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1937 unsigned NumMaskElts =
1938 cast<FixedVectorType>(Shuf.getType())->getNumElements();
1939 SmallVector<int, 16> NewMask(NumMaskElts, 0);
1940 for (unsigned i = 0; i != NumMaskElts; ++i)
1941 if (Mask[i] == UndefMaskElem)
1942 NewMask[i] = Mask[i];
1944 return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
1947 /// Try to fold shuffles that are the equivalent of a vector select.
1948 static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
1949 InstCombiner::BuilderTy &Builder,
1950 const DataLayout &DL) {
1951 if (!Shuf.isSelect())
1952 return nullptr;
1954 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
1955 // Commuting undef to operand 0 conflicts with another canonicalization.
1956 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
1957 if (!match(Shuf.getOperand(1), m_Undef()) &&
1958 Shuf.getMaskValue(0) >= (int)NumElts) {
1959 // TODO: Can we assert that both operands of a shuffle-select are not undef
1960 // (otherwise, it would have been folded by instsimplify?
1961 Shuf.commute();
1962 return &Shuf;
1965 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1966 return I;
1968 BinaryOperator *B0, *B1;
1969 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1970 !match(Shuf.getOperand(1), m_BinOp(B1)))
1971 return nullptr;
1973 Value *X, *Y;
1974 Constant *C0, *C1;
1975 bool ConstantsAreOp1;
1976 if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1977 match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1978 ConstantsAreOp1 = true;
1979 else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1980 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1981 ConstantsAreOp1 = false;
1982 else
1983 return nullptr;
1985 // We need matching binops to fold the lanes together.
1986 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1987 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1988 bool DropNSW = false;
1989 if (ConstantsAreOp1 && Opc0 != Opc1) {
1990 // TODO: We drop "nsw" if shift is converted into multiply because it may
1991 // not be correct when the shift amount is BitWidth - 1. We could examine
1992 // each vector element to determine if it is safe to keep that flag.
1993 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1994 DropNSW = true;
1995 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1996 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1997 Opc0 = AltB0.Opcode;
1998 C0 = cast<Constant>(AltB0.Op1);
1999 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2000 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2001 Opc1 = AltB1.Opcode;
2002 C1 = cast<Constant>(AltB1.Op1);
2006 if (Opc0 != Opc1)
2007 return nullptr;
2009 // The opcodes must be the same. Use a new name to make that clear.
2010 BinaryOperator::BinaryOps BOpc = Opc0;
2012 // Select the constant elements needed for the single binop.
2013 ArrayRef<int> Mask = Shuf.getShuffleMask();
2014 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2016 // We are moving a binop after a shuffle. When a shuffle has an undefined
2017 // mask element, the result is undefined, but it is not poison or undefined
2018 // behavior. That is not necessarily true for div/rem/shift.
2019 bool MightCreatePoisonOrUB =
2020 is_contained(Mask, UndefMaskElem) &&
2021 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2022 if (MightCreatePoisonOrUB)
2023 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2024 ConstantsAreOp1);
2026 Value *V;
2027 if (X == Y) {
2028 // Remove a binop and the shuffle by rearranging the constant:
2029 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2030 // shuffle (op C0, V), (op C1, V), M --> op C', V
2031 V = X;
2032 } else {
2033 // If there are 2 different variable operands, we must create a new shuffle
2034 // (select) first, so check uses to ensure that we don't end up with more
2035 // instructions than we started with.
2036 if (!B0->hasOneUse() && !B1->hasOneUse())
2037 return nullptr;
2039 // If we use the original shuffle mask and op1 is *variable*, we would be
2040 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2041 // poison. We do not have to guard against UB when *constants* are op1
2042 // because safe constants guarantee that we do not overflow sdiv/srem (and
2043 // there's no danger for other opcodes).
2044 // TODO: To allow this case, create a new shuffle mask with no undefs.
2045 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2046 return nullptr;
2048 // Note: In general, we do not create new shuffles in InstCombine because we
2049 // do not know if a target can lower an arbitrary shuffle optimally. In this
2050 // case, the shuffle uses the existing mask, so there is no additional risk.
2052 // Select the variable vectors first, then perform the binop:
2053 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2054 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2055 V = Builder.CreateShuffleVector(X, Y, Mask);
2058 Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
2059 BinaryOperator::Create(BOpc, NewC, V);
2061 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2062 // 1. If we changed an opcode, poison conditions might have changed.
2063 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2064 // where the original code did not. But if we already made a safe constant,
2065 // then there's no danger.
2066 NewBO->copyIRFlags(B0);
2067 NewBO->andIRFlags(B1);
2068 if (DropNSW)
2069 NewBO->setHasNoSignedWrap(false);
2070 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2071 NewBO->dropPoisonGeneratingFlags();
2072 return NewBO;
2075 /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2076 /// Example (little endian):
2077 /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2078 static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2079 bool IsBigEndian) {
2080 // This must be a bitcasted shuffle of 1 vector integer operand.
2081 Type *DestType = Shuf.getType();
2082 Value *X;
2083 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2084 !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2085 return nullptr;
2087 // The source type must have the same number of elements as the shuffle,
2088 // and the source element type must be larger than the shuffle element type.
2089 Type *SrcType = X->getType();
2090 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2091 cast<FixedVectorType>(SrcType)->getNumElements() !=
2092 cast<FixedVectorType>(DestType)->getNumElements() ||
2093 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2094 return nullptr;
2096 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2097 "Expected a shuffle that decreases length");
2099 // Last, check that the mask chooses the correct low bits for each narrow
2100 // element in the result.
2101 uint64_t TruncRatio =
2102 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2103 ArrayRef<int> Mask = Shuf.getShuffleMask();
2104 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2105 if (Mask[i] == UndefMaskElem)
2106 continue;
2107 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2108 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2109 if (Mask[i] != (int)LSBIndex)
2110 return nullptr;
2113 return new TruncInst(X, DestType);
2116 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2117 /// narrowing (concatenating with undef and extracting back to the original
2118 /// length). This allows replacing the wide select with a narrow select.
2119 static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2120 InstCombiner::BuilderTy &Builder) {
2121 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2122 // of the 1st vector operand of a shuffle.
2123 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2124 return nullptr;
2126 // The vector being shuffled must be a vector select that we can eliminate.
2127 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2128 Value *Cond, *X, *Y;
2129 if (!match(Shuf.getOperand(0),
2130 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2131 return nullptr;
2133 // We need a narrow condition value. It must be extended with undef elements
2134 // and have the same number of elements as this shuffle.
2135 unsigned NarrowNumElts =
2136 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2137 Value *NarrowCond;
2138 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2139 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2140 NarrowNumElts ||
2141 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2142 return nullptr;
2144 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2145 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2146 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2147 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2148 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2151 /// Try to fold an extract subvector operation.
2152 static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2153 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2154 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2155 return nullptr;
2157 // Check if we are extracting all bits of an inserted scalar:
2158 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2159 Value *X;
2160 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2161 X->getType()->getPrimitiveSizeInBits() ==
2162 Shuf.getType()->getPrimitiveSizeInBits())
2163 return new BitCastInst(X, Shuf.getType());
2165 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2166 Value *Y;
2167 ArrayRef<int> Mask;
2168 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2169 return nullptr;
2171 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2172 // then combining may result in worse codegen.
2173 if (!Op0->hasOneUse())
2174 return nullptr;
2176 // We are extracting a subvector from a shuffle. Remove excess elements from
2177 // the 1st shuffle mask to eliminate the extract.
2179 // This transform is conservatively limited to identity extracts because we do
2180 // not allow arbitrary shuffle mask creation as a target-independent transform
2181 // (because we can't guarantee that will lower efficiently).
2183 // If the extracting shuffle has an undef mask element, it transfers to the
2184 // new shuffle mask. Otherwise, copy the original mask element. Example:
2185 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2186 // shuf X, Y, <C0, undef, C2, undef>
2187 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2188 SmallVector<int, 16> NewMask(NumElts);
2189 assert(NumElts < Mask.size() &&
2190 "Identity with extract must have less elements than its inputs");
2192 for (unsigned i = 0; i != NumElts; ++i) {
2193 int ExtractMaskElt = Shuf.getMaskValue(i);
2194 int MaskElt = Mask[i];
2195 NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2197 return new ShuffleVectorInst(X, Y, NewMask);
2200 /// Try to replace a shuffle with an insertelement or try to replace a shuffle
2201 /// operand with the operand of an insertelement.
2202 static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2203 InstCombinerImpl &IC) {
2204 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2205 SmallVector<int, 16> Mask;
2206 Shuf.getShuffleMask(Mask);
2208 // The shuffle must not change vector sizes.
2209 // TODO: This restriction could be removed if the insert has only one use
2210 // (because the transform would require a new length-changing shuffle).
2211 int NumElts = Mask.size();
2212 if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
2213 return nullptr;
2215 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2216 // not be able to handle it there if the insertelement has >1 use.
2217 // If the shuffle has an insertelement operand but does not choose the
2218 // inserted scalar element from that value, then we can replace that shuffle
2219 // operand with the source vector of the insertelement.
2220 Value *X;
2221 uint64_t IdxC;
2222 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2223 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2224 if (!is_contained(Mask, (int)IdxC))
2225 return IC.replaceOperand(Shuf, 0, X);
2227 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2228 // Offset the index constant by the vector width because we are checking for
2229 // accesses to the 2nd vector input of the shuffle.
2230 IdxC += NumElts;
2231 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2232 if (!is_contained(Mask, (int)IdxC))
2233 return IC.replaceOperand(Shuf, 1, X);
2236 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2237 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2238 // We need an insertelement with a constant index.
2239 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2240 m_ConstantInt(IndexC))))
2241 return false;
2243 // Test the shuffle mask to see if it splices the inserted scalar into the
2244 // operand 1 vector of the shuffle.
2245 int NewInsIndex = -1;
2246 for (int i = 0; i != NumElts; ++i) {
2247 // Ignore undef mask elements.
2248 if (Mask[i] == -1)
2249 continue;
2251 // The shuffle takes elements of operand 1 without lane changes.
2252 if (Mask[i] == NumElts + i)
2253 continue;
2255 // The shuffle must choose the inserted scalar exactly once.
2256 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2257 return false;
2259 // The shuffle is placing the inserted scalar into element i.
2260 NewInsIndex = i;
2263 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2265 // Index is updated to the potentially translated insertion lane.
2266 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2267 return true;
2270 // If the shuffle is unnecessary, insert the scalar operand directly into
2271 // operand 1 of the shuffle. Example:
2272 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2273 Value *Scalar;
2274 ConstantInt *IndexC;
2275 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2276 return InsertElementInst::Create(V1, Scalar, IndexC);
2278 // Try again after commuting shuffle. Example:
2279 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2280 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2281 std::swap(V0, V1);
2282 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2283 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2284 return InsertElementInst::Create(V1, Scalar, IndexC);
2286 return nullptr;
2289 static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2290 // Match the operands as identity with padding (also known as concatenation
2291 // with undef) shuffles of the same source type. The backend is expected to
2292 // recreate these concatenations from a shuffle of narrow operands.
2293 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2294 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2295 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2296 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2297 return nullptr;
2299 // We limit this transform to power-of-2 types because we expect that the
2300 // backend can convert the simplified IR patterns to identical nodes as the
2301 // original IR.
2302 // TODO: If we can verify the same behavior for arbitrary types, the
2303 // power-of-2 checks can be removed.
2304 Value *X = Shuffle0->getOperand(0);
2305 Value *Y = Shuffle1->getOperand(0);
2306 if (X->getType() != Y->getType() ||
2307 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2308 !isPowerOf2_32(
2309 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2310 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2311 match(X, m_Undef()) || match(Y, m_Undef()))
2312 return nullptr;
2313 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2314 match(Shuffle1->getOperand(1), m_Undef()) &&
2315 "Unexpected operand for identity shuffle");
2317 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2318 // operands directly by adjusting the shuffle mask to account for the narrower
2319 // types:
2320 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2321 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2322 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2323 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2325 ArrayRef<int> Mask = Shuf.getShuffleMask();
2326 SmallVector<int, 16> NewMask(Mask.size(), -1);
2327 for (int i = 0, e = Mask.size(); i != e; ++i) {
2328 if (Mask[i] == -1)
2329 continue;
2331 // If this shuffle is choosing an undef element from 1 of the sources, that
2332 // element is undef.
2333 if (Mask[i] < WideElts) {
2334 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2335 continue;
2336 } else {
2337 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2338 continue;
2341 // If this shuffle is choosing from the 1st narrow op, the mask element is
2342 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2343 // element is offset down to adjust for the narrow vector widths.
2344 if (Mask[i] < WideElts) {
2345 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2346 NewMask[i] = Mask[i];
2347 } else {
2348 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2349 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2352 return new ShuffleVectorInst(X, Y, NewMask);
2355 Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2356 Value *LHS = SVI.getOperand(0);
2357 Value *RHS = SVI.getOperand(1);
2358 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2359 if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2360 SVI.getType(), ShufQuery))
2361 return replaceInstUsesWith(SVI, V);
2363 // Bail out for scalable vectors
2364 if (isa<ScalableVectorType>(LHS->getType()))
2365 return nullptr;
2367 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2368 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2370 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2372 // if X and Y are of the same (vector) type, and the element size is not
2373 // changed by the bitcasts, we can distribute the bitcasts through the
2374 // shuffle, hopefully reducing the number of instructions. We make sure that
2375 // at least one bitcast only has one use, so we don't *increase* the number of
2376 // instructions here.
2377 Value *X, *Y;
2378 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2379 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2380 X->getType()->getScalarSizeInBits() ==
2381 SVI.getType()->getScalarSizeInBits() &&
2382 (LHS->hasOneUse() || RHS->hasOneUse())) {
2383 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2384 SVI.getName() + ".uncasted");
2385 return new BitCastInst(V, SVI.getType());
2388 ArrayRef<int> Mask = SVI.getShuffleMask();
2389 Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
2391 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2392 // simulated shuffle can simplify, then this shuffle is unnecessary:
2393 // shuf (bitcast X), undef, Mask --> bitcast X'
2394 // TODO: This could be extended to allow length-changing shuffles.
2395 // The transform might also be obsoleted if we allowed canonicalization
2396 // of bitcasted shuffles.
2397 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2398 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2399 // Try to create a scaled mask constant.
2400 auto *XType = cast<FixedVectorType>(X->getType());
2401 unsigned XNumElts = XType->getNumElements();
2402 SmallVector<int, 16> ScaledMask;
2403 if (XNumElts >= VWidth) {
2404 assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2405 narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2406 } else {
2407 assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2408 if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2409 ScaledMask.clear();
2411 if (!ScaledMask.empty()) {
2412 // If the shuffled source vector simplifies, cast that value to this
2413 // shuffle's type.
2414 if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
2415 ScaledMask, XType, ShufQuery))
2416 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2420 // shuffle x, x, mask --> shuffle x, undef, mask'
2421 if (LHS == RHS) {
2422 assert(!match(RHS, m_Undef()) &&
2423 "Shuffle with 2 undef ops not simplified?");
2424 // Remap any references to RHS to use LHS.
2425 SmallVector<int, 16> Elts;
2426 for (unsigned i = 0; i != VWidth; ++i) {
2427 // Propagate undef elements or force mask to LHS.
2428 if (Mask[i] < 0)
2429 Elts.push_back(UndefMaskElem);
2430 else
2431 Elts.push_back(Mask[i] % LHSWidth);
2433 return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
2436 // shuffle undef, x, mask --> shuffle x, undef, mask'
2437 if (match(LHS, m_Undef())) {
2438 SVI.commute();
2439 return &SVI;
2442 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2443 return I;
2445 if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
2446 return I;
2448 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2449 return I;
2451 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2452 return I;
2454 APInt UndefElts(VWidth, 0);
2455 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2456 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2457 if (V != &SVI)
2458 return replaceInstUsesWith(SVI, V);
2459 return &SVI;
2462 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2463 return I;
2465 // These transforms have the potential to lose undef knowledge, so they are
2466 // intentionally placed after SimplifyDemandedVectorElts().
2467 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2468 return I;
2469 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2470 return I;
2472 if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2473 Value *V = evaluateInDifferentElementOrder(LHS, Mask);
2474 return replaceInstUsesWith(SVI, V);
2477 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2478 // a non-vector type. We can instead bitcast the original vector followed by
2479 // an extract of the desired element:
2481 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2482 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2483 // %1 = bitcast <4 x i8> %sroa to i32
2484 // Becomes:
2485 // %bc = bitcast <16 x i8> %in to <4 x i32>
2486 // %ext = extractelement <4 x i32> %bc, i32 0
2488 // If the shuffle is extracting a contiguous range of values from the input
2489 // vector then each use which is a bitcast of the extracted size can be
2490 // replaced. This will work if the vector types are compatible, and the begin
2491 // index is aligned to a value in the casted vector type. If the begin index
2492 // isn't aligned then we can shuffle the original vector (keeping the same
2493 // vector type) before extracting.
2495 // This code will bail out if the target type is fundamentally incompatible
2496 // with vectors of the source type.
2498 // Example of <16 x i8>, target type i32:
2499 // Index range [4,8): v-----------v Will work.
2500 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2501 // <16 x i8>: | | | | | | | | | | | | | | | | |
2502 // <4 x i32>: | | | | |
2503 // +-----------+-----------+-----------+-----------+
2504 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2505 // Target type i40: ^--------------^ Won't work, bail.
2506 bool MadeChange = false;
2507 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2508 Value *V = LHS;
2509 unsigned MaskElems = Mask.size();
2510 auto *SrcTy = cast<FixedVectorType>(V->getType());
2511 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
2512 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2513 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2514 unsigned SrcNumElems = SrcTy->getNumElements();
2515 SmallVector<BitCastInst *, 8> BCs;
2516 DenseMap<Type *, Value *> NewBCs;
2517 for (User *U : SVI.users())
2518 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2519 if (!BC->use_empty())
2520 // Only visit bitcasts that weren't previously handled.
2521 BCs.push_back(BC);
2522 for (BitCastInst *BC : BCs) {
2523 unsigned BegIdx = Mask.front();
2524 Type *TgtTy = BC->getDestTy();
2525 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2526 if (!TgtElemBitWidth)
2527 continue;
2528 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2529 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2530 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2531 if (!VecBitWidthsEqual)
2532 continue;
2533 if (!VectorType::isValidElementType(TgtTy))
2534 continue;
2535 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2536 if (!BegIsAligned) {
2537 // Shuffle the input so [0,NumElements) contains the output, and
2538 // [NumElems,SrcNumElems) is undef.
2539 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2540 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2541 ShuffleMask[I] = Idx;
2542 V = Builder.CreateShuffleVector(V, ShuffleMask,
2543 SVI.getName() + ".extract");
2544 BegIdx = 0;
2546 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2547 assert(SrcElemsPerTgtElem);
2548 BegIdx /= SrcElemsPerTgtElem;
2549 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2550 auto *NewBC =
2551 BCAlreadyExists
2552 ? NewBCs[CastSrcTy]
2553 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2554 if (!BCAlreadyExists)
2555 NewBCs[CastSrcTy] = NewBC;
2556 auto *Ext = Builder.CreateExtractElement(
2557 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2558 // The shufflevector isn't being replaced: the bitcast that used it
2559 // is. InstCombine will visit the newly-created instructions.
2560 replaceInstUsesWith(*BC, Ext);
2561 MadeChange = true;
2565 // If the LHS is a shufflevector itself, see if we can combine it with this
2566 // one without producing an unusual shuffle.
2567 // Cases that might be simplified:
2568 // 1.
2569 // x1=shuffle(v1,v2,mask1)
2570 // x=shuffle(x1,undef,mask)
2571 // ==>
2572 // x=shuffle(v1,undef,newMask)
2573 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2574 // 2.
2575 // x1=shuffle(v1,undef,mask1)
2576 // x=shuffle(x1,x2,mask)
2577 // where v1.size() == mask1.size()
2578 // ==>
2579 // x=shuffle(v1,x2,newMask)
2580 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2581 // 3.
2582 // x2=shuffle(v2,undef,mask2)
2583 // x=shuffle(x1,x2,mask)
2584 // where v2.size() == mask2.size()
2585 // ==>
2586 // x=shuffle(x1,v2,newMask)
2587 // newMask[i] = (mask[i] < x1.size())
2588 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2589 // 4.
2590 // x1=shuffle(v1,undef,mask1)
2591 // x2=shuffle(v2,undef,mask2)
2592 // x=shuffle(x1,x2,mask)
2593 // where v1.size() == v2.size()
2594 // ==>
2595 // x=shuffle(v1,v2,newMask)
2596 // newMask[i] = (mask[i] < x1.size())
2597 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2599 // Here we are really conservative:
2600 // we are absolutely afraid of producing a shuffle mask not in the input
2601 // program, because the code gen may not be smart enough to turn a merged
2602 // shuffle into two specific shuffles: it may produce worse code. As such,
2603 // we only merge two shuffles if the result is either a splat or one of the
2604 // input shuffle masks. In this case, merging the shuffles just removes
2605 // one instruction, which we know is safe. This is good for things like
2606 // turning: (splat(splat)) -> splat, or
2607 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2608 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2609 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2610 if (LHSShuffle)
2611 if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
2612 LHSShuffle = nullptr;
2613 if (RHSShuffle)
2614 if (!match(RHSShuffle->getOperand(1), m_Undef()))
2615 RHSShuffle = nullptr;
2616 if (!LHSShuffle && !RHSShuffle)
2617 return MadeChange ? &SVI : nullptr;
2619 Value* LHSOp0 = nullptr;
2620 Value* LHSOp1 = nullptr;
2621 Value* RHSOp0 = nullptr;
2622 unsigned LHSOp0Width = 0;
2623 unsigned RHSOp0Width = 0;
2624 if (LHSShuffle) {
2625 LHSOp0 = LHSShuffle->getOperand(0);
2626 LHSOp1 = LHSShuffle->getOperand(1);
2627 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
2629 if (RHSShuffle) {
2630 RHSOp0 = RHSShuffle->getOperand(0);
2631 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
2633 Value* newLHS = LHS;
2634 Value* newRHS = RHS;
2635 if (LHSShuffle) {
2636 // case 1
2637 if (match(RHS, m_Undef())) {
2638 newLHS = LHSOp0;
2639 newRHS = LHSOp1;
2641 // case 2 or 4
2642 else if (LHSOp0Width == LHSWidth) {
2643 newLHS = LHSOp0;
2646 // case 3 or 4
2647 if (RHSShuffle && RHSOp0Width == LHSWidth) {
2648 newRHS = RHSOp0;
2650 // case 4
2651 if (LHSOp0 == RHSOp0) {
2652 newLHS = LHSOp0;
2653 newRHS = nullptr;
2656 if (newLHS == LHS && newRHS == RHS)
2657 return MadeChange ? &SVI : nullptr;
2659 ArrayRef<int> LHSMask;
2660 ArrayRef<int> RHSMask;
2661 if (newLHS != LHS)
2662 LHSMask = LHSShuffle->getShuffleMask();
2663 if (RHSShuffle && newRHS != RHS)
2664 RHSMask = RHSShuffle->getShuffleMask();
2666 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2667 SmallVector<int, 16> newMask;
2668 bool isSplat = true;
2669 int SplatElt = -1;
2670 // Create a new mask for the new ShuffleVectorInst so that the new
2671 // ShuffleVectorInst is equivalent to the original one.
2672 for (unsigned i = 0; i < VWidth; ++i) {
2673 int eltMask;
2674 if (Mask[i] < 0) {
2675 // This element is an undef value.
2676 eltMask = -1;
2677 } else if (Mask[i] < (int)LHSWidth) {
2678 // This element is from left hand side vector operand.
2680 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2681 // new mask value for the element.
2682 if (newLHS != LHS) {
2683 eltMask = LHSMask[Mask[i]];
2684 // If the value selected is an undef value, explicitly specify it
2685 // with a -1 mask value.
2686 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2687 eltMask = -1;
2688 } else
2689 eltMask = Mask[i];
2690 } else {
2691 // This element is from right hand side vector operand
2693 // If the value selected is an undef value, explicitly specify it
2694 // with a -1 mask value. (case 1)
2695 if (match(RHS, m_Undef()))
2696 eltMask = -1;
2697 // If RHS is going to be replaced (case 3 or 4), calculate the
2698 // new mask value for the element.
2699 else if (newRHS != RHS) {
2700 eltMask = RHSMask[Mask[i]-LHSWidth];
2701 // If the value selected is an undef value, explicitly specify it
2702 // with a -1 mask value.
2703 if (eltMask >= (int)RHSOp0Width) {
2704 assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
2705 "should have been check above");
2706 eltMask = -1;
2708 } else
2709 eltMask = Mask[i]-LHSWidth;
2711 // If LHS's width is changed, shift the mask value accordingly.
2712 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2713 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2714 // If newRHS == newLHS, we want to remap any references from newRHS to
2715 // newLHS so that we can properly identify splats that may occur due to
2716 // obfuscation across the two vectors.
2717 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2718 eltMask += newLHSWidth;
2721 // Check if this could still be a splat.
2722 if (eltMask >= 0) {
2723 if (SplatElt >= 0 && SplatElt != eltMask)
2724 isSplat = false;
2725 SplatElt = eltMask;
2728 newMask.push_back(eltMask);
2731 // If the result mask is equal to one of the original shuffle masks,
2732 // or is a splat, do the replacement.
2733 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2734 if (!newRHS)
2735 newRHS = UndefValue::get(newLHS->getType());
2736 return new ShuffleVectorInst(newLHS, newRHS, newMask);
2739 return MadeChange ? &SVI : nullptr;