//===------- VectorCombine.cpp - Optimize partial vector operations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass optimizes scalar/vector interactions using target cost models. The
// transforms implemented here may not fit in traditional loop-based or SLP
// vectorization passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();

namespace {
class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT, AAResults &AA, AssumptionCache &AC)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC) {}

  bool run();

private:
  Function &F;
  IRBuilder<> Builder;
  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  AAResults &AA;
  AssumptionCache &AC;

  bool vectorizeLoadInsert(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             unsigned Opcode,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                     Instruction &I);
  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                       Instruction &I);
  bool foldExtractExtract(Instruction &I);
  bool foldBitcastShuf(Instruction &I);
  bool scalarizeBinopOrCmp(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoadExtract(Instruction &I);
};
} // namespace

static void replaceValue(Value &Old, Value &New) {
  Old.replaceAllUsesWith(&New);
  New.takeName(&Old);
}

bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match insert into fixed vector of scalar value.
  // TODO: Handle non-zero insert index.
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  Value *Scalar;
  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
      !Scalar->hasOneUse())
    return false;

  // Optionally match an extract from another vector.
  Value *X;
  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
  if (!HasExtract)
    X = Scalar;

  // Match source value as load of scalar or vector.
  // Do not vectorize scalar load (widening) if atomic/volatile or under
  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
  // or create data races non-existent in the source.
  auto *Load = dyn_cast<LoadInst>(X);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      mustSuppressSpeculation(*Load))
    return false;

  const DataLayout &DL = I.getModule()->getDataLayout();
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  // If original AS != Load's AS, we can't bitcast the original pointer and have
  // to use Load's operand instead. Ideally we would want to strip pointer casts
  // without changing AS, but there's no API to do that ATM.
  unsigned AS = Load->getPointerAddressSpace();
  if (AS != SrcPtr->getType()->getPointerAddressSpace())
    SrcPtr = Load->getPointerOperand();

  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
  // sure we have all of our type-based constraints in place for this target.
  Type *ScalarTy = Scalar->getType();
  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;

  // Check safety of replacing the scalar load with a larger vector load.
  // We use minimal alignment (maximum flexibility) because we only care about
  // the dereferenceable region. When calculating cost and creating a new op,
  // we may use a larger value based on alignment attributes.
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  Align Alignment = Load->getAlign();
  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
    // It is not safe to load directly from the pointer, but we can still peek
    // through gep offsets and check if it safe to load from a base address with
    // updated alignment. If it is, we can shuffle the element(s) into place
    // after the load.
    unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
    APInt Offset(OffsetBitWidth, 0);
    SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);

    // We want to shuffle the result down from a high element of a vector, so
    // the offset must be positive.
    if (Offset.isNegative())
      return false;

    // The offset must be a multiple of the scalar element to shuffle cleanly
    // in the element's size.
    uint64_t ScalarSizeInBytes = ScalarSize / 8;
    if (Offset.urem(ScalarSizeInBytes) != 0)
      return false;

    // If we load MinVecNumElts, will our target element still be loaded?
    OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
    if (OffsetEltIndex >= MinVecNumElts)
      return false;

    if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
      return false;

    // Update alignment with offset value. Note that the offset could be negated
    // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
    // negation does not change the result of the alignment calculation.
    Alignment = commonAlignment(Alignment, Offset.getZExtValue());
  }

  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
  // Use the greater of the alignment on the load or its source pointer.
  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
  Type *LoadTy = Load->getType();
  InstructionCost OldCost =
      TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
                                          /* Insert */ true, HasExtract);

  // New pattern: load VecPtr
  InstructionCost NewCost =
      TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
  // Optionally, we are shuffling the loaded vector element(s) into place.
  // For the mask set everything but element 0 to undef to prevent poison from
  // propagating from the extra loaded memory. This will also optionally
  // shrink/grow the vector from the loaded size to the output size.
  // We assume this operation has no cost in codegen if there was no offset.
  // Note that we could use freeze to avoid poison problems, but then we might
  // still need a shuffle to change the vector size.
  unsigned OutputNumElts = Ty->getNumElements();
  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OffsetEltIndex)
    NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);

  // We can aggressively convert to the vector form because the backend can
  // invert this transform if it does not result in a performance win.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // It is safe and potentially profitable to load a vector directly:
  // inselt undef, load Scalar, 0 --> load VecPtr
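  // Illustrative IR for the rewrite (an assumed example, not taken from the
  // original source or its tests):
  //   %s = load float, float* %p, align 4
  //   %r = insertelement <4 x float> undef, float %s, i32 0
  // -->
  //   %vp = bitcast float* %p to <4 x float>*
  //   %wide = load <4 x float>, <4 x float>* %vp, align 4
  //   %r = shufflevector <4 x float> %wide, <4 x float> undef,
  //                      <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>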
  IRBuilder<> Builder(Load);
  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
  VecLd = Builder.CreateShuffleVector(VecLd, Mask);

  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}

/// Determine which, if any, of the inputs should be replaced by a shuffle
/// followed by extract from a different index.
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex = InvalidIndex) const {
  assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
         isa<ConstantInt>(Ext1->getIndexOperand()) &&
         "Expected constant extract indexes");

  unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
  unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();

  // If the extract indexes are identical, no shuffle is needed.
  if (Index0 == Index1)
    return nullptr;

  Type *VecTy = Ext0->getVectorOperand()->getType();
  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
  InstructionCost Cost0 =
      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
  InstructionCost Cost1 =
      TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);

  // If both costs are invalid, no shuffle is needed.
  if (!Cost0.isValid() && !Cost1.isValid())
    return nullptr;

  // We are extracting from 2 different indexes, so one operand must be shuffled
  // before performing a vector operation and/or extract. The more expensive
  // extract will be replaced by a shuffle.
  if (Cost0 > Cost1)
    return Ext0;
  if (Cost1 > Cost0)
    return Ext1;

  // If the costs are equal and there is a preferred extract index, shuffle the
  // opposite operand.
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;

  // Otherwise, replace the extract with the higher index.
  return Index0 > Index1 ? Ext0 : Ext1;
}

/// Compare the relative costs of 2 extracts followed by scalar operation vs.
/// vector operation(s) followed by extract. Return true if the existing
/// instructions are cheaper than a vector alternative. Otherwise, return false
/// and if one of the extracts should be transformed to a shufflevector, set
/// \p ConvertToShuffle to that extract instruction.
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          unsigned Opcode,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
         isa<ConstantInt>(Ext1->getOperand(1)) &&
         "Expected constant extract indexes");
  Type *ScalarTy = Ext0->getType();
  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
  InstructionCost ScalarOpCost, VectorOpCost;

  // Get cost estimates for scalar and vector versions of the operation.
  bool IsBinOp = Instruction::isBinaryOp(Opcode);
  if (IsBinOp) {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  } else {
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
                                          CmpInst::makeCmpResultType(ScalarTy));
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
                                          CmpInst::makeCmpResultType(VecTy));
  }

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();

  InstructionCost Extract0Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
  InstructionCost Extract1Cost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);

  // A more expensive extract will always be replaced by a splat shuffle.
  // For example, if Ext0 is more expensive:
  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
  // extelt (opcode (splat V0, Ext0), V1), Ext1
  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
  // check the cost of creating a broadcast shuffle and shuffling both
  // operands to element 0.
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  InstructionCost OldCost, NewCost;
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    // Handle a special case. If the 2 extracts are identical, adjust the
    // formulas to account for that. The extra use charge allows for either the
    // CSE'd pattern or an unoptimized form with identical values:
    // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // Handle the general case. Each extract is actually a different value:
    // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    if (IsBinOp && DisableBinopExtractShuffle)
      return true;

    // If we are extracting from 2 different indexes, then one operand must be
    // shuffled before performing the vector operation. The shuffle mask is
    // undefined except for 1 lane that is being translated to the remaining
    // extraction lane. Therefore, it is a splat shuffle. Ex:
    // ShufMask = { undef, undef, 0, undef }
    // TODO: The cost model has an option for a "broadcast" shuffle
    // (splat-from-element-0), but no option for a more general splat.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  return OldCost < NewCost;
}

/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // The shuffle mask is undefined except for 1 lane that is being translated
  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
  // ShufMask = { 2, undef, undef, undef }
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}

/// Given an extract element instruction with constant index operand, shuffle
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if the input can be constant folded, so that we are not creating
/// unnecessary instructions.
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  // If the extract can be constant-folded, this code is unsimplified. Defer
  // to other passes to handle that.
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}

/// Try to reduce extract element costs by converting scalar compares to vector
/// compares followed by extract.
/// cmp (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
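  // For example (illustrative IR, an assumed example not taken from the
  // original source):
  //   %e0 = extractelement <4 x i32> %v0, i32 1
  //   %e1 = extractelement <4 x i32> %v1, i32 1
  //   %c  = icmp sgt i32 %e0, %e1
  // -->
  //   %vc = icmp sgt <4 x i32> %v0, %v1
  //   %c  = extractelement <4 x i1> %vc, i32 1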
  ++NumVecCmp;
  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Try to reduce extract element costs by converting scalar binops to vector
/// binops followed by extract.
/// bo (ext0 V0, C), (ext1 V1, C)
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
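  // For example (illustrative IR, an assumed example not taken from the
  // original source):
  //   %e0 = extractelement <4 x float> %v0, i32 2
  //   %e1 = extractelement <4 x float> %v1, i32 2
  //   %r  = fadd float %e0, %e1
  // -->
  //   %vbo = fadd <4 x float> %v0, %v1
  //   %r   = extractelement <4 x float> %vbo, i32 2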
  ++NumVecBO;
  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
  Value *VecBO =
      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);

  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
  replaceValue(I, *NewExt);
}

/// Match an instruction with extracted vector operands.
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // It is not safe to transform things like div, urem, etc. because we may
  // create undefined behavior when executing those on unknown vector elements.
  if (!isSafeToSpeculativelyExecute(&I))
    return false;

  Instruction *I0, *I1;
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
      !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
    return false;

  Value *V0, *V1;
  uint64_t C0, C1;
  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
      !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
      V0->getType() != V1->getType())
    return false;

  // If the scalar value 'I' is going to be re-inserted into a vector, then try
  // to create an extract to that same element. The extract/insert can be
  // reduced to a "select shuffle".
  // TODO: If we add a larger pattern match that starts from an insert, this
  // probably becomes unnecessary.
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  uint64_t InsertIndex = InvalidIndex;
  if (I.hasOneUse())
    match(I.user_back(),
          m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));

  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
                            InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  return true;
}

/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
bool VectorCombine::foldBitcastShuf(Instruction &I) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(
                     m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
    return false;

  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
  // mask for scalable type is a splat or not.
  // 2) Disallow non-vector casts and length-changing shuffles.
  // TODO: We could allow any shuffle.
  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form choosing narrower elements.
    assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestNumElts / SrcNumElts;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcNumElts / DestNumElts;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }

  // The new shuffle must not cost more than the old shuffle. The bitcast is
  // moved ahead of the shuffle, so assume that it has the same cost as before.
  InstructionCost DestCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
  InstructionCost SrcCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
  if (DestCost > SrcCost || !DestCost.isValid())
    return false;

  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
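  // For example (illustrative IR, an assumed example not taken from the
  // original source; the mask is widened to match the new element count):
  //   %s = shufflevector <4 x i32> %v, <4 x i32> undef,
  //                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  //   %b = bitcast <4 x i32> %s to <2 x i64>
  // -->
  //   %b = bitcast <4 x i32> %v to <2 x i64>
  //   %s = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> <i32 1, i32 0>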
  ++NumShufOfBitcast;
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
  replaceValue(I, *Shuf);
  return true;
}

/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  Value *Ins0, *Ins1;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  // TODO: Can we account for that in the cost model?
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  // vec_op VecC0, (inselt VecC1, V1, Index)
  // vec_op (inselt VecC0, V0, Index), VecC1
  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  // TODO: Deal with mismatched index constants and variable indexes?
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  uint64_t Index0 = 0, Index1 = 0;
  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
                               m_ConstantInt(Index0))) &&
      !match(Ins0, m_Constant(VecC0)))
    return false;
  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
                               m_ConstantInt(Index1))) &&
      !match(Ins1, m_Constant(VecC1)))
    return false;

  bool IsConst0 = !V0;
  bool IsConst1 = !V1;
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  uint64_t Index = IsConst0 ? Index1 : Index0;
  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  InstructionCost ScalarOpCost, VectorOpCost;
  if (IsCmp) {
    ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
  } else {
    ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
    VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
  }

  // Get cost estimate for the insert element. This cost will factor into
  // both sequences.
  InstructionCost InsertCost =
      TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
  InstructionCost OldCost =
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
  InstructionCost NewCost = ScalarOpCost + InsertCost +
                            (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                            (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
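  // For example (illustrative IR, an assumed example not taken from the
  // original source):
  //   %i0 = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 0
  //   %r  = add <4 x i32> %i0, <i32 10, i32 20, i32 30, i32 40>
  // -->
  //   %s = add i32 %x, 10
  //   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s, i32 0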
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;

  // For constant cases, extract the scalar element; this should constant fold.
  if (IsConst0)
    V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
  if (IsConst1)
    V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));

  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
                            : ConstantExpr::get(Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}

/// Try to combine a scalar binop + 2 scalar compares of extracted elements of
/// a vector into vector operations followed by extract. Note: The SLP pass
/// may miss this pattern because of implementation problems.
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // We are looking for a scalar binop of booleans.
  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
    return false;

  // The compare predicates should match, and each compare should have a
  // constant operand.
  // TODO: Relax the one-use constraints.
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  Instruction *I0, *I1;
  Constant *C0, *C1;
  CmpInst::Predicate P0, P1;
  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
      !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
      P0 != P1)
    return false;

  // The compare operands must be extracts of the same vector with constant
  // extract indexes.
  // TODO: Relax the one-use constraints.
  Value *X;
  uint64_t Index0, Index1;
  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
      !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
    return false;

  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
  if (!ConvertToShuf)
    return false;

  // The original scalar pattern is:
  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
  CmpInst::Predicate Pred = P0;
  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
                                                    : Instruction::ICmp;
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  if (!VecTy)
    return false;

  InstructionCost OldCost =
      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
  OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
  OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());

  // The proposed vector pattern is:
  // vcmp = cmp Pred X, VecC
  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
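  // For example (illustrative IR, an assumed example not taken from the
  // original source; it assumes the second extract is the one converted to a
  // shuffle, with Index0 == 1 and Index1 == 2):
  //   %e0 = extractelement <4 x i32> %x, i32 1
  //   %e1 = extractelement <4 x i32> %x, i32 2
  //   %c0 = icmp sgt i32 %e0, 42
  //   %c1 = icmp sgt i32 %e1, 99
  //   %r  = and i1 %c0, %c1
  // -->
  //   %vcmp  = icmp sgt <4 x i32> %x, <i32 undef, i32 42, i32 99, i32 undef>
  //   %shift = shufflevector <4 x i1> %vcmp, <4 x i1> undef,
  //                          <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  //   %vand  = and <4 x i1> %vcmp, %shift
  //   %r     = extractelement <4 x i1> %vand, i32 1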
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
  InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
                                ShufMask);
  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
  NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex);

  // Aggressively form vector ops if the cost is equal because the transform
  // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // Create a vector constant from the 2 scalar constants.
  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
                                   UndefValue::get(VecTy->getElementType()));
  CmpC[Index0] = C0;
  CmpC[Index1] = C1;
  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));

  ++NumVecCmpBO;
  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
                                        VCmp, Shuf);
  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
  replaceValue(I, *NewExt);
  return true;
}

// Check if the memory location is modified between two instructions in the
// same basic block.
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}

/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
static bool canScalarizeAccess(FixedVectorType *VecTy, Value *Idx,
                               Instruction *CtxI, AssumptionCache &AC) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return C->getValue().ult(VecTy->getNumElements());

  if (!isGuaranteedNotToBePoison(Idx, &AC))
    return false;

  APInt Zero(Idx->getType()->getScalarSizeInBits(), 0);
  APInt MaxElts(Idx->getType()->getScalarSizeInBits(), VecTy->getNumElements());
  ConstantRange ValidIndices(Zero, MaxElts);
  ConstantRange IdxRange = computeConstantRange(Idx, true, &AC, CtxI, 0);
  return ValidIndices.contains(IdxRange);
}

/// The memory operation on a vector of \p ScalarType had alignment of
/// \p VectorAlignment. Compute the maximal, but conservatively correct,
/// alignment that will be valid for the memory operation on a single scalar
/// element of the same type with index \p Idx.
static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return commonAlignment(VectorAlignment,
                           C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
}

// Combine patterns like:
//   %0 = load <4 x i32>, <4 x i32>* %a
//   %1 = insertelement <4 x i32> %0, i32 %b, i32 1
//   store <4 x i32> %1, <4 x i32>* %a
// to:
//   %0 = bitcast <4 x i32>* %a to i32*
//   %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
//   store i32 %b, i32* %1
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  StoreInst *SI = dyn_cast<StoreInst>(&I);
  if (!SI || !SI->isSimple() ||
      !isa<FixedVectorType>(SI->getValueOperand()->getType()))
    return false;

  // TODO: Combine more complicated patterns (multiple insert) by referencing
  // TargetTransformInfo.
  Instruction *Source;
  Value *NewElement;
  Value *Idx;
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Instruction(Source), m_Value(NewElement),
                         m_Value(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
    const DataLayout &DL = I.getModule()->getDataLayout();
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile load or store. Ensure memory is not
    // modified between, vector type matches store size, and index is inbounds.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL.typeSizeEqualsStoreSize(Load->getType()) ||
        !canScalarizeAccess(VecTy, Idx, Load, AC) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    Value *GEP = Builder.CreateInBoundsGEP(
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
    NSI->copyMetadata(*SI);
    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
        DL);
    NSI->setAlignment(ScalarOpAlignment);
    replaceValue(I, *NSI);
    // The original store needs to be erased manually.
    I.eraseFromParent();
    return true;
  }

  return false;
}

/// Try to scalarize vector loads feeding extractelement instructions.
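/// An illustrative sketch of the intended rewrite (assumed IR, not taken from
/// the original source or its tests):
///   %v = load <4 x i32>, <4 x i32>* %p
///   %e = extractelement <4 x i32> %v, i32 2
/// -->
///   %gep = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 0, i32 2
///   %e.scalar = load i32, i32* %gep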
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  Value *Ptr;
  Value *Idx;
  if (!match(&I, m_ExtractElt(m_Load(m_Value(Ptr)), m_Value(Idx))))
    return false;

  auto *LI = cast<LoadInst>(I.getOperand(0));
  const DataLayout &DL = I.getModule()->getDataLayout();
  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
    return false;

  auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
  if (!FixedVT)
    return false;

  InstructionCost OriginalCost = TTI.getMemoryOpCost(
      Instruction::Load, LI->getType(), Align(LI->getAlignment()),
      LI->getPointerAddressSpace());
  InstructionCost ScalarizedCost = 0;

  Instruction *LastCheckedInst = LI;
  unsigned NumInstChecked = 0;
  // Check if all users of the load are extracts with no memory modifications
  // between the load and the extract. Compute the cost of both the original
  // code and the scalarized version.
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;

    if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
      return false;

    // Check if any instruction between the load and the extract may modify
    // memory.
    if (LastCheckedInst->comesBefore(UI)) {
      for (Instruction &I :
           make_range(std::next(LI->getIterator()), UI->getIterator())) {
        // Bail out if we reached the check limit or the instruction may write
        // to memory.
        if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
          return false;
        NumInstChecked++;
      }
    }

    if (!LastCheckedInst)
      LastCheckedInst = UI;
    else if (LastCheckedInst->comesBefore(UI))
      LastCheckedInst = UI;

    if (!canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC))
      return false;

    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
    OriginalCost +=
        TTI.getVectorInstrCost(Instruction::ExtractElement, LI->getType(),
                               Index ? Index->getZExtValue() : -1);
    ScalarizedCost +=
        TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
                            Align(1), LI->getPointerAddressSpace());
    ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
  }

  if (ScalarizedCost >= OriginalCost)
    return false;

  // Replace extracts with narrow scalar loads.
  for (User *U : LI->users()) {
    auto *EI = cast<ExtractElementInst>(U);
    Builder.SetInsertPoint(EI);

    Value *Idx = EI->getOperand(1);
    Value *GEP =
        Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        LI->getAlign(), FixedVT->getElementType(), Idx, DL);
    NewLoad->setAlignment(ScalarOpAlignment);

    replaceValue(*EI, *NewLoad);
  }

  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Use early increment range so that we can erase instructions in loop.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      Builder.SetInsertPoint(&I);
      MadeChange |= vectorizeLoadInsert(I);
      MadeChange |= foldExtractExtract(I);
      MadeChange |= foldBitcastShuf(I);
      MadeChange |= scalarizeBinopOrCmp(I);
      MadeChange |= foldExtractedCmps(I);
      MadeChange |= scalarizeLoadExtract(I);
      MadeChange |= foldSingleElementStore(I);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

// Pass manager boilerplate below here.

namespace {
class VectorCombineLegacyPass : public FunctionPass {
public:
  static char ID;
  VectorCombineLegacyPass() : FunctionPass(ID) {
    initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    VectorCombine Combiner(F, TTI, DT, AA, AC);
    return Combiner.run();
  }
};
} // namespace

char VectorCombineLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
                      "Optimize scalar/vector ops", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
                    "Optimize scalar/vector ops", false, false)
Pass *llvm::createVectorCombinePass() {
  return new VectorCombineLegacyPass();
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  VectorCombine Combiner(F, TTI, DT, AA, AC);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}