//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include <optional>

using namespace llvm;

using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;

// Keep this in sync with TargetTransformInfo::VPLegalization.
#define VPINTERNAL_VPLEGAL_CASES                                               \
  VPINTERNAL_CASE(Legal)                                                       \
  VPINTERNAL_CASE(Discard)                                                     \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X

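// When used inside the cl::desc strings below, VPINTERNAL_VPLEGAL_CASES
// expands to the option list "|Legal|Discard|Convert".
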
static cl::opt<std::string> EVLTransformOverride(
    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %evl parameter (Used in "
             "testing)."));

static cl::opt<std::string> MaskTransformOverride(
    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %mask parameter (Used in "
             "testing)."));

#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

static VPTransform parseOverrideOption(const std::string &TextOpt) {
  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}

#undef VPINTERNAL_VPLEGAL_CASES

// Whether any override options are set.
static bool anyExpandVPOverridesSet() {
  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
}

#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of lowered vector predication operations");

/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();
  // Not a splat of a constant all-ones value: conservatively treat the mask
  // as not all-true.
  return false;
}

/// \returns A non-excepting divisor constant for this type.
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  return ConstantInt::get(DivTy, 1u, false);
}

/// Transfer operation properties from \p VPI to \p NewVal.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;

  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;

  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}

/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
/// \p OldOp gets erased.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
  transferDecorations(NewOp, OldOp);
  OldOp.replaceAllUsesWith(&NewOp);
  OldOp.eraseFromParent();
}

static bool maySpeculateLanes(VPIntrinsic &VPI) {
  // The result of VP reductions depends on the mask and evl.
  if (isa<VPReductionIntrinsic>(VPI))
    return false;
  // Fallback to whether the intrinsic is speculatable.
  std::optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
  unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
  return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI);
}

namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
  Function &F;
  const TargetTransformInfo &TTI;

  /// \returns A (fixed length) vector with ascending integer indices
  /// (<0, 1, ..., NumElems-1>).
  /// \p Builder
  ///    Used for instruction creation.
  /// \p LaneTy
  ///    Integer element type of the result vector.
  /// \p NumElems
  ///    Number of vector elements.
  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                          unsigned NumElems);

  /// \returns A bitmask that is true where the lane position is less-than \p
  /// EVLParam.
  /// \p Builder
  ///    Used for instruction creation.
  /// \p EVLParam
  ///    The explicit vector length parameter to test against the lane
  ///    positions.
  /// \p ElemCount
  ///    Static (potentially scalable) number of vector elements.
  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
                          ElementCount ElemCount);

  Value *foldEVLIntoMask(VPIntrinsic &VPI);

  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
  /// length of the operation.
  void discardEVLParameter(VPIntrinsic &PI);

  /// Lower this VP binary operator to an unpredicated binary operator.
  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                           VPIntrinsic &PI);

  /// Lower this VP fp call to an unpredicated fp call.
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                   unsigned UnpredicatedIntrinsicID);

  /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
  Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                      VPReductionIntrinsic &PI);

  /// Lower this VP memory operation to a non-VP intrinsic.
  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                            VPIntrinsic &VPI);

  /// Lower this VP comparison to a call to an unpredicated comparison.
  Value *expandPredicationInComparison(IRBuilder<> &Builder,
                                       VPCmpIntrinsic &PI);

  /// Query TTI and expand the vector predication in \p PI accordingly.
  Value *expandPredication(VPIntrinsic &PI);

  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
  /// overrides TTI with the cl::opts listed at the top of this file.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
  bool UsingTTIOverrides;

  CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
      : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

  bool expandVectorPredication();
};

//// CachingVPExpander {

Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  SmallVector<Constant *, 16> ConstElems;

  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));

  return ConstantVector::get(ConstElems);
}

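// Illustrative example (fixed-width case): for a <4 x i32> operation with
// %evl = 2, the conversion below compares the step vector <0, 1, 2, 3>
// against the splat <2, 2, 2, 2> with an unsigned less-than, yielding the
// lane mask <1, 1, 0, 0>. For scalable vectors, llvm.get.active.lane.mask
// computes the same "lane index < %evl" predicate directly.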
Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // Scalable vector %evl conversion.
  if (ElemCount.isScalable()) {
    auto *M = Builder.GetInsertBlock()->getModule();
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    Function *ActiveMaskFunc = Intrinsic::getDeclaration(
        M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
    // `get_active_lane_mask` performs an implicit less-than comparison.
    Value *ConstZero = Builder.getInt32(0);
    return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
  }

  // Fixed vector %evl conversion.
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
}

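// Binary operators are lowered by dropping the mask and %evl outright. The
// only adjustment needed is for the integer division and remainder opcodes,
// where masked-off lanes are first blended with a safe divisor of 1 so the
// unpredicated instruction cannot trap on a division by zero.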
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Blend in safe operands.
  if (Mask && !isAllTrueMask(Mask)) {
    switch (OC) {
    default:
      // Can safely ignore the predicate.
      break;

    // Division operators need a safe divisor on masked-off lanes (1).
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // 2nd operand must not be zero.
      Value *SafeDivisor = getSafeDivisor(VPI.getType());
      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
    }
  }

  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());

  replaceOperation(*NewBinOp, VPI);
  return NewBinOp;
}

Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    Value *Op0 = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *Op2 = VPI.getOperand(2);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp =
        Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }

  return nullptr;
}

/// \returns The neutral element of the reduction \p VPI for element type
/// \p EltTy. Masked-off lanes are filled with this value before reducing.
static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  bool Negative = false;
  unsigned EltBits = EltTy->getScalarSizeInBits();
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Expecting a VP reduction intrinsic");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor:
  case Intrinsic::vp_reduce_umax:
    return Constant::getNullValue(EltTy);
  case Intrinsic::vp_reduce_mul:
    return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_umin:
    return ConstantInt::getAllOnesValue(EltTy);
  case Intrinsic::vp_reduce_smin:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMaxValue(EltBits));
  case Intrinsic::vp_reduce_smax:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMinValue(EltBits));
  case Intrinsic::vp_reduce_fmax:
    Negative = true;
    [[fallthrough]];
  case Intrinsic::vp_reduce_fmin: {
    FastMathFlags Flags = VPI.getFastMathFlags();
    const fltSemantics &Semantics = EltTy->getFltSemantics();
    return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
           : !Flags.noInfs()
               ? ConstantFP::getInfinity(EltTy, Negative)
               : ConstantFP::get(EltTy,
                                 APFloat::getLargest(Semantics, Negative));
  }
  case Intrinsic::vp_reduce_fadd:
    return ConstantFP::getNegativeZero(EltTy);
  case Intrinsic::vp_reduce_fmul:
    return ConstantFP::get(EltTy, 1.0);
  }
}

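// A predicated reduction is expanded in two steps: masked-off lanes of the
// vector operand are first replaced with the reduction's neutral element
// (computed above), then a regular unpredicated reduction intrinsic is
// emitted and combined with the start value.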
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());

  // Insert neutral element in masked-out positions
  if (Mask && !isAllTrueMask(Mask)) {
    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
    auto *NeutralVector = Builder.CreateVectorSplat(
        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  }

  Value *Reduction;
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Impossible reduction kind");
  case Intrinsic::vp_reduce_add:
    Reduction = Builder.CreateAddReduce(RedOp);
    Reduction = Builder.CreateAdd(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_mul:
    Reduction = Builder.CreateMulReduce(RedOp);
    Reduction = Builder.CreateMul(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_and:
    Reduction = Builder.CreateAndReduce(RedOp);
    Reduction = Builder.CreateAnd(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_or:
    Reduction = Builder.CreateOrReduce(RedOp);
    Reduction = Builder.CreateOr(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_xor:
    Reduction = Builder.CreateXorReduce(RedOp);
    Reduction = Builder.CreateXor(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_smax:
    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_smin:
    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umax:
    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umin:
    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmax:
    Reduction = Builder.CreateFPMaxReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmin:
    Reduction = Builder.CreateFPMinReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fadd:
    Reduction = Builder.CreateFAddReduce(Start, RedOp);
    break;
  case Intrinsic::vp_reduce_fmul:
    Reduction = Builder.CreateFMulReduce(Start, RedOp);
    break;
  }

  replaceOperation(*Reduction, VPI);
  return Reduction;
}

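// VP memory operations only reach this point once their %evl parameter is
// ineffective. Loads and stores with an all-true mask become plain load/store
// instructions; everything else is rewritten to the corresponding
// llvm.masked.* intrinsic (masked load/store/gather/scatter).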
Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = F.getParent()->getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();

  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);
    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);
    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
        VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}

Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
         *VPI.getFunctionalOpcode() == Instruction::FCmp);

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  auto Pred = VPI.getPredicate();

  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);

  replaceOperation(*NewCmp, VPI);
  return NewCmp;
}

void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");

  if (VPI.canIgnoreVectorLengthParam())
    return;

  Value *EVLParam = VPI.getVectorLengthParam();
  if (!EVLParam)
    return;

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  if (StaticElemCount.isScalable()) {
    auto *M = VPI.getModule();
    Function *VScaleFunc =
        Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
    Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*NUW*/ true, /*NSW*/ false);
  } else {
    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
  }
  VPI.setVectorLengthParam(MaxEVL);
}

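// Folding %evl into the mask replaces, e.g.,
//   vp.add(%a, %b, %mask, %evl)
// with
//   vp.add(%a, %b, and(%mask, lane_index < %evl), <full vector length>)
// so the %evl parameter becomes ineffective and can then be discarded.
// (Illustrative pseudo-IR, not the exact textual IR produced.)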
Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Ineffective %evl parameter and so nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return &VPI;

  // Only VP intrinsics can have an %evl parameter.
  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');

  // Convert the %evl predication into vector mask predication.
  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return &VPI;
}

Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
  auto OC = VPI.getFunctionalOpcode();

  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_fabs:
    return expandPredicationToFPCall(Builder, VPI, Intrinsic::fabs);
  case Intrinsic::vp_sqrt:
    return expandPredicationToFPCall(Builder, VPI, Intrinsic::sqrt);
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }

  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;

  return &VPI;
}

//// } CachingVPExpander

struct TransformJob {
  VPIntrinsic *PI;
  TargetTransformInfo::VPLegalization Strategy;
  TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
      : PI(PI), Strategy(InitStrat) {}

  bool isDone() const { return Strategy.shouldDoNothing(); }
};

void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
  // Operations with speculatable lanes do not strictly need predication.
  if (maySpeculateLanes(VPI)) {
    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
    // No need to expand %evl into the %mask only to ignore that code.
    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
    return;
  }

  // We have to preserve the predicating effect of %evl for this
  // non-speculatable VP intrinsic.
  // 1) Never discard %evl.
  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
  //    %evl gets folded into %mask.
  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
  }
}

VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing, the following does not need to be
  // efficient.
  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}

/// Expand llvm.vp.* intrinsics as requested by \p TTI.
bool CachingVPExpander::expandVectorPredication() {
  SmallVector<TransformJob, 16> Worklist;

  // Collect all VPIntrinsics that need expansion and determine their expansion
  // strategy.
  for (auto &I : instructions(F)) {
    auto *VPI = dyn_cast<VPIntrinsic>(&I);
    if (!VPI)
      continue;
    auto VPStrat = getVPLegalizationStrategy(*VPI);
    sanitizeStrategy(*VPI, VPStrat);
    if (!VPStrat.shouldDoNothing())
      Worklist.emplace_back(VPI, VPStrat);
  }
  if (Worklist.empty())
    return false;

  // Transform all VPIntrinsics on the worklist.
  LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
                    << " instructions ::::\n");
  for (TransformJob Job : Worklist) {
    // Transform the EVL parameter.
    switch (Job.Strategy.EVLParamStrategy) {
    case VPLegalization::Legal:
      break;
    case VPLegalization::Discard:
      discardEVLParameter(*Job.PI);
      break;
    case VPLegalization::Convert:
      if (foldEVLIntoMask(*Job.PI))
        ++NumFoldedVL;
      break;
    }
    Job.Strategy.EVLParamStrategy = VPLegalization::Legal;

    // Replace with a non-predicated operation.
    switch (Job.Strategy.OpStrategy) {
    case VPLegalization::Legal:
      break;
    case VPLegalization::Discard:
      llvm_unreachable("Invalid strategy for operators.");
    case VPLegalization::Convert:
      expandPredication(*Job.PI);
      ++NumLoweredVPOps;
      break;
    }
    Job.Strategy.OpStrategy = VPLegalization::Legal;

    assert(Job.isDone() && "incomplete transformation");
  }

  return true;
}

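// Legacy pass manager wrapper. It requests TargetTransformInfo and runs the
// CachingVPExpander over the function; the new pass manager entry point is
// ExpandVectorPredicationPass::run below.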
class ExpandVectorPredication : public FunctionPass {
public:
  static char ID;
  ExpandVectorPredication() : FunctionPass(ID) {
    initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    CachingVPExpander VPExpander(F, *TTI);
    return VPExpander.expandVectorPredication();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};
} // namespace

char ExpandVectorPredication::ID;
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
                      "Expand vector predication intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
                    "Expand vector predication intrinsics", false, false)

FunctionPass *llvm::createExpandVectorPredicationPass() {
  return new ExpandVectorPredication();
}

PreservedAnalyses
ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  CachingVPExpander VPExpander(F, TTI);
  if (!VPExpander.expandVectorPredication())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}