//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include <optional>

using namespace llvm;

using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
// Keep this in sync with TargetTransformInfo::VPLegalization.
#define VPINTERNAL_VPLEGAL_CASES                                               \
  VPINTERNAL_CASE(Legal)                                                       \
  VPINTERNAL_CASE(Discard)                                                     \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X
static cl::opt<std::string> EVLTransformOverride(
    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %evl parameter (Used in "
             "testing)."));
static cl::opt<std::string> MaskTransformOverride(
    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %mask parameter (Used in "
             "testing)."));
#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

static VPTransform parseOverrideOption(const std::string &TextOpt) {
  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}

#undef VPINTERNAL_VPLEGAL_CASES
// Whether any override options are set.
static bool anyExpandVPOverridesSet() {
  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
}

#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of lowered vector predication operations");
/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();
  return false;
}
/// \returns A non-excepting divisor constant for this type.
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  return ConstantInt::get(DivTy, 1u, false);
}
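// Illustrative note on getSafeDivisor: for a <4 x i32> divisor type this
// yields the splat constant <i32 1, i32 1, i32 1, i32 1>. Dividing by it can
// neither trap nor produce UB, which is all the operand blending below needs.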
/// Transfer operation properties from \p VPI to \p NewVal.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;

  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;

  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}
/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
/// \p OldOp gets erased.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
  transferDecorations(NewOp, OldOp);
  OldOp.replaceAllUsesWith(&NewOp);
  OldOp.eraseFromParent();
}
static bool maySpeculateLanes(VPIntrinsic &VPI) {
  // The result of VP reductions depends on the mask and evl.
  if (isa<VPReductionIntrinsic>(VPI))
    return false;
  // Fallback to whether the intrinsic is speculatable.
  if (auto IntrID = VPI.getFunctionalIntrinsicID())
    return Intrinsic::getAttributes(VPI.getContext(), *IntrID)
        .hasFnAttr(Attribute::AttrKind::Speculatable);
  if (auto Opc = VPI.getFunctionalOpcode())
    return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI);
  return false;
}
namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
  Function &F;
  const TargetTransformInfo &TTI;

  /// \returns A (fixed length) vector with ascending integer indices
  /// (<0, 1, ..., NumElems-1>).
  /// \p Builder
  ///    Used for instruction creation.
  /// \p LaneTy
  ///    Integer element type of the result vector.
  /// \p NumElems
  ///    Number of vector elements.
  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                          unsigned NumElems);

  /// \returns A bitmask that is true where the lane position is less-than
  /// \p EVLParam.
  /// \p Builder
  ///    Used for instruction creation.
  /// \p EVLParam
  ///    The explicit vector length parameter to test against the lane
  ///    positions.
  /// \p ElemCount
  ///    Static (potentially scalable) number of vector elements.
  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
                          ElementCount ElemCount);

  /// Fold the %evl parameter of \p VPI into its %mask parameter.
  Value *foldEVLIntoMask(VPIntrinsic &VPI);

  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
  /// length of the operation.
  void discardEVLParameter(VPIntrinsic &PI);

  /// Lower this VP binary operator to an unpredicated binary operator.
  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                           VPIntrinsic &PI);

  /// Lower this VP int call to an unpredicated int call.
  Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                    unsigned UnpredicatedIntrinsicID);

  /// Lower this VP fp call to an unpredicated fp call.
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                   unsigned UnpredicatedIntrinsicID);

  /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
  Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                      VPReductionIntrinsic &PI);

  /// Lower this VP cast operation to a non-VP intrinsic.
  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                          VPIntrinsic &VPI);

  /// Lower this VP memory operation to a non-VP intrinsic.
  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                            VPIntrinsic &VPI);

  /// Lower this VP comparison to a call to an unpredicated comparison.
  Value *expandPredicationInComparison(IRBuilder<> &Builder,
                                       VPCmpIntrinsic &PI);

  /// Query TTI and expand the vector predication in \p PI accordingly.
  Value *expandPredication(VPIntrinsic &PI);

  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
  /// overrides TTI with the cl::opts listed at the top of this file.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
  bool UsingTTIOverrides;

public:
  CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
      : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

  bool expandVectorPredication();
};
//// CachingVPExpander {

Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  SmallVector<Constant *, 16> ConstElems;

  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));

  return ConstantVector::get(ConstElems);
}
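// Illustrative example: createStepVector(Builder, i32, 4) materializes the
// constant <4 x i32> <i32 0, i32 1, i32 2, i32 3>.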
Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // Scalable vector %evl conversion.
  if (ElemCount.isScalable()) {
    auto *M = Builder.GetInsertBlock()->getModule();
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    Function *ActiveMaskFunc = Intrinsic::getDeclaration(
        M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
    // `get_active_lane_mask` performs an implicit less-than comparison.
    Value *ConstZero = Builder.getInt32(0);
    return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
  }

  // Fixed vector %evl conversion.
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
}
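// Illustrative example for convertEVLToMask (assuming a fixed <4 x i32>
// operation with %evl = 2): the step vector <0, 1, 2, 3> is compared
// unsigned-less-than against the splat <2, 2, 2, 2>, producing the lane mask
// <1, 1, 0, 0>. The scalable path obtains the same semantics from
// llvm.get.active.lane.mask(0, %evl).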
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Blend in safe operands.
  if (Mask && !isAllTrueMask(Mask)) {
    switch (OC) {
    default:
      // Can safely ignore the predicate.
      break;

    // Division operators need a safe divisor on masked-off lanes (1).
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // 2nd operand must not be zero.
      Value *SafeDivisor = getSafeDivisor(VPI.getType());
      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
    }
  }

  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());

  replaceOperation(*NewBinOp, VPI);
  return NewBinOp;
}
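// Illustrative expansion of expandPredicationInBinaryOperator (assuming %evl
// was already rendered ineffective):
//   %r = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                           <4 x i1> %m, i32 %evl)
// becomes
//   %safe = select <4 x i1> %m, <4 x i32> %b,
//                  <4 x i32> <i32 1, i32 1, i32 1, i32 1>
//   %r    = udiv <4 x i32> %a, %safe
// so masked-off lanes can never divide by zero.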
Value *CachingVPExpander::expandPredicationToIntCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    Value *Op = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }
  return nullptr;
}
Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    Value *Op0 = VPI.getOperand(0);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::maxnum:
  case Intrinsic::minnum: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *Op2 = VPI.getOperand(2);
    Function *Fn = Intrinsic::getDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp;
    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
      NewOp =
          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    else
      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }
  return nullptr;
}
/// \returns The neutral element of the reduction \p VPI.
static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  bool Negative = false;
  unsigned EltBits = EltTy->getScalarSizeInBits();
  Intrinsic::ID VID = VPI.getIntrinsicID();
  switch (VID) {
  default:
    llvm_unreachable("Expecting a VP reduction intrinsic");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor:
  case Intrinsic::vp_reduce_umax:
    return Constant::getNullValue(EltTy);
  case Intrinsic::vp_reduce_mul:
    return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_umin:
    return ConstantInt::getAllOnesValue(EltTy);
  case Intrinsic::vp_reduce_smin:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMaxValue(EltBits));
  case Intrinsic::vp_reduce_smax:
    return ConstantInt::get(EltTy->getContext(),
                            APInt::getSignedMinValue(EltBits));
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmaximum:
    Negative = true;
    [[fallthrough]];
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fminimum: {
    bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
                         VID == Intrinsic::vp_reduce_fmaximum;
    FastMathFlags Flags = VPI.getFastMathFlags();
    const fltSemantics &Semantics = EltTy->getFltSemantics();
    return (!Flags.noNaNs() && !PropagatesNaN)
               ? ConstantFP::getQNaN(EltTy, Negative)
           : !Flags.noInfs()
               ? ConstantFP::getInfinity(EltTy, Negative)
               : ConstantFP::get(EltTy,
                                 APFloat::getLargest(Semantics, Negative));
  }
  case Intrinsic::vp_reduce_fadd:
    return ConstantFP::getNegativeZero(EltTy);
  case Intrinsic::vp_reduce_fmul:
    return ConstantFP::get(EltTy, 1.0);
  }
}
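// Illustrative rationale: the neutral element is the value that leaves the
// reduction unchanged, e.g. -0.0 for fadd (x + -0.0 == x, even for x == +0.0),
// 1 for mul, all-ones for and/umin, and signed-max for smin. Masked-off lanes
// are overwritten with it below, so they cannot affect the result.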
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());

  // Insert neutral element in masked-out positions.
  if (Mask && !isAllTrueMask(Mask)) {
    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
    auto *NeutralVector = Builder.CreateVectorSplat(
        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  }

  Value *Reduction;
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Impossible reduction kind");
  case Intrinsic::vp_reduce_add:
    Reduction = Builder.CreateAddReduce(RedOp);
    Reduction = Builder.CreateAdd(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_mul:
    Reduction = Builder.CreateMulReduce(RedOp);
    Reduction = Builder.CreateMul(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_and:
    Reduction = Builder.CreateAndReduce(RedOp);
    Reduction = Builder.CreateAnd(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_or:
    Reduction = Builder.CreateOrReduce(RedOp);
    Reduction = Builder.CreateOr(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_xor:
    Reduction = Builder.CreateXorReduce(RedOp);
    Reduction = Builder.CreateXor(Reduction, Start);
    break;
  case Intrinsic::vp_reduce_smax:
    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_smin:
    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umax:
    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_umin:
    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmax:
    Reduction = Builder.CreateFPMaxReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmin:
    Reduction = Builder.CreateFPMinReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fmaximum:
    Reduction = Builder.CreateFPMaximumReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fminimum:
    Reduction = Builder.CreateFPMinimumReduce(RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction =
        Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start);
    break;
  case Intrinsic::vp_reduce_fadd:
    Reduction = Builder.CreateFAddReduce(Start, RedOp);
    break;
  case Intrinsic::vp_reduce_fmul:
    Reduction = Builder.CreateFMulReduce(Start, RedOp);
    break;
  }

  replaceOperation(*Reduction, VPI);
  return Reduction;
}
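// Illustrative expansion (mask form, %evl already folded away):
//   %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %v,
//                                           <4 x i1> %m, i32 %evl)
// becomes
//   %safe = select <4 x i1> %m, <4 x i32> %v, <4 x i32> zeroinitializer
//   %red  = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %safe)
//   %r    = add i32 %red, %start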
Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                                           VPIntrinsic &VPI) {
  Value *CastOp = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP cast intrinsic");
  case Intrinsic::vp_sext:
    CastOp =
        Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_zext:
    CastOp =
        Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_trunc:
    CastOp =
        Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_inttoptr:
    CastOp =
        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_ptrtoint:
    CastOp =
        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptosi:
    CastOp =
        Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptoui:
    CastOp =
        Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_sitofp:
    CastOp =
        Builder.CreateSIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_uitofp:
    CastOp =
        Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fptrunc:
    CastOp =
        Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  case Intrinsic::vp_fpext:
    CastOp =
        Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
    break;
  }
  replaceOperation(*CastOp, VPI);
  return CastOp;
}
Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = F.getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();

  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);
    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);
    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
        VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}
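// Illustrative expansion: once the %evl parameter is ineffective,
//   %v = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %p, <4 x i1> %m, i32 %evl)
// becomes a call to @llvm.masked.load.v4i32.p0 with the same mask (and an
// undef passthru), or a plain `load` if the mask is known all-true.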
Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
         *VPI.getFunctionalOpcode() == Instruction::FCmp);

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  auto Pred = VPI.getPredicate();

  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);

  replaceOperation(*NewCmp, VPI);
  return NewCmp;
}
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");

  if (VPI.canIgnoreVectorLengthParam())
    return;

  Value *EVLParam = VPI.getVectorLengthParam();
  if (!EVLParam)
    return;

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  if (StaticElemCount.isScalable()) {
    auto *M = VPI.getModule();
    Function *VScaleFunc =
        Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
    Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*NUW*/ true, /*NSW*/ false);
  } else {
    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
  }
  VPI.setVectorLengthParam(MaxEVL);
}
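// Illustrative example: for <vscale x 4 x i32> the %evl parameter is replaced
// with
//   %vscale        = call i32 @llvm.vscale.i32()
//   %scalable_size = mul nuw i32 %vscale, 4
// i.e. the full runtime vector length, which makes the %evl bound vacuous.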
Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // The %evl parameter is ineffective, so there is nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return &VPI;

  // Only VP intrinsics can have an %evl parameter.
  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');

  // Convert the %evl predication into vector mask predication.
  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return &VPI;
}
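// Illustrative example: for a <4 x i32> op with mask %m and %evl = %n, the
// intrinsic is rewritten to use
//   %vlmask  = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, splat(%n)
//   %newmask = and <4 x i1> %vlmask, %m
// and %evl is then reset to the static vector length (4) by
// discardEVLParameter.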
Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
  auto OC = VPI.getFunctionalOpcode();

  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
    return expandPredicationToCastIntrinsic(Builder, VPI);
  }

  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_abs:
  case Intrinsic::vp_smax:
  case Intrinsic::vp_smin:
  case Intrinsic::vp_umax:
  case Intrinsic::vp_umin:
  case Intrinsic::vp_bswap:
  case Intrinsic::vp_bitreverse:
    return expandPredicationToIntCall(Builder, VPI,
                                      VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_fabs:
  case Intrinsic::vp_sqrt:
  case Intrinsic::vp_maxnum:
  case Intrinsic::vp_minnum:
  case Intrinsic::vp_maximum:
  case Intrinsic::vp_minimum:
  case Intrinsic::vp_fma:
  case Intrinsic::vp_fmuladd:
    return expandPredicationToFPCall(Builder, VPI,
                                     VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }

  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;

  return &VPI;
}
//// } CachingVPExpander
struct TransformJob {
  VPIntrinsic *PI;
  TargetTransformInfo::VPLegalization Strategy;
  TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
      : PI(PI), Strategy(InitStrat) {}

  bool isDone() const { return Strategy.shouldDoNothing(); }
};
void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
  // Operations with speculatable lanes do not strictly need predication.
  if (maySpeculateLanes(VPI)) {
    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
    // No need to expand %evl into the %mask only to ignore that code.
    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
    return;
  }

  // We have to preserve the predicating effect of %evl for this
  // non-speculatable VP intrinsic.
  // 1) Never discard %evl.
  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
  //    %evl gets folded into %mask.
  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
  }
}
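// Illustrative consequence: for a non-speculatable op such as vp.sdiv with
// OpStrategy == Convert, EVLParamStrategy is forced to Convert so the %evl
// bound is folded into the mask before the predicated form disappears;
// silently discarding %evl there could re-enable trapping lanes.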
VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing, the following does not need to be
  // efficient.
  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}
/// Expand llvm.vp.* intrinsics as requested by \p TTI.
bool CachingVPExpander::expandVectorPredication() {
  SmallVector<TransformJob, 16> Worklist;

  // Collect all VPIntrinsics that need expansion and determine their expansion
  // strategy.
  for (auto &I : instructions(F)) {
    auto *VPI = dyn_cast<VPIntrinsic>(&I);
    if (!VPI)
      continue;
    auto VPStrat = getVPLegalizationStrategy(*VPI);
    sanitizeStrategy(*VPI, VPStrat);
    if (!VPStrat.shouldDoNothing())
      Worklist.emplace_back(VPI, VPStrat);
  }
  if (Worklist.empty())
    return false;

  // Transform all VPIntrinsics on the worklist.
  LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
                    << " instructions ::::\n");
  for (TransformJob Job : Worklist) {
    // Transform the EVL parameter.
    switch (Job.Strategy.EVLParamStrategy) {
    case VPLegalization::Legal:
      break;
    case VPLegalization::Discard:
      discardEVLParameter(*Job.PI);
      break;
    case VPLegalization::Convert:
      if (foldEVLIntoMask(*Job.PI))
        ++NumFoldedVL;
      break;
    }
    Job.Strategy.EVLParamStrategy = VPLegalization::Legal;

    // Replace with a non-predicated operation.
    switch (Job.Strategy.OpStrategy) {
    case VPLegalization::Legal:
      break;
    case VPLegalization::Discard:
      llvm_unreachable("Invalid strategy for operators.");
    case VPLegalization::Convert:
      expandPredication(*Job.PI);
      ++NumLoweredVPOps;
      break;
    }
    Job.Strategy.OpStrategy = VPLegalization::Legal;

    assert(Job.isDone() && "incomplete transformation");
  }

  return true;
}
class ExpandVectorPredication : public FunctionPass {
public:
  static char ID;
  ExpandVectorPredication() : FunctionPass(ID) {
    initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    CachingVPExpander VPExpander(F, *TTI);
    return VPExpander.expandVectorPredication();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};
} // namespace
char ExpandVectorPredication::ID;
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
                      "Expand vector predication intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
                    "Expand vector predication intrinsics", false, false)
FunctionPass *llvm::createExpandVectorPredicationPass() {
  return new ExpandVectorPredication();
}
PreservedAnalyses
ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  CachingVPExpander VPExpander(F, TTI);
  if (!VPExpander.expandVectorPredication())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}