1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the experimental intrinsics until just before codegen.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/ExpandReductions.h"
15 #include "llvm/Analysis/TargetTransformInfo.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Transforms/Utils/LoopUtils.h"
30 unsigned getOpcode(Intrinsic::ID ID
) {
32 case Intrinsic::experimental_vector_reduce_v2_fadd
:
33 return Instruction::FAdd
;
34 case Intrinsic::experimental_vector_reduce_v2_fmul
:
35 return Instruction::FMul
;
36 case Intrinsic::experimental_vector_reduce_add
:
37 return Instruction::Add
;
38 case Intrinsic::experimental_vector_reduce_mul
:
39 return Instruction::Mul
;
40 case Intrinsic::experimental_vector_reduce_and
:
41 return Instruction::And
;
42 case Intrinsic::experimental_vector_reduce_or
:
43 return Instruction::Or
;
44 case Intrinsic::experimental_vector_reduce_xor
:
45 return Instruction::Xor
;
46 case Intrinsic::experimental_vector_reduce_smax
:
47 case Intrinsic::experimental_vector_reduce_smin
:
48 case Intrinsic::experimental_vector_reduce_umax
:
49 case Intrinsic::experimental_vector_reduce_umin
:
50 return Instruction::ICmp
;
51 case Intrinsic::experimental_vector_reduce_fmax
:
52 case Intrinsic::experimental_vector_reduce_fmin
:
53 return Instruction::FCmp
;
55 llvm_unreachable("Unexpected ID");
59 RecurrenceDescriptor::MinMaxRecurrenceKind
getMRK(Intrinsic::ID ID
) {
61 case Intrinsic::experimental_vector_reduce_smax
:
62 return RecurrenceDescriptor::MRK_SIntMax
;
63 case Intrinsic::experimental_vector_reduce_smin
:
64 return RecurrenceDescriptor::MRK_SIntMin
;
65 case Intrinsic::experimental_vector_reduce_umax
:
66 return RecurrenceDescriptor::MRK_UIntMax
;
67 case Intrinsic::experimental_vector_reduce_umin
:
68 return RecurrenceDescriptor::MRK_UIntMin
;
69 case Intrinsic::experimental_vector_reduce_fmax
:
70 return RecurrenceDescriptor::MRK_FloatMax
;
71 case Intrinsic::experimental_vector_reduce_fmin
:
72 return RecurrenceDescriptor::MRK_FloatMin
;
74 return RecurrenceDescriptor::MRK_Invalid
;
78 bool expandReductions(Function
&F
, const TargetTransformInfo
*TTI
) {
80 SmallVector
<IntrinsicInst
*, 4> Worklist
;
81 for (inst_iterator I
= inst_begin(F
), E
= inst_end(F
); I
!= E
; ++I
)
82 if (auto II
= dyn_cast
<IntrinsicInst
>(&*I
))
83 Worklist
.push_back(II
);
85 for (auto *II
: Worklist
) {
86 if (!TTI
->shouldExpandReduction(II
))
90 isa
<FPMathOperator
>(II
) ? II
->getFastMathFlags() : FastMathFlags
{};
91 Intrinsic::ID ID
= II
->getIntrinsicID();
92 RecurrenceDescriptor::MinMaxRecurrenceKind MRK
= getMRK(ID
);
95 IRBuilder
<> Builder(II
);
96 IRBuilder
<>::FastMathFlagGuard
FMFGuard(Builder
);
97 Builder
.setFastMathFlags(FMF
);
99 case Intrinsic::experimental_vector_reduce_v2_fadd
:
100 case Intrinsic::experimental_vector_reduce_v2_fmul
: {
101 // FMFs must be attached to the call, otherwise it's an ordered reduction
102 // and it can't be handled by generating a shuffle sequence.
103 Value
*Acc
= II
->getArgOperand(0);
104 Value
*Vec
= II
->getArgOperand(1);
105 if (!FMF
.allowReassoc())
106 Rdx
= getOrderedReduction(Builder
, Acc
, Vec
, getOpcode(ID
), MRK
);
108 Rdx
= getShuffleReduction(Builder
, Vec
, getOpcode(ID
), MRK
);
109 Rdx
= Builder
.CreateBinOp((Instruction::BinaryOps
)getOpcode(ID
),
110 Acc
, Rdx
, "bin.rdx");
113 case Intrinsic::experimental_vector_reduce_add
:
114 case Intrinsic::experimental_vector_reduce_mul
:
115 case Intrinsic::experimental_vector_reduce_and
:
116 case Intrinsic::experimental_vector_reduce_or
:
117 case Intrinsic::experimental_vector_reduce_xor
:
118 case Intrinsic::experimental_vector_reduce_smax
:
119 case Intrinsic::experimental_vector_reduce_smin
:
120 case Intrinsic::experimental_vector_reduce_umax
:
121 case Intrinsic::experimental_vector_reduce_umin
:
122 case Intrinsic::experimental_vector_reduce_fmax
:
123 case Intrinsic::experimental_vector_reduce_fmin
: {
124 Value
*Vec
= II
->getArgOperand(0);
125 Rdx
= getShuffleReduction(Builder
, Vec
, getOpcode(ID
), MRK
);
130 II
->replaceAllUsesWith(Rdx
);
131 II
->eraseFromParent();
137 class ExpandReductions
: public FunctionPass
{
140 ExpandReductions() : FunctionPass(ID
) {
141 initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
144 bool runOnFunction(Function
&F
) override
{
145 const auto *TTI
=&getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
146 return expandReductions(F
, TTI
);
149 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
150 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
151 AU
.setPreservesCFG();
156 char ExpandReductions::ID
;
157 INITIALIZE_PASS_BEGIN(ExpandReductions
, "expand-reductions",
158 "Expand reduction intrinsics", false, false)
159 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
160 INITIALIZE_PASS_END(ExpandReductions
, "expand-reductions",
161 "Expand reduction intrinsics", false, false)
163 FunctionPass
*llvm::createExpandReductionsPass() {
164 return new ExpandReductions();
167 PreservedAnalyses
ExpandReductionsPass::run(Function
&F
,
168 FunctionAnalysisManager
&AM
) {
169 const auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
170 if (!expandReductions(F
, &TTI
))
171 return PreservedAnalyses::all();
172 PreservedAnalyses PA
;
173 PA
.preserveSet
<CFGAnalyses
>();