Another attempt to fix the build bot breaks after r360426
[llvm-core.git] / lib / CodeGen / ExpandReductions.cpp
blob34858883298edfb2fa22364098e44bca2ecb262e
1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the experimental intrinsics until just before codegen.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/ExpandReductions.h"
15 #include "llvm/Analysis/TargetTransformInfo.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Transforms/Utils/LoopUtils.h"
26 using namespace llvm;
28 namespace {
30 unsigned getOpcode(Intrinsic::ID ID) {
31 switch (ID) {
32 case Intrinsic::experimental_vector_reduce_fadd:
33 return Instruction::FAdd;
34 case Intrinsic::experimental_vector_reduce_fmul:
35 return Instruction::FMul;
36 case Intrinsic::experimental_vector_reduce_add:
37 return Instruction::Add;
38 case Intrinsic::experimental_vector_reduce_mul:
39 return Instruction::Mul;
40 case Intrinsic::experimental_vector_reduce_and:
41 return Instruction::And;
42 case Intrinsic::experimental_vector_reduce_or:
43 return Instruction::Or;
44 case Intrinsic::experimental_vector_reduce_xor:
45 return Instruction::Xor;
46 case Intrinsic::experimental_vector_reduce_smax:
47 case Intrinsic::experimental_vector_reduce_smin:
48 case Intrinsic::experimental_vector_reduce_umax:
49 case Intrinsic::experimental_vector_reduce_umin:
50 return Instruction::ICmp;
51 case Intrinsic::experimental_vector_reduce_fmax:
52 case Intrinsic::experimental_vector_reduce_fmin:
53 return Instruction::FCmp;
54 default:
55 llvm_unreachable("Unexpected ID");
59 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
60 switch (ID) {
61 case Intrinsic::experimental_vector_reduce_smax:
62 return RecurrenceDescriptor::MRK_SIntMax;
63 case Intrinsic::experimental_vector_reduce_smin:
64 return RecurrenceDescriptor::MRK_SIntMin;
65 case Intrinsic::experimental_vector_reduce_umax:
66 return RecurrenceDescriptor::MRK_UIntMax;
67 case Intrinsic::experimental_vector_reduce_umin:
68 return RecurrenceDescriptor::MRK_UIntMin;
69 case Intrinsic::experimental_vector_reduce_fmax:
70 return RecurrenceDescriptor::MRK_FloatMax;
71 case Intrinsic::experimental_vector_reduce_fmin:
72 return RecurrenceDescriptor::MRK_FloatMin;
73 default:
74 return RecurrenceDescriptor::MRK_Invalid;
78 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
79 bool Changed = false;
80 SmallVector<IntrinsicInst *, 4> Worklist;
81 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
82 if (auto II = dyn_cast<IntrinsicInst>(&*I))
83 Worklist.push_back(II);
85 for (auto *II : Worklist) {
86 IRBuilder<> Builder(II);
87 bool IsOrdered = false;
88 Value *Acc = nullptr;
89 Value *Vec = nullptr;
90 auto ID = II->getIntrinsicID();
91 auto MRK = RecurrenceDescriptor::MRK_Invalid;
92 switch (ID) {
93 case Intrinsic::experimental_vector_reduce_fadd:
94 case Intrinsic::experimental_vector_reduce_fmul:
95 // FMFs must be attached to the call, otherwise it's an ordered reduction
96 // and it can't be handled by generating a shuffle sequence.
97 if (!II->getFastMathFlags().isFast())
98 IsOrdered = true;
99 Acc = II->getArgOperand(0);
100 Vec = II->getArgOperand(1);
101 break;
102 case Intrinsic::experimental_vector_reduce_add:
103 case Intrinsic::experimental_vector_reduce_mul:
104 case Intrinsic::experimental_vector_reduce_and:
105 case Intrinsic::experimental_vector_reduce_or:
106 case Intrinsic::experimental_vector_reduce_xor:
107 case Intrinsic::experimental_vector_reduce_smax:
108 case Intrinsic::experimental_vector_reduce_smin:
109 case Intrinsic::experimental_vector_reduce_umax:
110 case Intrinsic::experimental_vector_reduce_umin:
111 case Intrinsic::experimental_vector_reduce_fmax:
112 case Intrinsic::experimental_vector_reduce_fmin:
113 Vec = II->getArgOperand(0);
114 MRK = getMRK(ID);
115 break;
116 default:
117 continue;
119 if (!TTI->shouldExpandReduction(II))
120 continue;
121 FastMathFlags FMF =
122 isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
123 Value *Rdx =
124 IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
125 : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK, FMF);
126 II->replaceAllUsesWith(Rdx);
127 II->eraseFromParent();
128 Changed = true;
130 return Changed;
133 class ExpandReductions : public FunctionPass {
134 public:
135 static char ID;
136 ExpandReductions() : FunctionPass(ID) {
137 initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
140 bool runOnFunction(Function &F) override {
141 const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
142 return expandReductions(F, TTI);
145 void getAnalysisUsage(AnalysisUsage &AU) const override {
146 AU.addRequired<TargetTransformInfoWrapperPass>();
147 AU.setPreservesCFG();
152 char ExpandReductions::ID;
153 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
154 "Expand reduction intrinsics", false, false)
155 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
156 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
157 "Expand reduction intrinsics", false, false)
159 FunctionPass *llvm::createExpandReductionsPass() {
160 return new ExpandReductions();
163 PreservedAnalyses ExpandReductionsPass::run(Function &F,
164 FunctionAnalysisManager &AM) {
165 const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
166 if (!expandReductions(F, &TTI))
167 return PreservedAnalyses::all();
168 PreservedAnalyses PA;
169 PA.preserveSet<CFGAnalyses>();
170 return PA;