lib/CodeGen/ExpandReductions.cpp

   1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass implements IR expansion for reduction intrinsics, allowing targets
  10 // to enable the experimental intrinsics until just before codegen.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
  25
  26 using namespace llvm;
  27
  28 namespace {
  29
  30 unsigned getOpcode(Intrinsic::ID ID) {
  31   switch (ID) {
  32   case Intrinsic::experimental_vector_reduce_v2_fadd:
  33     return Instruction::FAdd;
  34   case Intrinsic::experimental_vector_reduce_v2_fmul:
  35     return Instruction::FMul;
  36   case Intrinsic::experimental_vector_reduce_add:
  37     return Instruction::Add;
  38   case Intrinsic::experimental_vector_reduce_mul:
  39     return Instruction::Mul;
  40   case Intrinsic::experimental_vector_reduce_and:
  41     return Instruction::And;
  42   case Intrinsic::experimental_vector_reduce_or:
  43     return Instruction::Or;
  44   case Intrinsic::experimental_vector_reduce_xor:
  45     return Instruction::Xor;
  46   case Intrinsic::experimental_vector_reduce_smax:
  47   case Intrinsic::experimental_vector_reduce_smin:
  48   case Intrinsic::experimental_vector_reduce_umax:
  49   case Intrinsic::experimental_vector_reduce_umin:
  50     return Instruction::ICmp;
  51   case Intrinsic::experimental_vector_reduce_fmax:
  52   case Intrinsic::experimental_vector_reduce_fmin:
  53     return Instruction::FCmp;
  54   default:
  55     llvm_unreachable("Unexpected ID");
  56   }
  57 }
  58
  59 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  60   switch (ID) {
  61   case Intrinsic::experimental_vector_reduce_smax:
  62     return RecurrenceDescriptor::MRK_SIntMax;
  63   case Intrinsic::experimental_vector_reduce_smin:
  64     return RecurrenceDescriptor::MRK_SIntMin;
  65   case Intrinsic::experimental_vector_reduce_umax:
  66     return RecurrenceDescriptor::MRK_UIntMax;
  67   case Intrinsic::experimental_vector_reduce_umin:
  68     return RecurrenceDescriptor::MRK_UIntMin;
  69   case Intrinsic::experimental_vector_reduce_fmax:
  70     return RecurrenceDescriptor::MRK_FloatMax;
  71   case Intrinsic::experimental_vector_reduce_fmin:
  72     return RecurrenceDescriptor::MRK_FloatMin;
  73   default:
  74     return RecurrenceDescriptor::MRK_Invalid;
  75   }
  76 }
  77
  78 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  79   bool Changed = false;
  80   SmallVector<IntrinsicInst *, 4> Worklist;
  81   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
  82     if (auto II = dyn_cast<IntrinsicInst>(&*I))
  83       Worklist.push_back(II);
  84
  85   for (auto *II : Worklist) {
  86     if (!TTI->shouldExpandReduction(II))
  87       continue;
  88
  89     FastMathFlags FMF =
  90         isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
  91     Intrinsic::ID ID = II->getIntrinsicID();
  92     RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
  93
  94     Value *Rdx = nullptr;
  95     IRBuilder<> Builder(II);
  96     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
  97     Builder.setFastMathFlags(FMF);
  98     switch (ID) {
  99     case Intrinsic::experimental_vector_reduce_v2_fadd:
 100     case Intrinsic::experimental_vector_reduce_v2_fmul: {
 101       // FMFs must be attached to the call, otherwise it's an ordered reduction
 102       // and it can't be handled by generating a shuffle sequence.
 103       Value *Acc = II->getArgOperand(0);
 104       Value *Vec = II->getArgOperand(1);
 105       if (!FMF.allowReassoc())
 106         Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
 107       else {
 108         Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
 109         Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
 110                                   Acc, Rdx, "bin.rdx");
 111       }
 112     } break;
 113     case Intrinsic::experimental_vector_reduce_add:
 114     case Intrinsic::experimental_vector_reduce_mul:
 115     case Intrinsic::experimental_vector_reduce_and:
 116     case Intrinsic::experimental_vector_reduce_or:
 117     case Intrinsic::experimental_vector_reduce_xor:
 118     case Intrinsic::experimental_vector_reduce_smax:
 119     case Intrinsic::experimental_vector_reduce_smin:
 120     case Intrinsic::experimental_vector_reduce_umax:
 121     case Intrinsic::experimental_vector_reduce_umin:
 122     case Intrinsic::experimental_vector_reduce_fmax:
 123     case Intrinsic::experimental_vector_reduce_fmin: {
 124       Value *Vec = II->getArgOperand(0);
 125       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
 126     } break;
 127     default:
 128       continue;
 129     }
 130     II->replaceAllUsesWith(Rdx);
 131     II->eraseFromParent();
 132     Changed = true;
 133   }
 134   return Changed;
 135 }
 136
 137 class ExpandReductions : public FunctionPass {
 138 public:
 139   static char ID;
 140   ExpandReductions() : FunctionPass(ID) {
 141     initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
 142   }
 143
 144   bool runOnFunction(Function &F) override {
 145     const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 146     return expandReductions(F, TTI);
 147   }
 148
 149   void getAnalysisUsage(AnalysisUsage &AU) const override {
 150     AU.addRequired<TargetTransformInfoWrapperPass>();
 151     AU.setPreservesCFG();
 152   }
 153 };
 154 }
 155
 156 char ExpandReductions::ID;
 157 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
 158                       "Expand reduction intrinsics", false, false)
 159 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 160 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
 161                     "Expand reduction intrinsics", false, false)
 162
 163 FunctionPass *llvm::createExpandReductionsPass() {
 164   return new ExpandReductions();
 165 }
 166
 167 PreservedAnalyses ExpandReductionsPass::run(Function &F,
 168                                             FunctionAnalysisManager &AM) {
 169   const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 170   if (!expandReductions(F, &TTI))
 171     return PreservedAnalyses::all();
 172   PreservedAnalyses PA;
 173   PA.preserveSet<CFGAnalyses>();
 174   return PA;
 175 }