//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
31 #define DEBUG_TYPE "riscv-codegenprepare"
32 #define PASS_NAME "RISC-V CodeGenPrepare"
36 class RISCVCodeGenPrepare
: public FunctionPass
,
37 public InstVisitor
<RISCVCodeGenPrepare
, bool> {
39 const DominatorTree
*DT
;
40 const RISCVSubtarget
*ST
;
45 RISCVCodeGenPrepare() : FunctionPass(ID
) {}
47 bool runOnFunction(Function
&F
) override
;
49 StringRef
getPassName() const override
{ return PASS_NAME
; }
51 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
53 AU
.addRequired
<DominatorTreeWrapperPass
>();
54 AU
.addRequired
<TargetPassConfig
>();
57 bool visitInstruction(Instruction
&I
) { return false; }
58 bool visitAnd(BinaryOperator
&BO
);
59 bool visitIntrinsicInst(IntrinsicInst
&I
);
60 bool expandVPStrideLoad(IntrinsicInst
&I
);
63 } // end anonymous namespace
65 // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
66 // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
67 // the upper 32 bits with ones.
68 bool RISCVCodeGenPrepare::visitAnd(BinaryOperator
&BO
) {
72 if (!BO
.getType()->isIntegerTy(64))
75 using namespace PatternMatch
;
77 // Left hand side should be a zext nneg.
79 if (!match(BO
.getOperand(0), m_NNegZExt(m_Value(LHSSrc
))))
82 if (!LHSSrc
->getType()->isIntegerTy(32))
85 // Right hand side should be a constant.
86 Value
*RHS
= BO
.getOperand(1);
88 auto *CI
= dyn_cast
<ConstantInt
>(RHS
);
91 uint64_t C
= CI
->getZExtValue();
93 // Look for constants that fit in 32 bits but not simm12, and can be made
94 // into simm12 by sign extending bit 31. This will allow use of ANDI.
95 // TODO: Is worth making simm32?
96 if (!isUInt
<32>(C
) || isInt
<12>(C
) || !isInt
<12>(SignExtend64
<32>(C
)))
99 // Sign extend the constant and replace the And operand.
100 C
= SignExtend64
<32>(C
);
101 BO
.setOperand(1, ConstantInt::get(RHS
->getType(), C
));
106 // LLVM vector reduction intrinsics return a scalar result, but on RISC-V vector
107 // reduction instructions write the result in the first element of a vector
108 // register. So when a reduction in a loop uses a scalar phi, we end up with
109 // unnecessary scalar moves:
113 // vfredosum.vs v8, v8, v10
116 // This mainly affects ordered fadd reductions, since other types of reduction
117 // typically use element-wise vectorisation in the loop body. This tries to
118 // vectorize any scalar phis that feed into a fadd reduction:
121 // %phi = phi <float> [ ..., %entry ], [ %acc, %loop ]
122 // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
123 // <vscale x 2 x float> %vec)
128 // %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
129 // %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
130 // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %x,
131 // <vscale x 2 x float> %vec)
132 // %acc.vec = insertelement <vscale x 2 x float> poison, float %acc.next, i64 0
134 // Which eliminates the scalar -> vector -> scalar crossing during instruction
136 bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst
&I
) {
137 if (expandVPStrideLoad(I
))
140 if (I
.getIntrinsicID() != Intrinsic::vector_reduce_fadd
)
143 auto *PHI
= dyn_cast
<PHINode
>(I
.getOperand(0));
144 if (!PHI
|| !PHI
->hasOneUse() ||
145 !llvm::is_contained(PHI
->incoming_values(), &I
))
148 Type
*VecTy
= I
.getOperand(1)->getType();
149 IRBuilder
<> Builder(PHI
);
150 auto *VecPHI
= Builder
.CreatePHI(VecTy
, PHI
->getNumIncomingValues());
152 for (auto *BB
: PHI
->blocks()) {
153 Builder
.SetInsertPoint(BB
->getTerminator());
154 Value
*InsertElt
= Builder
.CreateInsertElement(
155 VecTy
, PHI
->getIncomingValueForBlock(BB
), (uint64_t)0);
156 VecPHI
->addIncoming(InsertElt
, BB
);
159 Builder
.SetInsertPoint(&I
);
160 I
.setOperand(0, Builder
.CreateExtractElement(VecPHI
, (uint64_t)0));
162 PHI
->eraseFromParent();
167 // Always expand zero strided loads so we match more .vx splat patterns, even if
168 // we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
169 // it back to a strided load if it's optimized.
170 bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst
&II
) {
173 using namespace PatternMatch
;
174 if (!match(&II
, m_Intrinsic
<Intrinsic::experimental_vp_strided_load
>(
175 m_Value(BasePtr
), m_Zero(), m_AllOnes(), m_Value(VL
))))
178 // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so
179 // avoid expanding here.
180 if (II
.getType()->getScalarSizeInBits() > ST
->getXLen())
183 if (!isKnownNonZero(VL
, {*DL
, DT
, nullptr, &II
}))
186 auto *VTy
= cast
<VectorType
>(II
.getType());
188 IRBuilder
<> Builder(&II
);
189 Type
*STy
= VTy
->getElementType();
190 Value
*Val
= Builder
.CreateLoad(STy
, BasePtr
);
191 Value
*Res
= Builder
.CreateIntrinsic(Intrinsic::experimental_vp_splat
, {VTy
},
192 {Val
, II
.getOperand(2), VL
});
194 II
.replaceAllUsesWith(Res
);
195 II
.eraseFromParent();
199 bool RISCVCodeGenPrepare::runOnFunction(Function
&F
) {
203 auto &TPC
= getAnalysis
<TargetPassConfig
>();
204 auto &TM
= TPC
.getTM
<RISCVTargetMachine
>();
205 ST
= &TM
.getSubtarget
<RISCVSubtarget
>(F
);
207 DL
= &F
.getDataLayout();
208 DT
= &getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
210 bool MadeChange
= false;
212 for (Instruction
&I
: llvm::make_early_inc_range(BB
))
213 MadeChange
|= visit(I
);
218 INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare
, DEBUG_TYPE
, PASS_NAME
, false, false)
219 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig
)
220 INITIALIZE_PASS_END(RISCVCodeGenPrepare
, DEBUG_TYPE
, PASS_NAME
, false, false)
222 char RISCVCodeGenPrepare::ID
= 0;
224 FunctionPass
*llvm::createRISCVCodeGenPreparePass() {
225 return new RISCVCodeGenPrepare();