//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
31 #define DEBUG_TYPE "riscv-codegenprepare"
32 #define PASS_NAME "RISC-V CodeGenPrepare"
36 class RISCVCodeGenPrepare
: public FunctionPass
,
37 public InstVisitor
<RISCVCodeGenPrepare
, bool> {
39 const DominatorTree
*DT
;
40 const RISCVSubtarget
*ST
;
45 RISCVCodeGenPrepare() : FunctionPass(ID
) {}
47 bool runOnFunction(Function
&F
) override
;
49 StringRef
getPassName() const override
{ return PASS_NAME
; }
51 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
53 AU
.addRequired
<DominatorTreeWrapperPass
>();
54 AU
.addRequired
<TargetPassConfig
>();
57 bool visitInstruction(Instruction
&I
) { return false; }
58 bool visitAnd(BinaryOperator
&BO
);
59 bool visitIntrinsicInst(IntrinsicInst
&I
);
60 bool expandVPStrideLoad(IntrinsicInst
&I
);
63 } // end anonymous namespace
65 // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
66 // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
67 // the upper 32 bits with ones.
68 bool RISCVCodeGenPrepare::visitAnd(BinaryOperator
&BO
) {
72 if (!BO
.getType()->isIntegerTy(64))
75 using namespace PatternMatch
;
77 // Left hand side should be a zext nneg.
79 if (!match(BO
.getOperand(0), m_NNegZExt(m_Value(LHSSrc
))))
82 if (!LHSSrc
->getType()->isIntegerTy(32))
85 // Right hand side should be a constant.
86 Value
*RHS
= BO
.getOperand(1);
88 auto *CI
= dyn_cast
<ConstantInt
>(RHS
);
91 uint64_t C
= CI
->getZExtValue();
93 // Look for constants that fit in 32 bits but not simm12, and can be made
94 // into simm12 by sign extending bit 31. This will allow use of ANDI.
95 // TODO: Is worth making simm32?
96 if (!isUInt
<32>(C
) || isInt
<12>(C
) || !isInt
<12>(SignExtend64
<32>(C
)))
99 // Sign extend the constant and replace the And operand.
100 C
= SignExtend64
<32>(C
);
101 BO
.setOperand(1, ConstantInt::get(RHS
->getType(), C
));
106 // LLVM vector reduction intrinsics return a scalar result, but on RISC-V vector
107 // reduction instructions write the result in the first element of a vector
108 // register. So when a reduction in a loop uses a scalar phi, we end up with
109 // unnecessary scalar moves:
113 // vfredosum.vs v8, v8, v10
116 // This mainly affects ordered fadd reductions, since other types of reduction
117 // typically use element-wise vectorisation in the loop body. This tries to
118 // vectorize any scalar phis that feed into a fadd reduction:
121 // %phi = phi <float> [ ..., %entry ], [ %acc, %loop ]
122 // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
123 // <vscale x 2 x float> %vec)
128 // %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
129 // %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
130 // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %x,
131 // <vscale x 2 x float> %vec)
132 // %acc.vec = insertelement <vscale x 2 x float> poison, float %acc.next, i64 0
134 // Which eliminates the scalar -> vector -> scalar crossing during instruction
136 bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst
&I
) {
137 if (expandVPStrideLoad(I
))
140 if (I
.getIntrinsicID() != Intrinsic::vector_reduce_fadd
)
143 auto *PHI
= dyn_cast
<PHINode
>(I
.getOperand(0));
144 if (!PHI
|| !PHI
->hasOneUse() ||
145 !llvm::is_contained(PHI
->incoming_values(), &I
))
148 Type
*VecTy
= I
.getOperand(1)->getType();
149 IRBuilder
<> Builder(PHI
);
150 auto *VecPHI
= Builder
.CreatePHI(VecTy
, PHI
->getNumIncomingValues());
152 for (auto *BB
: PHI
->blocks()) {
153 Builder
.SetInsertPoint(BB
->getTerminator());
154 Value
*InsertElt
= Builder
.CreateInsertElement(
155 VecTy
, PHI
->getIncomingValueForBlock(BB
), (uint64_t)0);
156 VecPHI
->addIncoming(InsertElt
, BB
);
159 Builder
.SetInsertPoint(&I
);
160 I
.setOperand(0, Builder
.CreateExtractElement(VecPHI
, (uint64_t)0));
162 PHI
->eraseFromParent();
167 // Always expand zero strided loads so we match more .vx splat patterns, even if
168 // we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
169 // it back to a strided load if it's optimized.
170 bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst
&II
) {
173 using namespace PatternMatch
;
174 if (!match(&II
, m_Intrinsic
<Intrinsic::experimental_vp_strided_load
>(
175 m_Value(BasePtr
), m_Zero(), m_AllOnes(), m_Value(VL
))))
178 // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so
179 // avoid expanding here.
180 if (II
.getType()->getScalarSizeInBits() > ST
->getXLen())
183 if (!isKnownNonZero(VL
, {*DL
, DT
, nullptr, &II
}))
186 auto *VTy
= cast
<VectorType
>(II
.getType());
188 IRBuilder
<> Builder(&II
);
189 Type
*STy
= VTy
->getElementType();
190 Value
*Val
= Builder
.CreateLoad(STy
, BasePtr
);
191 Value
*Res
= Builder
.CreateIntrinsic(Intrinsic::experimental_vp_splat
, {VTy
},
192 {Val
, II
.getOperand(2), VL
});
194 II
.replaceAllUsesWith(Res
);
195 II
.eraseFromParent();
199 bool RISCVCodeGenPrepare::runOnFunction(Function
&F
) {
203 auto &TPC
= getAnalysis
<TargetPassConfig
>();
204 auto &TM
= TPC
.getTM
<RISCVTargetMachine
>();
205 ST
= &TM
.getSubtarget
<RISCVSubtarget
>(F
);
207 DL
= &F
.getDataLayout();
208 DT
= &getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
210 bool MadeChange
= false;
212 for (Instruction
&I
: llvm::make_early_inc_range(BB
))
213 MadeChange
|= visit(I
);
218 INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare
, DEBUG_TYPE
, PASS_NAME
, false, false)
219 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig
)
220 INITIALIZE_PASS_END(RISCVCodeGenPrepare
, DEBUG_TYPE
, PASS_NAME
, false, false)
222 char RISCVCodeGenPrepare::ID
= 0;
224 FunctionPass
*llvm::createRISCVCodeGenPreparePass() {
225 return new RISCVCodeGenPrepare();