//===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// Hexagon target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "HexagonTargetTransformInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

using namespace llvm;

#define DEBUG_TYPE "hexagontti"

static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));

static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control lookup table emission on Hexagon target"));

// Constant "cost factor" to make floating point operations more expensive
// in terms of vectorization cost. This isn't the best way, but it should
// do. Ultimately, the cost should use cycles.
static const unsigned FloatFactor = 4;

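// HVX vectorization is opt-in: the subtarget must support HVX operations,
// and the -hexagon-autohvx flag must also be set.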
bool HexagonTTIImpl::useHVX() const {
  return ST.useHVXOps() && HexagonAutoHVX;
}

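// A vector type qualifies for HVX if it is a simple integer vector wider
// than 64 bits that either is a native HVX type or would be legalized by
// widening into one.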
bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
  assert(VecTy->isVectorTy());
  // Avoid types like <2 x i32*>.
  if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
    return false;
  EVT VecVT = EVT::getEVT(VecTy);
  if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
    return false;
  if (ST.isHVXVectorType(VecVT.getSimpleVT()))
    return true;
  auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
  return Action == TargetLoweringBase::TypeWidenVector;
}

unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
  if (Ty->isVectorTy())
    return Ty->getVectorNumElements();
  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
         "Expecting scalar type");
  return 1;
}

TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
  // Return fast hardware support as every input < 64 bits will be promoted
  // to 64 bits.
  return TargetTransformInfo::PSK_FastHardware;
}

// The Hexagon target can unroll loops with run-time trip counts.
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  UP.Runtime = UP.Partial = true;
  // Only try to peel innermost loops with small runtime trip counts.
  if (L && L->empty() && canPeel(L) &&
      SE.getSmallConstantTripCount(L) == 0 &&
      SE.getSmallConstantMaxTripCount(L) > 0 &&
      SE.getSmallConstantMaxTripCount(L) <= 5) {
    UP.PeelCount = 2;
  }
}

bool HexagonTTIImpl::shouldFavorPostInc() const {
  return true;
}

/// --- Vector TTI begin ---

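// Hexagon has 32 general purpose registers; HVX adds a file of 32 vector
// registers, which is only available when HVX vectorization is enabled.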
unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
  if (Vector)
    return useHVX() ? 32 : 0;
  return 32;
}

unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return useHVX() ? 2 : 0;
}

unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
  return Vector ? getMinVectorRegisterBitWidth() : 32;
}

unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
  return useHVX() ? ST.getVectorLength()*8 : 0;
}

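// The minimum vectorization factor is the number of elements that fill one
// HVX register. For example, in 128-byte HVX mode a register holds 1024
// bits, so 16-bit elements give a minimum VF of 1024/16 = 64.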
unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
  return (8 * ST.getVectorLength()) / ElemWidth;
}

unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
      bool Extract) {
  return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
      ArrayRef<const Value*> Args, unsigned VF) {
  return BaseT::getOperandsScalarizationOverhead(Args, VF);
}

unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
      ArrayRef<Type*> Tys) {
  return BaseT::getCallInstrCost(F, RetTy, Tys);
}

unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
      ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
}

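// Special-case bswap: it is modeled as cheap, charging only the type
// legalization cost plus a small constant instead of the default
// scalarized estimate.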
unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
      ArrayRef<Type*> Tys, FastMathFlags FMF,
      unsigned ScalarizationCostPassed) {
  if (ID == Intrinsic::bswap) {
    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
    return LT.first + 2;
  }
  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                      ScalarizationCostPassed);
}

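// Address computations are assumed to fold into Hexagon's addressing
// modes, so they are modeled as free.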
unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
      ScalarEvolution *SE, const SCEV *S) {
  return 0;
}

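// Loads are costed in detail: a vector that fills whole HVX registers costs
// one unit per register, while misaligned or oddly sized vectors are costed
// as a series of aligned loads plus overhead for assembling the vector.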
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
      unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
  if (Opcode == Instruction::Store)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);

  if (Src->isVectorTy()) {
    VectorType *VecTy = cast<VectorType>(Src);
    unsigned VecWidth = VecTy->getBitWidth();
    if (useHVX() && isTypeForHVX(VecTy)) {
      unsigned RegWidth = getRegisterBitWidth(true);
      Alignment = std::min(Alignment, RegWidth/8);
      // Cost of HVX loads.
      if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;
      // Cost of constructing HVX vector from scalar loads.
      unsigned AlignWidth = 8 * std::max(1u, Alignment);
      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
      return 3*NumLoads;
    }

    // Non-HVX vectors.
    // Add extra cost for floating point types.
    unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
                                                                 : 1;
    Alignment = std::min(Alignment, 8u);
    unsigned AlignWidth = 8 * std::max(1u, Alignment);
    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
    if (Alignment == 4 || Alignment == 8)
      return Cost * NumLoads;
    // Loads of less than 32 bits will need extra inserts to compose a vector.
    unsigned LogA = Log2_32(Alignment);
    return (3 - LogA) * Cost * NumLoads;
  }

  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
}

unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
      Type *Src, unsigned Alignment, unsigned AddressSpace) {
  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}

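// Shuffles are modeled with a flat unit cost, regardless of kind or type.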
unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
      int Index, Type *SubTp) {
  return 1;
}

unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
      Value *Ptr, bool VariableMask, unsigned Alignment) {
  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
}

unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
      Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
      bool UseMaskForGaps) {
  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace,
                                             UseMaskForCond, UseMaskForGaps);
  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
}

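// Vector floating point compares are penalized by FloatFactor per element
// on top of the type legalization cost.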
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
      Type *CondTy, const Instruction *I) {
  if (ValTy->isVectorTy()) {
    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
      TTI::OperandValueProperties Opd1PropInfo,
      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
  if (Ty->isVectorTy()) {
    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args);
}

unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
      Type *SrcTy, const Instruction *I) {
  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

    std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
    std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
    return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
  }
  return 1;
}

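// Element inserts and extracts are modeled via register rotations: a
// non-zero index costs two rotations, and inserting a non-32-bit value
// additionally requires extracting the previous element.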
unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
      unsigned Index) {
  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
                                   : Val;
  if (Opcode == Instruction::InsertElement) {
    // Need two rotations for non-zero index.
    unsigned Cost = (Index != 0) ? 2 : 0;
    if (ElemTy->isIntegerTy(32))
      return Cost;
    // If it's not a 32-bit value, there will need to be an extract.
    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
  }

  if (Opcode == Instruction::ExtractElement)
    return 2;

  return 1;
}

/// --- Vector TTI end ---

unsigned HexagonTTIImpl::getPrefetchDistance() const {
  return ST.getL1PrefetchDistance();
}

unsigned HexagonTTIImpl::getCacheLineSize() const {
  return ST.getL1CacheLineSize();
}

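// Hexagon's scalar loads sign- or zero-extend sub-32-bit values into a
// 32-bit register, so an extension whose only input is such a load costs
// nothing extra.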
int HexagonTTIImpl::getUserCost(const User *U,
                                ArrayRef<const Value *> Operands) {
  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
    if (!CI->isIntegerCast())
      return false;
    // Only extensions from an integer type shorter than 32-bit to i32
    // can be folded into the load.
    const DataLayout &DL = getDataLayout();
    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
    if (DBW != 32 || SBW >= DBW)
      return false;

    const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
    // Technically, this code could allow multiple uses of the load, and
    // check if all the uses are the same extension operation, but this
    // should be sufficient for most cases.
    return LI && LI->hasOneUse();
  };

  if (const CastInst *CI = dyn_cast<const CastInst>(U))
    if (isCastFoldedIntoLoad(CI))
      return TargetTransformInfo::TCC_Free;
  return BaseT::getUserCost(U, Operands);
}

bool HexagonTTIImpl::shouldBuildLookupTables() const {
  return EmitLookupTables;
}