//===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// This file implements a TargetTransformInfo analysis pass specific to the
/// Hexagon target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "HexagonTargetTransformInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

using namespace llvm;

#define DEBUG_TYPE "hexagontti"

static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));

static cl::opt<bool> EnableV68FloatAutoHVX(
    "force-hvx-float", cl::Hidden,
    cl::desc("Enable auto-vectorization of floating point types on v68."));

static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control lookup table emission on Hexagon target"));

static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true),
  cl::Hidden, cl::desc("Enable masked loads/stores for HVX"));

// Constant "cost factor" to make floating point operations more expensive
// in terms of vectorization cost. This isn't the best way, but it should
// do. Ultimately, the cost should use cycles.
static const unsigned FloatFactor = 4;

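// useHVX() gates every HVX-specific cost decision below: the subtarget must
// have HVX support and the -hexagon-autohvx flag (off by default) must be set.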
bool HexagonTTIImpl::useHVX() const {
  return ST.useHVXOps() && HexagonAutoHVX;
}

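// A type is treated as an HVX vector type if the subtarget accepts it for
// HVX; floating-point element types additionally require HVX v69, or v68
// together with the -force-hvx-float flag.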
bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  if (!VecTy)
    return false;
  if (!ST.isTypeForHVX(VecTy))
    return false;
  if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
    return true;
  return ST.useHVXV68Ops() && EnableV68FloatAutoHVX;
}

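// Helper used by the cost functions below: the number of elements in a fixed
// vector type, or 1 for a scalar integer or floating-point type.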
unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    return VTy->getNumElements();
  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
         "Expecting scalar type");
  return 1;
}

TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
  // Return fast hardware support as every input < 64 bits will be promoted
  // to 64 bits.
  return TargetTransformInfo::PSK_FastHardware;
}

// The Hexagon target can unroll loops with run-time trip counts.
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP,
                                             OptimizationRemarkEmitter *ORE) {
  UP.Runtime = UP.Partial = true;
}

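// Peel innermost loops whose trip count is unknown at compile time but whose
// maximum trip count is provably small (at most 5 iterations).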
void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
  // Only try to peel innermost loops with small runtime trip counts.
  if (L && L->isInnermost() && canPeel(L) &&
      SE.getSmallConstantTripCount(L) == 0 &&
      SE.getSmallConstantMaxTripCount(L) > 0 &&
      SE.getSmallConstantMaxTripCount(L) <= 5) {
    PP.PeelCount = 2;
  }
}

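// Hexagon provides post-increment forms of its memory instructions, so the
// post-indexed addressing mode is preferred for loop address recurrences.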
TTI::AddressingModeKind
HexagonTTIImpl::getPreferredAddressingMode(const Loop *L,
                                           ScalarEvolution *SE) const {
  return TTI::AMK_PostIndexed;
}

/// --- Vector TTI begin ---

unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
  if (Vector)
    return useHVX() ? 32 : 0;
  return 32;
}

unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  return useHVX() ? 2 : 1;
}

TypeSize
HexagonTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(32);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getMinVectorRegisterBitWidth());
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

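// HVX vector registers are 512 or 1024 bits wide depending on the selected
// vector length (64- or 128-byte mode); without HVX the cost model falls back
// to the 32-bit scalar register width.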
unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
  return useHVX() ? ST.getVectorLength() * 8 : 32;
}

ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth,
                                          bool IsScalable) const {
  assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
  return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth);
}

InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
                                                 ArrayRef<Type *> Tys,
                                                 TTI::TargetCostKind CostKind) {
  return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
}

InstructionCost
HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                      TTI::TargetCostKind CostKind) {
  if (ICA.getID() == Intrinsic::bswap) {
    std::pair<InstructionCost, MVT> LT =
        getTypeLegalizationCost(ICA.getReturnType());
    return LT.first + 2;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

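// Address computations are modeled as free: Hexagon's addressing modes
// (base+offset, post-increment) absorb them into the memory instruction.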
InstructionCost HexagonTTIImpl::getAddressComputationCost(Type *Tp,
                                                           ScalarEvolution *SE,
                                                           const SCEV *S) {
  return 0;
}

InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                                MaybeAlign Alignment,
                                                unsigned AddressSpace,
                                                TTI::TargetCostKind CostKind,
                                                TTI::OperandValueInfo OpInfo,
                                                const Instruction *I) {
  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return 1;

  if (Opcode == Instruction::Store)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, OpInfo, I);

  if (Src->isVectorTy()) {
    VectorType *VecTy = cast<VectorType>(Src);
    unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue();
    if (isHVXVectorType(VecTy)) {
      unsigned RegWidth =
          getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
              .getFixedValue();
      assert(RegWidth && "Non-zero vector register width expected");
      // Cost of HVX loads.
      if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;
      // Cost of constructing HVX vector from scalar loads
      const Align RegAlign(RegWidth / 8);
      if (!Alignment || *Alignment > RegAlign)
        Alignment = RegAlign;

      unsigned AlignWidth = 8 * Alignment->value();
      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
      return 3 * NumLoads;
    }

    // Add extra cost for floating point types.
    unsigned Cost =
        VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;

    // At this point unspecified alignment is considered as Align(1).
    const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
    unsigned AlignWidth = 8 * BoundAlignment.value();
    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
    if (Alignment == Align(4) || Alignment == Align(8))
      return Cost * NumLoads;
    // Loads of less than 32 bits will need extra inserts to compose a vector.
    assert(BoundAlignment <= Align(8));
    unsigned LogA = Log2(BoundAlignment);
    return (3 - LogA) * Cost * NumLoads;
  }

  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
}

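// Masked loads and stores fall back to the default cost model; whether they
// are legal at all is decided separately by isLegalMaskedLoad/Store below.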
InstructionCost
HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                      Align Alignment, unsigned AddressSpace,
                                      TTI::TargetCostKind CostKind) {
  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
}

InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                               ArrayRef<int> Mask,
                                               TTI::TargetCostKind CostKind,
                                               int Index, Type *SubTp,
                                               ArrayRef<const Value *> Args,
                                               const Instruction *CxtI) {
  return 1;
}

InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
}

InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
  return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
                         CostKind);
}

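// For the comparison, arithmetic, and cast costs below, non-HVX floating
// point vector types are given a prohibitively large cost so the vectorizers
// avoid forming them; legal FP vectors are charged FloatFactor per element
// on top of the type legalization cost.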
InstructionCost HexagonTTIImpl::getCmpSelInstrCost(
    unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
    TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
    TTI::OperandValueInfo Op2Info, const Instruction *I) {
  if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
    if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy())
      return InstructionCost::getMax();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                   Op1Info, Op2Info, I);
}

InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) {
  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  if (Ty->isVectorTy()) {
    if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
      return InstructionCost::getMax();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                       Args, CxtI);
}

InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
                                                 Type *SrcTy,
                                                 TTI::CastContextHint CCH,
                                                 TTI::TargetCostKind CostKind,
                                                 const Instruction *I) {
  auto isNonHVXFP = [this](Type *Ty) {
    return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
  };
  if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
    return InstructionCost::getMax();

  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
    InstructionCost Cost =
        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
    // TODO: Allow non-throughput costs that aren't binary.
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
    return Cost;
  }
  return 1;
}

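// Vector element insert/extract costs: a non-zero index needs two vector
// rotations, and inserting an element that is not a 32-bit integer
// additionally pays the cost of an extract.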
InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                   TTI::TargetCostKind CostKind,
                                                   unsigned Index, Value *Op0,
                                                   Value *Op1) {
  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
                                   : Val;
  if (Opcode == Instruction::InsertElement) {
    // Need two rotations for non-zero index.
    unsigned Cost = (Index != 0) ? 2 : 0;
    if (ElemTy->isIntegerTy(32))
      return Cost;
    // If it's not a 32-bit value, there will need to be an extract.
    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, CostKind,
                                     Index, Op0, Op1);
  }

  if (Opcode == Instruction::ExtractElement)
    return 2;

  return 0;
}

bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/) {
  // This function is called from scalarize-masked-mem-intrin, which runs
  // in pre-isel. Use ST directly instead of calling isHVXVectorType.
  return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}

bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/) {
  // This function is called from scalarize-masked-mem-intrin, which runs
  // in pre-isel. Use ST directly instead of calling isHVXVectorType.
  return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}

/// --- Vector TTI end ---

unsigned HexagonTTIImpl::getPrefetchDistance() const {
  return ST.getL1PrefetchDistance();
}

unsigned HexagonTTIImpl::getCacheLineSize() const {
  return ST.getL1CacheLineSize();
}

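// A sign- or zero-extend from a sub-32-bit integer load to i32 folds into
// Hexagon's extending load instructions (memb/memub/memh/memuh), so such a
// cast is treated as free when the load has no other users.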
InstructionCost
HexagonTTIImpl::getInstructionCost(const User *U,
                                   ArrayRef<const Value *> Operands,
                                   TTI::TargetCostKind CostKind) {
  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
    if (!CI->isIntegerCast())
      return false;
    // Only extensions from an integer type shorter than 32-bit to i32
    // can be folded into the load.
    const DataLayout &DL = getDataLayout();
    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
    if (DBW != 32 || SBW >= DBW)
      return false;

    const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
    // Technically, this code could allow multiple uses of the load, and
    // check if all the uses are the same extension operation, but this
    // should be sufficient for most cases.
    return LI && LI->hasOneUse();
  };

  if (const CastInst *CI = dyn_cast<const CastInst>(U))
    if (isCastFoldedIntoLoad(CI))
      return TargetTransformInfo::TCC_Free;
  return BaseT::getInstructionCost(U, Operands, CostKind);
}

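// SimplifyCFG's switch-to-lookup-table transformation is allowed per target;
// on Hexagon it is controlled by -hexagon-emit-lookup-tables (on by default).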
bool HexagonTTIImpl::shouldBuildLookupTables() const {
  return EmitLookupTables;
}