//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
20 #include "AArch64Subtarget.h"
21 #include "AArch64TargetMachine.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/CodeGen/BasicTTIImpl.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Intrinsics.h"
37 class ScalarEvolution
;
42 class AArch64TTIImpl
: public BasicTTIImplBase
<AArch64TTIImpl
> {
43 using BaseT
= BasicTTIImplBase
<AArch64TTIImpl
>;
44 using TTI
= TargetTransformInfo
;
48 const AArch64Subtarget
*ST
;
49 const AArch64TargetLowering
*TLI
;
51 const AArch64Subtarget
*getST() const { return ST
; }
52 const AArch64TargetLowering
*getTLI() const { return TLI
; }
/// Tags for vector load/store memory intrinsics, named after the number of
/// elements (two, three or four) they operate on.
enum MemIntrinsicType {
  VECTOR_LDST_TWO_ELEMENTS,
  VECTOR_LDST_THREE_ELEMENTS,
  VECTOR_LDST_FOUR_ELEMENTS
};
60 bool isWideningInstruction(Type
*DstTy
, unsigned Opcode
,
61 ArrayRef
<const Value
*> Args
,
62 Type
*SrcOverrideTy
= nullptr);
64 // A helper function called by 'getVectorInstrCost'.
66 // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
67 // indicates whether the vector instruction is available in the input IR or
68 // just imaginary in vectorizer passes.
69 InstructionCost
getVectorInstrCostHelper(const Instruction
*I
, Type
*Val
,
70 unsigned Index
, bool HasRealUse
);
73 explicit AArch64TTIImpl(const AArch64TargetMachine
*TM
, const Function
&F
)
74 : BaseT(TM
, F
.getParent()->getDataLayout()), ST(TM
->getSubtargetImpl(F
)),
75 TLI(ST
->getTargetLowering()) {}
77 bool areInlineCompatible(const Function
*Caller
,
78 const Function
*Callee
) const;
80 bool areTypesABICompatible(const Function
*Caller
, const Function
*Callee
,
81 const ArrayRef
<Type
*> &Types
) const;
83 unsigned getInlineCallPenalty(const Function
*F
, const CallBase
&Call
,
84 unsigned DefaultCallPenalty
) const;
86 /// \name Scalar TTI Implementations
89 using BaseT::getIntImmCost
;
90 InstructionCost
getIntImmCost(int64_t Val
);
91 InstructionCost
getIntImmCost(const APInt
&Imm
, Type
*Ty
,
92 TTI::TargetCostKind CostKind
);
93 InstructionCost
getIntImmCostInst(unsigned Opcode
, unsigned Idx
,
94 const APInt
&Imm
, Type
*Ty
,
95 TTI::TargetCostKind CostKind
,
96 Instruction
*Inst
= nullptr);
97 InstructionCost
getIntImmCostIntrin(Intrinsic::ID IID
, unsigned Idx
,
98 const APInt
&Imm
, Type
*Ty
,
99 TTI::TargetCostKind CostKind
);
100 TTI::PopcntSupportKind
getPopcntSupport(unsigned TyWidth
);
104 /// \name Vector TTI Implementations
/// Interleaved-access vectorization is unconditionally enabled.
bool enableInterleavedAccessVectorization() {
  return true;
}
109 bool enableMaskedInterleavedAccessVectorization() { return ST
->hasSVE(); }
111 unsigned getNumberOfRegisters(unsigned ClassID
) const {
112 bool Vector
= (ClassID
== 1);
121 InstructionCost
getIntrinsicInstrCost(const IntrinsicCostAttributes
&ICA
,
122 TTI::TargetCostKind CostKind
);
124 std::optional
<Instruction
*> instCombineIntrinsic(InstCombiner
&IC
,
125 IntrinsicInst
&II
) const;
127 std::optional
<Value
*> simplifyDemandedVectorEltsIntrinsic(
128 InstCombiner
&IC
, IntrinsicInst
&II
, APInt DemandedElts
, APInt
&UndefElts
,
129 APInt
&UndefElts2
, APInt
&UndefElts3
,
130 std::function
<void(Instruction
*, unsigned, APInt
, APInt
&)>
131 SimplifyAndSetOp
) const;
133 TypeSize
getRegisterBitWidth(TargetTransformInfo::RegisterKind K
) const;
135 unsigned getMinVectorRegisterBitWidth() const {
136 return ST
->getMinVectorRegisterBitWidth();
139 std::optional
<unsigned> getVScaleForTuning() const {
140 return ST
->getVScaleForTuning();
143 bool isVScaleKnownToBeAPowerOfTwo() const { return true; }
145 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K
) const;
147 /// Try to return an estimate cost factor that can be used as a multiplier
148 /// when scalarizing an operation for a vector with ElementCount \p VF.
149 /// For scalable vectors this currently takes the most pessimistic view based
150 /// upon the maximum possible value for vscale.
151 unsigned getMaxNumElements(ElementCount VF
) const {
152 if (!VF
.isScalable())
153 return VF
.getFixedValue();
155 return VF
.getKnownMinValue() * ST
->getVScaleForTuning();
158 unsigned getMaxInterleaveFactor(ElementCount VF
);
160 bool prefersVectorizedAddressing() const;
162 InstructionCost
getMaskedMemoryOpCost(unsigned Opcode
, Type
*Src
,
163 Align Alignment
, unsigned AddressSpace
,
164 TTI::TargetCostKind CostKind
);
166 InstructionCost
getGatherScatterOpCost(unsigned Opcode
, Type
*DataTy
,
167 const Value
*Ptr
, bool VariableMask
,
169 TTI::TargetCostKind CostKind
,
170 const Instruction
*I
= nullptr);
172 bool isExtPartOfAvgExpr(const Instruction
*ExtUser
, Type
*Dst
, Type
*Src
);
174 InstructionCost
getCastInstrCost(unsigned Opcode
, Type
*Dst
, Type
*Src
,
175 TTI::CastContextHint CCH
,
176 TTI::TargetCostKind CostKind
,
177 const Instruction
*I
= nullptr);
179 InstructionCost
getExtractWithExtendCost(unsigned Opcode
, Type
*Dst
,
180 VectorType
*VecTy
, unsigned Index
);
182 InstructionCost
getCFInstrCost(unsigned Opcode
, TTI::TargetCostKind CostKind
,
183 const Instruction
*I
= nullptr);
185 InstructionCost
getVectorInstrCost(unsigned Opcode
, Type
*Val
,
186 TTI::TargetCostKind CostKind
,
187 unsigned Index
, Value
*Op0
, Value
*Op1
);
188 InstructionCost
getVectorInstrCost(const Instruction
&I
, Type
*Val
,
189 TTI::TargetCostKind CostKind
,
192 InstructionCost
getMinMaxReductionCost(Intrinsic::ID IID
, VectorType
*Ty
,
194 TTI::TargetCostKind CostKind
);
196 InstructionCost
getArithmeticReductionCostSVE(unsigned Opcode
,
198 TTI::TargetCostKind CostKind
);
200 InstructionCost
getSpliceCost(VectorType
*Tp
, int Index
);
202 InstructionCost
getArithmeticInstrCost(
203 unsigned Opcode
, Type
*Ty
, TTI::TargetCostKind CostKind
,
204 TTI::OperandValueInfo Op1Info
= {TTI::OK_AnyValue
, TTI::OP_None
},
205 TTI::OperandValueInfo Op2Info
= {TTI::OK_AnyValue
, TTI::OP_None
},
206 ArrayRef
<const Value
*> Args
= ArrayRef
<const Value
*>(),
207 const Instruction
*CxtI
= nullptr);
209 InstructionCost
getAddressComputationCost(Type
*Ty
, ScalarEvolution
*SE
,
212 InstructionCost
getCmpSelInstrCost(unsigned Opcode
, Type
*ValTy
, Type
*CondTy
,
213 CmpInst::Predicate VecPred
,
214 TTI::TargetCostKind CostKind
,
215 const Instruction
*I
= nullptr);
217 TTI::MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize
,
218 bool IsZeroCmp
) const;
219 bool useNeonVector(const Type
*Ty
) const;
222 getMemoryOpCost(unsigned Opcode
, Type
*Src
, MaybeAlign Alignment
,
223 unsigned AddressSpace
, TTI::TargetCostKind CostKind
,
224 TTI::OperandValueInfo OpInfo
= {TTI::OK_AnyValue
, TTI::OP_None
},
225 const Instruction
*I
= nullptr);
227 InstructionCost
getCostOfKeepingLiveOverCall(ArrayRef
<Type
*> Tys
);
229 void getUnrollingPreferences(Loop
*L
, ScalarEvolution
&SE
,
230 TTI::UnrollingPreferences
&UP
,
231 OptimizationRemarkEmitter
*ORE
);
233 void getPeelingPreferences(Loop
*L
, ScalarEvolution
&SE
,
234 TTI::PeelingPreferences
&PP
);
236 Value
*getOrCreateResultFromMemIntrinsic(IntrinsicInst
*Inst
,
239 bool getTgtMemIntrinsic(IntrinsicInst
*Inst
, MemIntrinsicInfo
&Info
);
241 bool isElementTypeLegalForScalableVector(Type
*Ty
) const {
242 if (Ty
->isPointerTy())
245 if (Ty
->isBFloatTy() && ST
->hasBF16())
248 if (Ty
->isHalfTy() || Ty
->isFloatTy() || Ty
->isDoubleTy())
251 if (Ty
->isIntegerTy(8) || Ty
->isIntegerTy(16) ||
252 Ty
->isIntegerTy(32) || Ty
->isIntegerTy(64))
258 bool isLegalMaskedLoadStore(Type
*DataType
, Align Alignment
) {
262 // For fixed vectors, avoid scalarization if using SVE for them.
263 if (isa
<FixedVectorType
>(DataType
) && !ST
->useSVEForFixedLengthVectors() &&
264 DataType
->getPrimitiveSizeInBits() != 128)
265 return false; // Fall back to scalarization of masked operations.
267 return isElementTypeLegalForScalableVector(DataType
->getScalarType());
270 bool isLegalMaskedLoad(Type
*DataType
, Align Alignment
) {
271 return isLegalMaskedLoadStore(DataType
, Alignment
);
274 bool isLegalMaskedStore(Type
*DataType
, Align Alignment
) {
275 return isLegalMaskedLoadStore(DataType
, Alignment
);
278 bool isLegalMaskedGatherScatter(Type
*DataType
) const {
279 if (!ST
->hasSVE() || !ST
->isNeonAvailable())
282 // For fixed vectors, scalarize if not using SVE for them.
283 auto *DataTypeFVTy
= dyn_cast
<FixedVectorType
>(DataType
);
284 if (DataTypeFVTy
&& (!ST
->useSVEForFixedLengthVectors() ||
285 DataTypeFVTy
->getNumElements() < 2))
288 return isElementTypeLegalForScalableVector(DataType
->getScalarType());
291 bool isLegalMaskedGather(Type
*DataType
, Align Alignment
) const {
292 return isLegalMaskedGatherScatter(DataType
);
295 bool isLegalMaskedScatter(Type
*DataType
, Align Alignment
) const {
296 return isLegalMaskedGatherScatter(DataType
);
299 bool isLegalBroadcastLoad(Type
*ElementTy
, ElementCount NumElements
) const {
300 // Return true if we can generate a `ld1r` splat load instruction.
301 if (!ST
->hasNEON() || NumElements
.isScalable())
303 switch (unsigned ElementBits
= ElementTy
->getScalarSizeInBits()) {
308 // We accept bit-widths >= 64bits and elements {8,16,32,64} bits.
309 unsigned VectorBits
= NumElements
.getFixedValue() * ElementBits
;
310 return VectorBits
>= 64;
316 bool isLegalNTStoreLoad(Type
*DataType
, Align Alignment
) {
317 // NOTE: The logic below is mostly geared towards LV, which calls it with
318 // vectors with 2 elements. We might want to improve that, if other
320 // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
321 // the vector can be halved so that each half fits into a register. That's
322 // the case if the element type fits into a register and the number of
323 // elements is a power of 2 > 1.
324 if (auto *DataTypeTy
= dyn_cast
<FixedVectorType
>(DataType
)) {
325 unsigned NumElements
= DataTypeTy
->getNumElements();
326 unsigned EltSize
= DataTypeTy
->getElementType()->getScalarSizeInBits();
327 return NumElements
> 1 && isPowerOf2_64(NumElements
) && EltSize
>= 8 &&
328 EltSize
<= 128 && isPowerOf2_64(EltSize
);
330 return BaseT::isLegalNTStore(DataType
, Alignment
);
333 bool isLegalNTStore(Type
*DataType
, Align Alignment
) {
334 return isLegalNTStoreLoad(DataType
, Alignment
);
337 bool isLegalNTLoad(Type
*DataType
, Align Alignment
) {
338 // Only supports little-endian targets.
339 if (ST
->isLittleEndian())
340 return isLegalNTStoreLoad(DataType
, Alignment
);
341 return BaseT::isLegalNTLoad(DataType
, Alignment
);
344 bool enableOrderedReductions() const { return true; }
346 InstructionCost
getInterleavedMemoryOpCost(
347 unsigned Opcode
, Type
*VecTy
, unsigned Factor
, ArrayRef
<unsigned> Indices
,
348 Align Alignment
, unsigned AddressSpace
, TTI::TargetCostKind CostKind
,
349 bool UseMaskForCond
= false, bool UseMaskForGaps
= false);
352 shouldConsiderAddressTypePromotion(const Instruction
&I
,
353 bool &AllowPromotionWithoutCommonHeader
);
355 bool shouldExpandReduction(const IntrinsicInst
*II
) const { return false; }
357 unsigned getGISelRematGlobalCost() const {
361 unsigned getMinTripCountTailFoldingThreshold() const {
362 return ST
->hasSVE() ? 5 : 0;
365 TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow
) const {
367 return IVUpdateMayOverflow
368 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
369 : TailFoldingStyle::DataAndControlFlow
;
371 return TailFoldingStyle::DataWithoutLaneMask
;
374 bool preferPredicateOverEpilogue(TailFoldingInfo
*TFI
);
376 bool supportsScalableVectors() const { return ST
->hasSVE(); }
378 bool enableScalableVectorization() const { return ST
->hasSVE(); }
380 bool isLegalToVectorizeReduction(const RecurrenceDescriptor
&RdxDesc
,
381 ElementCount VF
) const;
383 bool preferPredicatedReductionSelect(unsigned Opcode
, Type
*Ty
,
384 TTI::ReductionFlags Flags
) const {
388 InstructionCost
getArithmeticReductionCost(unsigned Opcode
, VectorType
*Ty
,
389 std::optional
<FastMathFlags
> FMF
,
390 TTI::TargetCostKind CostKind
);
392 InstructionCost
getShuffleCost(TTI::ShuffleKind Kind
, VectorType
*Tp
,
394 TTI::TargetCostKind CostKind
, int Index
,
396 ArrayRef
<const Value
*> Args
= std::nullopt
);
398 InstructionCost
getScalarizationOverhead(VectorType
*Ty
,
399 const APInt
&DemandedElts
,
400 bool Insert
, bool Extract
,
401 TTI::TargetCostKind CostKind
);
403 /// Return the cost of the scaling factor used in the addressing
404 /// mode represented by AM for this target, for a load/store
405 /// of the specified type.
406 /// If the AM is supported, the return value must be >= 0.
407 /// If the AM is not supported, it returns a negative value.
408 InstructionCost
getScalingFactorCost(Type
*Ty
, GlobalValue
*BaseGV
,
409 int64_t BaseOffset
, bool HasBaseReg
,
410 int64_t Scale
, unsigned AddrSpace
) const;
413 bool enableSelectOptimize() { return ST
->enableSelectOptimize(); }
415 bool shouldTreatInstructionLikeSelect(const Instruction
*I
);
417 unsigned getStoreMinimumVF(unsigned VF
, Type
*ScalarMemTy
,
418 Type
*ScalarValTy
) const {
419 // We can vectorize store v4i8.
420 if (ScalarMemTy
->isIntegerTy(8) && isPowerOf2_32(VF
) && VF
>= 4)
423 return BaseT::getStoreMinimumVF(VF
, ScalarMemTy
, ScalarValTy
);
426 std::optional
<unsigned> getMinPageSize() const { return 4096; }
429 } // end namespace llvm
431 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H