//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;
class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };
  bool isWideningInstruction(Type *DstTy, unsigned Opcode,
                             ArrayRef<const Value *> Args,
                             Type *SrcOverrideTy = nullptr);
  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction is available in the input IR or
  // just imaginary in vectorizer passes.
  InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val,
                                           unsigned Index, bool HasRealUse);
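  // Illustrative sketch (not part of the upstream interface): a vectorizer
  // costing an extract it has not materialized yet would go through the
  // opcode-based public overload, e.g.
  //   TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
  //                          /*Index=*/0, /*Op0=*/nullptr, /*Op1=*/nullptr);
  // and is treated as HasRealUse == false, whereas costing an existing
  // extractelement instruction via the Instruction-based overload is treated
  // as HasRealUse == true.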
public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
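  // Illustrative usage sketch (assumed call site, not part of this header):
  // the AArch64 target machine constructs one of these per function, roughly
  //   return TargetTransformInfo(AArch64TTIImpl(TM, F));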
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const;
  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}
  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  bool enableMaskedInterleavedAccessVectorization() { return ST->hasSVE(); }
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool isVScaleKnownToBeAPowerOfTwo() const { return true; }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }
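  // Illustrative example (assumed tuning value): for VF = vscale x 4 on a
  // subtarget where getVScaleForTuning() returns 2, this estimates
  // 4 * 2 = 8 elements; a fixed VF of 8 simply returns 8.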
  unsigned getMaxInterleaveFactor(ElementCount VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);
  bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);
  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }
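  // Illustrative examples: i8/i16/i32/i64, half, float, double (and bfloat
  // when the subtarget has BF16) element types are accepted, so e.g.
  // <vscale x 4 x i32> qualifies; i1 or i128 element types do not.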
  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
        DataType->getPrimitiveSizeInBits() != 128)
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }
  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || !ST->isNeonAvailable())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }
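  // Illustrative examples: with SVE available, a <vscale x 4 x i32> gather or
  // scatter is legal; a fixed-width <4 x i32> one is only legal when SVE is
  // also used for fixed-length vectors, otherwise it is rejected here and
  // left to be scalarized.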
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }
  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept bit-widths >= 64bits and elements {8,16,32,64} bits.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }
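  // Illustrative examples: splatting an i32 to <2 x i32> (64 bits) or
  // <4 x i32> (128 bits) maps to LD1R and is legal; <2 x i8> is only 16 bits
  // wide, and scalable element counts are rejected outright.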
  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }
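  // Illustrative examples: <2 x i64> and <4 x i32> can be split into two
  // register-sized halves and so report as legal for LDNP/STNP, while
  // <3 x i32> fails the power-of-2 element-count check; non-fixed-vector
  // types defer to the BaseT implementation.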
  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }
  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }
  TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
    if (ST->hasSVE())
      return IVUpdateMayOverflow
                 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
                 : TailFoldingStyle::DataAndControlFlow;

    return TailFoldingStyle::DataWithoutLaneMask;
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }
  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind);
  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;
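  // Illustrative example (assumed IR, not from this header): for an i32 load
  // addressed as {BaseReg + 4 * IndexReg}, the scale of 4 corresponds to the
  // scaled register form [Xn, Xm, lsl #2]; a supported mode yields a
  // non-negative cost, an unsupported one a negative value, as documented
  // above.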
  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    // We can vectorize store v4i8.
    if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
      return 4;

    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
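  // Illustrative example: for i8 stores queried with VF = 8, this returns a
  // minimum VF of 4 (a v4i8 store); other element types defer to the BaseT
  // implementation.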
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H