//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the ARM target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class ARMTargetLowering;
class ScalarEvolution;
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
} // namespace TailPredication

// For controlling conversion of memcpy into Tail Predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop
class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed.
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
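  //
  // Illustrative example (an informal sketch of the policy, not normative): a
  // callee compiled with only "+vfp3" can generally be inlined into a caller
  // compiled with "+neon,+vfp3", since the callee's extra feature bits are all
  // in InlineFeaturesAllowed and form a subset of the caller's. By contrast, a
  // "-thumb-mode" callee and a "+thumb-mode" caller differ in a feature that
  // is deliberately excluded from this list, so areInlineCompatible (declared
  // below) refuses the inline.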
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
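  ///
  /// (Informal background, not part of this interface: AArch32 Advanced SIMD
  /// operates in flush-to-zero, round-to-nearest mode, so strict IEEE-754
  /// semantics cannot be guaranteed when vectorizing floating-point code with
  /// it.)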
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
    return true;
  }

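  // Informal illustration of the effect (a sketch, not a statement of the
  // pass pipeline beyond the comment above): a gather or scatter that the MVE
  // lowering has not already turned into a target intrinsic is expanded into
  // per-lane scalar memory accesses by the generic masked-intrinsic
  // scalarisation, instead of surviving as an @llvm.masked.gather/scatter
  // call.
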
  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return ST->getMaxInlineSizeThreshold();
  }

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = {},
                                 const Instruction *CxtI = nullptr);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr);

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr);

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *ValTy,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *ValTy,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  /// getScalingFactorCost - Return the cost of the scaling used in
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value must be negative.
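  ///
  /// Illustrative example (informal, values not normative): an addressing
  /// mode of the form BaseReg + 4 * IndexReg for an i32 load corresponds to
  /// the ARM/Thumb-2 form [r0, r1, lsl #2]; where that form is legal the
  /// scaling can usually be folded into the access and is reported as cheap,
  /// whereas an unsupported scale yields a negative value and the scaled
  /// index must be materialised separately.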
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
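    //
    // Illustrative example (informal): a switch that selects between several
    // function pointers or addresses of global variables would, as a lookup
    // table, need one dynamic relocation per entry under ROPI/RWPI, so it is
    // left as a switch; a table of plain integer constants is still fine.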
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();
    return true;
  }

  bool hasArmWideBranch(bool Thumb) const;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const;

  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
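///
/// Illustrative example (informal): for an <8 x i8> shuffle checked with
/// BlockSize == 32, each 32-bit block holds four byte elements, so the mask
/// <3, 2, 1, 0, 7, 6, 5, 4> reverses the bytes within each block and is
/// recognised here as a VREV32.8 pattern.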
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H