//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class ARMTargetLowering;
class ScalarEvolution;

namespace TailPredication {
  enum Mode {
    Disabled = 0,
    EnabledNoReductions,
    Enabled,
    ForceEnabledNoReductions,
    ForceEnabled
  };
} // namespace TailPredication

// For controlling conversion of memcpy into Tail Predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed.
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates};
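
  // For illustration (hypothetical IR, not from this codebase): per-function
  // features show up as "target-features" attributes, e.g.
  //
  //   define void @callee() #0 { ... }  ; #0 = { "target-features"="-thumb-mode" }
  //   define void @caller() #1 { ... }  ; #1 = { "target-features"="+thumb-mode" }
  //
  // Because ModeThumb is deliberately excluded from InlineFeaturesAllowed,
  // areInlineCompatible() (defined in ARMTargetTransformInfo.cpp) treats such
  // a caller/callee pair as incompatible, avoiding the assembler failure
  // described above.
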
  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
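
  // For illustration: a sketch of how this adaptor is typically reached.
  // ARMBaseTargetMachine overrides getTargetTransformInfo() and wraps a
  // per-function ARMTTIImpl, roughly:
  //
  //   TargetTransformInfo
  //   ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
  //     return TargetTransformInfo(ARMTTIImpl(this, F));
  //   }
  //
  // so any pass that queries TargetTransformInfo on an ARM function ends up
  // in the hooks declared below.
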
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  /// \name Scalar TTI Implementations

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// \name Vector TTI Implementations

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
                                              Type *ResTy, VectorType *ValTy,
                                              TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  bool emitGetActiveLaneMask() const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();
    return true;
  }
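
  // For illustration (hypothetical scenario, not from this codebase): when
  // SimplifyCFG considers turning a switch whose cases yield addresses of
  // global functions into a lookup table, the table would become a constant
  // array of pointers. Under ROPI/RWPI those pointers need dynamic
  // relocation, so needsDynamicRelocation() returns true for the candidate
  // constant and the switch is kept as is.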
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
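/// For example, for a vector of 8 x i8 and BlockSize == 32, BlockElts is 4,
/// and the shuffle mask <3, 2, 1, 0, 7, 6, 5, 4> satisfies this check: it
/// reverses the four bytes within each 32-bit word, which is what VREV32.8
/// does.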
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H