//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeatureWhitelist:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeatureWhitelist = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
      ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
      ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
      ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
      ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
      ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
      ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
      ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
      ARM::FeatureNoNegativeImmediates};

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
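
  // A sketch of how the whitelist above is presumably consumed by
  // areInlineCompatible (the real definition lives in
  // ARMTargetTransformInfo.cpp, so treat this as illustrative only):
  // features outside the whitelist must match exactly between caller and
  // callee, while whitelisted callee features must be a subset of the
  // caller's:
  //
  //   bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
  //                     (CalleeBits & ~InlineFeatureWhitelist);
  //   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
  //                      (CalleeBits & InlineFeatureWhitelist);
  //   return MatchExact && MatchSubset;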

  bool enableInterleavedAccessVectorization() { return true; }

  bool shouldFavorBackedgeIndex(const Loop *L) const {
    if (L->getHeader()->getParent()->hasOptSize())
      return false;
    return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
  }
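
  // Hypothetical illustration of why backedge indexing is favored here: in a
  // single-block Thumb2 M-class loop such as
  //   do { *p++ = v; } while (--n);
  // basing addresses on the backedge value helps LSR and ISel form
  // post-increment (writeback) loads and stores instead of separate adds.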

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }
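
  // Expected effect (an assumption, not verified against the vectorizer
  // sources here): when this hook returns true, the loop vectorizer refuses
  // to vectorize FP loops it flags as potentially unsafe unless the IR
  // already carries relaxed semantics (e.g. fast-math flags), since AArch32
  // Advanced SIMD flushes denormals to zero.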

  /// \name Scalar TTI Implementations
  /// @{

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty);

  using BaseT::getIntImmCost;
  int getIntImmCost(const APInt &Imm, Type *Ty);

  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) {
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      if (ST->hasNEON())
        return 128;
      if (ST->hasMVEIntegerOps())
        return 128;
      return 0;
    }

    return 32;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  int getMemcpyCost(const Instruction *I);

  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return false;
  }

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I = nullptr);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                const SCEV *Ptr);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr);

  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);

  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsRelocation();
    return true;
  }
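
  // A hypothetical case this guards against: a switch lowered to a constant
  // table of pointers, e.g.
  //   switch (n) { case 0: return "zero"; case 1: return "one"; ... }
  // Under ROPI/RWPI each string-literal entry in such a table would need a
  // relocation, so the switch-to-lookup-table transform is suppressed.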

  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H