1 //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file declares a TargetTransformInfo::Concept conforming object specific to the
11 /// AMDGPU target machine. It uses the target's detailed information to
12 /// provide more precise answers to certain TTI queries, while letting the
13 /// target independent and default TTI implementations handle the rest.
15 //===----------------------------------------------------------------------===//
17 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
21 #include "AMDGPUSubtarget.h"
22 #include "AMDGPUTargetMachine.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "Utils/AMDGPUBaseInfo.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/CodeGen/BasicTTIImpl.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/MC/SubtargetFeature.h"
30 #include "llvm/Support/MathExtras.h"
35 class AMDGPUTargetLowering
;
37 class ScalarEvolution
;
/// TTI implementation class for AMDGPU, built on BasicTTIImplBase with this
/// class supplied as the template argument.
///
/// NOTE(review): this view shows neither a declaration for TargetTriple (which
/// the constructor initializes) nor the end of the class body -- confirm
/// against the full header.
41 class AMDGPUTTIImpl final
: public BasicTTIImplBase
<AMDGPUTTIImpl
> {
// Shorthand for the base class instantiation.
42 using BaseT
= BasicTTIImplBase
<AMDGPUTTIImpl
>;
// Shorthand for TargetTransformInfo; used below for its nested types.
43 using TTI
= TargetTransformInfo
;
/// Forwards \p TM and the DataLayout of \p F's parent module to the base
/// class, and initializes TargetTriple from TM->getTargetTriple().
50 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine
*TM
, const Function
&F
)
51 : BaseT(TM
, F
.getParent()->getDataLayout()),
52 TargetTriple(TM
->getTargetTriple()) {}
/// Declaration only; the definition is not part of this header section.
/// \p UP is taken by non-const reference.
54 void getUnrollingPreferences(Loop
*L
, ScalarEvolution
&SE
,
55 TTI::UnrollingPreferences
&UP
);
/// TTI implementation class that works against a GCNSubtarget, built on
/// BasicTTIImplBase with this class supplied as the template argument.
58 class GCNTTIImpl final
: public BasicTTIImplBase
<GCNTTIImpl
> {
// Shorthand for the base class instantiation.
59 using BaseT
= BasicTTIImplBase
<GCNTTIImpl
>;
// Shorthand for TargetTransformInfo; used below for its nested types.
60 using TTI
= TargetTransformInfo
;
// Subtarget pointer; the constructor obtains it from TM->getSubtargetImpl(F).
64 const GCNSubtarget
*ST
;
// Target lowering pointer; the constructor obtains it from
// ST->getTargetLowering().
65 const AMDGPUTargetLowering
*TLI
;
// An AMDGPUTTIImpl instance held by value.
66 AMDGPUTTIImpl CommonTTI
;
// Result of AMDGPU::isShader() on the function's calling convention, computed
// once in the constructor.
67 bool IsGraphicsShader
;
// Set of subtarget feature bits, grouped by the reason each group is listed
// (see the comment heading each group).
// NOTE(review): presumably consulted by areInlineCompatible() when comparing
// caller and callee features -- that definition is not visible here. The end of
// this initializer is also not visible in this view.
69 const FeatureBitset InlineFeatureIgnoreList
= {
70 // Codegen control options which don't matter.
71 AMDGPU::FeatureEnableLoadStoreOpt
,
72 AMDGPU::FeatureEnableSIScheduler
,
73 AMDGPU::FeatureEnableUnsafeDSOffsetFolding
,
74 AMDGPU::FeatureFlatForGlobal
,
75 AMDGPU::FeaturePromoteAlloca
,
76 AMDGPU::FeatureUnalignedBufferAccess
,
77 AMDGPU::FeatureUnalignedScratchAccess
,
79 AMDGPU::FeatureAutoWaitcntBeforeBarrier
,
81 // Property of the kernel/environment which can't actually differ.
82 AMDGPU::FeatureSGPRInitBug
,
84 AMDGPU::FeatureTrapHandler
,
85 AMDGPU::FeatureCodeObjectV3
,
87 // The default assumption needs to be ecc is enabled, but no directly
88 // exposed operations depend on it, so it can be safely inlined.
89 AMDGPU::FeatureSRAMECC
,
91 // Perf-tuning features
92 AMDGPU::FeatureFastFMAF32
,
/// Returns the stored GCNSubtarget pointer (member ST).
96 const GCNSubtarget
*getST() const { return ST
; }
/// Returns the stored AMDGPUTargetLowering pointer (member TLI).
97 const AMDGPUTargetLowering
*getTLI() const { return TLI
; }
/// Cost used for a full-rate instruction: TargetTransformInfo::TCC_Basic.
99 static inline int getFullRateInstrCost() {
100 return TargetTransformInfo::TCC_Basic
;
/// Cost used for a half-rate instruction: twice
/// TargetTransformInfo::TCC_Basic.
103 static inline int getHalfRateInstrCost() {
104 return 2 * TargetTransformInfo::TCC_Basic
;
/// Cost used for a quarter-rate instruction: three times
/// TargetTransformInfo::TCC_Basic.
107 // TODO: The size is usually 8 bytes, but takes 4x as many cycles, so the
// factor of 3 used below may need revisiting.
109 static inline int getQuarterRateInstrCost() {
110 return 3 * TargetTransformInfo::TCC_Basic
;
113 // On some parts, normal fp64 operations are half rate, and others
114 // quarter. This also applies to some integer operations.
/// Returns getHalfRateInstrCost() when ST->hasHalfRate64Ops() is true, and
/// getQuarterRateInstrCost() otherwise.
115 inline int get64BitInstrCost() const {
116 return ST
->hasHalfRate64Ops() ?
117 getHalfRateInstrCost() : getQuarterRateInstrCost();
121 explicit GCNTTIImpl(const AMDGPUTargetMachine
*TM
, const Function
&F
)
122 : BaseT(TM
, F
.getParent()->getDataLayout()),
123 ST(static_cast<const GCNSubtarget
*>(TM
->getSubtargetImpl(F
))),
124 TLI(ST
->getTargetLowering()),
126 IsGraphicsShader(AMDGPU::isShader(F
.getCallingConv())) {}
/// Unconditionally reports that branch divergence is present.
128 bool hasBranchDivergence() { return true; }
/// Declaration only; the definition is not part of this header section.
/// \p UP is taken by non-const reference.
130 void getUnrollingPreferences(Loop
*L
, ScalarEvolution
&SE
,
131 TTI::UnrollingPreferences
&UP
);
/// Asserts that \p TyWidth is a power of two, then reports
/// TTI::PSK_FastHardware regardless of the width.
133 TTI::PopcntSupportKind
getPopcntSupport(unsigned TyWidth
) {
134 assert(isPowerOf2_32(TyWidth
) && "Ty width must be power of 2");
135 return TTI::PSK_FastHardware
;
// Register-related queries. Declarations only; the definitions are not part of
// this header section. Each is a const member returning unsigned.
// NOTE(review): \p Vector presumably selects vector vs. scalar registers --
// confirm against the definitions.
138 unsigned getHardwareNumberOfRegisters(bool Vector
) const;
139 unsigned getNumberOfRegisters(bool Vector
) const;
140 unsigned getRegisterBitWidth(bool Vector
) const;
141 unsigned getMinVectorRegisterBitWidth() const;
// Load/store vectorization queries. Declarations only; the definitions are not
// part of this header section.
// getLoadVectorFactor and getStoreVectorFactor have parallel signatures,
// differing only in the name of the second parameter (LoadSize vs. StoreSize).
142 unsigned getLoadVectorFactor(unsigned VF
, unsigned LoadSize
,
143 unsigned ChainSizeInBytes
,
144 VectorType
*VecTy
) const;
145 unsigned getStoreVectorFactor(unsigned VF
, unsigned StoreSize
,
146 unsigned ChainSizeInBytes
,
147 VectorType
*VecTy
) const;
// Takes an address space number and returns an unsigned value.
// NOTE(review): presumably a register width in bits, going by the name --
// confirm against the definition.
148 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace
) const;
// Legality predicates for vectorizing a chain of memory accesses. Declarations
// only; the definitions are not part of this header section.
// NOTE(review): R600TTIImpl::isLegalToVectorizeMemChain in this file takes an
// additional `unsigned Alignment` parameter between ChainSizeInBytes and
// AddrSpace, and the embedded listing numbers skip one line inside each
// declaration below, so a parameter may be missing from this view -- confirm
// against the full header.
150 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes
,
152 unsigned AddrSpace
) const;
153 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes
,
155 unsigned AddrSpace
) const;
156 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes
,
158 unsigned AddrSpace
) const;
/// Declaration only; the definition is not part of this header section.
/// Non-const member taking the vectorization factor \p VF.
160 unsigned getMaxInterleaveFactor(unsigned VF
);
/// Declaration only; the definition is not part of this header section.
/// \p Info is taken by non-const reference.
/// NOTE(review): presumably \p Info is filled in on a true return -- confirm
/// against the definition.
162 bool getTgtMemIntrinsic(IntrinsicInst
*Inst
, MemIntrinsicInfo
&Info
) const;
/// Declaration only; the definition is not part of this header section.
///
/// Only \p Opcode and \p Ty are required. The operand kind parameters default
/// to TTI::OK_AnyValue, the operand property parameters default to
/// TTI::OP_None, and \p Args defaults to an empty ArrayRef.
164 int getArithmeticInstrCost(
165 unsigned Opcode
, Type
*Ty
,
166 TTI::OperandValueKind Opd1Info
= TTI::OK_AnyValue
,
167 TTI::OperandValueKind Opd2Info
= TTI::OK_AnyValue
,
168 TTI::OperandValueProperties Opd1PropInfo
= TTI::OP_None
,
169 TTI::OperandValueProperties Opd2PropInfo
= TTI::OP_None
,
170 ArrayRef
<const Value
*> Args
= ArrayRef
<const Value
*>());
// Cost queries keyed by opcode. Declarations only; the definitions are not part
// of this header section. Both are non-const members.
172 unsigned getCFInstrCost(unsigned Opcode
);
174 int getVectorInstrCost(unsigned Opcode
, Type
*ValTy
, unsigned Index
);
// Predicates on a single Value. Declarations only; both are const members.
// NOTE(review): names suggest these classify a value as divergent or uniform
// for divergence analysis -- confirm against the definitions.
175 bool isSourceOfDivergence(const Value
*V
) const;
176 bool isAlwaysUniform(const Value
*V
) const;
178 unsigned getFlatAddressSpace() const {
179 // Don't bother running InferAddressSpaces pass on graphics shaders which
180 // don't use flat addressing.
181 if (IsGraphicsShader
)
183 return AMDGPUAS::FLAT_ADDRESS
;
/// Always returns 0.
186 unsigned getVectorSplitCost() { return 0; }
/// Declaration of a shuffle cost query taking the shuffle kind, a type and an
/// index.
/// NOTE(review): the parameter list is cut off after \p Index in this view
/// (it ends on a comma) -- take the remaining parameters from the full header.
188 unsigned getShuffleCost(TTI::ShuffleKind Kind
, Type
*Tp
, int Index
,
/// Declaration only; the definition is not part of this header section.
/// Const member taking the caller and callee functions.
/// NOTE(review): presumably decides whether \p Callee may be inlined into
/// \p Caller based on their subtarget features -- confirm against the
/// definition.
191 bool areInlineCompatible(const Function
*Caller
,
192 const Function
*Callee
) const;
/// Always returns 7.
194 unsigned getInliningThresholdMultiplier() { return 7; }
/// Always returns 0.
196 int getInlinerVectorBonusPercent() { return 0; }
// Reduction cost queries, both returning int.
// NOTE(review): both parameter lists are cut off in this view (each ends on a
// comma) -- take the remaining parameters from the full header.
198 int getArithmeticReductionCost(unsigned Opcode
,
201 int getMinMaxReductionCost(Type
*Ty
, Type
*CondTy
,
/// TTI implementation class that works against an R600Subtarget, built on
/// BasicTTIImplBase with this class supplied as the template argument.
206 class R600TTIImpl final
: public BasicTTIImplBase
<R600TTIImpl
> {
// Shorthand for the base class instantiation.
207 using BaseT
= BasicTTIImplBase
<R600TTIImpl
>;
// Shorthand for TargetTransformInfo; used below for its nested types.
208 using TTI
= TargetTransformInfo
;
// Subtarget pointer; the constructor obtains it from TM->getSubtargetImpl(F).
212 const R600Subtarget
*ST
;
// Target lowering pointer; the constructor obtains it from
// ST->getTargetLowering().
213 const AMDGPUTargetLowering
*TLI
;
// An AMDGPUTTIImpl instance held by value.
214 AMDGPUTTIImpl CommonTTI
;
217 explicit R600TTIImpl(const AMDGPUTargetMachine
*TM
, const Function
&F
)
218 : BaseT(TM
, F
.getParent()->getDataLayout()),
219 ST(static_cast<const R600Subtarget
*>(TM
->getSubtargetImpl(F
))),
220 TLI(ST
->getTargetLowering()),
/// Returns the stored R600Subtarget pointer (member ST).
223 const R600Subtarget
*getST() const { return ST
; }
/// Returns the stored AMDGPUTargetLowering pointer (member TLI).
224 const AMDGPUTargetLowering
*getTLI() const { return TLI
; }
/// Declaration only; the definition is not part of this header section.
/// \p UP is taken by non-const reference.
226 void getUnrollingPreferences(Loop
*L
, ScalarEvolution
&SE
,
227 TTI::UnrollingPreferences
&UP
);
// Register-related queries. Declarations only; the definitions are not part of
// this header section. Each is a const member returning unsigned.
// NOTE(review): the bool parameter is spelled `Vec` on the first two
// declarations and `Vector` on the third; presumably it selects vector vs.
// scalar registers in all three -- confirm against the definitions.
228 unsigned getHardwareNumberOfRegisters(bool Vec
) const;
229 unsigned getNumberOfRegisters(bool Vec
) const;
230 unsigned getRegisterBitWidth(bool Vector
) const;
231 unsigned getMinVectorRegisterBitWidth() const;
232 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace
) const;
// Legality predicates for vectorizing a chain of memory accesses. Declarations
// only; the definitions are not part of this header section.
// isLegalToVectorizeMemChain takes the chain size in bytes, an alignment and an
// address space.
// NOTE(review): the embedded listing numbers skip one line inside the Load and
// Store variants below, so a parameter (possibly the alignment) may be missing
// from this view -- confirm against the full header.
233 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes
, unsigned Alignment
,
234 unsigned AddrSpace
) const;
235 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes
,
237 unsigned AddrSpace
) const;
238 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes
,
240 unsigned AddrSpace
) const;
// Interleave-factor and cost queries. Declarations only; the definitions are
// not part of this header section. All three are non-const members.
241 unsigned getMaxInterleaveFactor(unsigned VF
);
242 unsigned getCFInstrCost(unsigned Opcode
);
243 int getVectorInstrCost(unsigned Opcode
, Type
*ValTy
, unsigned Index
);
246 } // end namespace llvm
248 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H