//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
    AMDGPU::FeatureDebuggerEmitPrologue,
    AMDGPU::FeatureDebuggerInsertNops,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureTrapHandler,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
  };
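
  // Note (assumption, not verified against the .cpp): this ignore list is
  // presumably consulted by areInlineCompatible() below, so that caller/callee
  // mismatches in these subtarget features alone do not block inlining.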

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
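
  // Illustrative note: with the helpers above, a full-rate instruction is
  // modeled as TCC_Basic, a half-rate one as 2 * TCC_Basic, and a
  // quarter-rate one as 3 * TCC_Basic, so get64BitInstrCost() reports either
  // 2x or 3x the full-rate cost depending on ST->hasHalfRate64Ops().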

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
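
// Usage sketch (hedged): the target machine hands one of these implementations
// to the pass pipeline per function, along the lines of the usual
// TargetMachine::getTargetTransformInfo() pattern, e.g.
//
//   TargetTransformInfo
//   GCNTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(GCNTTIImpl(this, F));
//   }
//
// Passes then query the resulting TTI object (getUnrollingPreferences,
// getArithmeticInstrCost, ...), which dispatches to the overrides declared
// above; anything not overridden falls through to BasicTTIImplBase.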

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H