[Codegen] Alter the default promotion for saturating adds and subs
[llvm-complete.git] / lib / Target / AMDGPU / AMDGPUTargetTransformInfo.h
blobc7cd2f2c7bfd096b9968995913eaff0073265239
1 //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This file describes a TargetTransformInfo::Concept conforming object specific to the
11 /// AMDGPU target machine. It uses the target's detailed information to
12 /// provide more precise answers to certain TTI queries, while letting the
13 /// target independent and default TTI implementations handle the rest.
15 //===----------------------------------------------------------------------===//
17 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
20 #include "AMDGPU.h"
21 #include "AMDGPUSubtarget.h"
22 #include "AMDGPUTargetMachine.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "Utils/AMDGPUBaseInfo.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/CodeGen/BasicTTIImpl.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/MC/SubtargetFeature.h"
30 #include "llvm/Support/MathExtras.h"
31 #include <cassert>
33 namespace llvm {
35 class AMDGPUTargetLowering;
36 class Loop;
37 class ScalarEvolution;
38 class Type;
39 class Value;
41 class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
42 using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
43 using TTI = TargetTransformInfo;
45 friend BaseT;
47 Triple TargetTriple;
49 const TargetSubtargetInfo *ST;
50 const TargetLoweringBase *TLI;
52 const TargetSubtargetInfo *getST() const { return ST; }
53 const TargetLoweringBase *getTLI() const { return TLI; }
55 public:
56 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
57 : BaseT(TM, F.getParent()->getDataLayout()),
58 TargetTriple(TM->getTargetTriple()),
59 ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
60 TLI(ST->getTargetLowering()) {}
62 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
63 TTI::UnrollingPreferences &UP);
/// TTI implementation for the GCN (Southern Islands and later) subtargets.
/// Most queries are declared here and defined in the corresponding .cpp file;
/// only trivially small helpers are defined inline.
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  // Shared, generation-independent TTI logic (unrolling preferences).
  AMDGPUTTIImpl CommonTTI;
  // True for graphics calling conventions; set from the function in the ctor.
  bool IsGraphicsShader;

  // Subtarget features that may differ between caller and callee without
  // blocking inlining — presumably consulted by areInlineCompatible below
  // (defined out-of-line); TODO confirm against the .cpp file.
  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be ecc is enabled, but no directly
    // exposed operations depend on it, so it can be safely inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  // Cost (in TCC_Basic units) of an instruction issued at full rate.
  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  // Cost of an instruction issued at half rate (2x a full-rate op).
  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  // GCN execution is divergent across lanes, so control flow can diverge.
  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  // Population count is supported in fast hardware for any power-of-2 width.
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  // Returns the flat address space number, or -1 (no flat address space) for
  // graphics shaders so InferAddressSpaces is skipped for them.
  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const;

  // Splitting vectors is considered free on this target.
  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  // Inlining is heavily favored on AMDGPU (tuning constant).
  unsigned getInliningThresholdMultiplier() { return 7; }

  // No extra inlining bonus for vector-heavy callees.
  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};
/// TTI implementation for the older R600 (pre-GCN) subtargets.  All queries
/// are declared here and defined out-of-line; shared generation-independent
/// logic is delegated to the embedded AMDGPUTTIImpl.
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  // Shared, generation-independent TTI logic (unrolling preferences).
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
259 } // end namespace llvm
261 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H