//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};
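
// AMDGPUTTIImpl holds the logic common to all AMDGPU subtargets (currently
// just the unrolling preferences); both GCNTTIImpl and R600TTIImpl below
// embed one as their CommonTTI member and defer the shared queries to it.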

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
    AMDGPU::FeatureDebuggerEmitPrologue,
    AMDGPU::FeatureDebuggerInsertNops,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
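
  // getST() and getTLI() are the hooks BasicTTIImplBase expects from its CRTP
  // subclasses; the base implementation calls them to reach the subtarget and
  // TargetLowering when computing its default cost answers.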
  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
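
  // Worked example: with TCC_Basic == 1, a 64-bit operation costs 2 (half
  // rate) on subtargets with HalfRate64Ops, and 3 (the quarter-rate value,
  // see the TODO above) everywhere else.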

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }
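
  // Register-file and memory-chain queries; the vectorizers (e.g. the
  // load/store vectorizer) consult these to size vector accesses.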
  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};
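
// A minimal sketch (not part of this header) of how a target machine
// typically hands one of these implementations to the pass pipeline, assuming
// the usual getTargetTransformInfo hook:
//
//   TargetTransformInfo
//   GCNTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(GCNTTIImpl(this, F));
//   }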

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H