[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / AMDGPUTargetTransformInfo.h
blob56a46bcdaa44eb380ff355f29c07495f48c90153
1 //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines a TargetTransformInfo::Concept conforming object specific to the
11 /// AMDGPU target machine. It uses the target's detailed information to
12 /// provide more precise answers to certain TTI queries, while letting the
13 /// target independent and default TTI implementations handle the rest.
15 //===----------------------------------------------------------------------===//
17 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
20 #include "AMDGPU.h"
21 #include "AMDGPUSubtarget.h"
22 #include "AMDGPUTargetMachine.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "Utils/AMDGPUBaseInfo.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/CodeGen/BasicTTIImpl.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/MC/SubtargetFeature.h"
30 #include "llvm/Support/MathExtras.h"
31 #include <cassert>
33 namespace llvm {
35 class AMDGPUTargetLowering;
36 class Loop;
37 class ScalarEvolution;
38 class Type;
39 class Value;
/// TTI implementation constructed from an AMDGPUTargetMachine and a Function.
/// It records the target triple and declares an unrolling-preferences hook.
/// GCNTTIImpl and R600TTIImpl in this header each hold one instance of this
/// class as their CommonTTI member.
41 class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
42 using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
43 using TTI = TargetTransformInfo;
// Grants the CRTP base access to this class's non-public members.
45 friend BaseT;
// Triple obtained from the target machine in the constructor.
47 Triple TargetTriple;
49 public:
/// Initializes the base with \p TM and the data layout of the module that
/// contains \p F, and stores the triple reported by \p TM.
50 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
51 : BaseT(TM, F.getParent()->getDataLayout()),
52 TargetTriple(TM->getTargetTriple()) {}
/// Unrolling-preferences hook for loop \p L; \p UP is passed by mutable
/// reference. Declaration only in this header.
54 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
55 TTI::UnrollingPreferences &UP);
/// TTI implementation for functions whose subtarget is a GCNSubtarget.
/// At construction it caches the subtarget, that subtarget's target lowering,
/// an AMDGPUTTIImpl instance (CommonTTI), and whether the function's calling
/// convention is classified as a shader by AMDGPU::isShader().
58 class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
59 using BaseT = BasicTTIImplBase<GCNTTIImpl>;
60 using TTI = TargetTransformInfo;
// Grants the CRTP base access to this class's non-public members.
62 friend BaseT;
// Members are declared in the order the constructor initializes them;
// TLI is derived from ST, so ST must stay first.
64 const GCNSubtarget *ST;
65 const AMDGPUTargetLowering *TLI;
66 AMDGPUTTIImpl CommonTTI;
67 bool IsGraphicsShader;
// Set of subtarget feature bits, grouped below by the reason each is listed.
// NOTE(review): presumably consulted by areInlineCompatible() when comparing
// caller and callee features -- confirm against the implementation file.
69 const FeatureBitset InlineFeatureIgnoreList = {
70 // Codegen control options which don't matter.
71 AMDGPU::FeatureEnableLoadStoreOpt,
72 AMDGPU::FeatureEnableSIScheduler,
73 AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
74 AMDGPU::FeatureFlatForGlobal,
75 AMDGPU::FeaturePromoteAlloca,
76 AMDGPU::FeatureUnalignedBufferAccess,
77 AMDGPU::FeatureUnalignedScratchAccess,
79 AMDGPU::FeatureAutoWaitcntBeforeBarrier,
81 // Property of the kernel/environment which can't actually differ.
82 AMDGPU::FeatureSGPRInitBug,
83 AMDGPU::FeatureXNACK,
84 AMDGPU::FeatureTrapHandler,
85 AMDGPU::FeatureCodeObjectV3,
87 // The default assumption needs to be ecc is enabled, but no directly
88 // exposed operations depend on it, so it can be safely inlined.
89 AMDGPU::FeatureSRAMECC,
91 // Perf-tuning features
92 AMDGPU::FeatureFastFMAF32,
93 AMDGPU::HalfRate64Ops
/// Accessors for the cached subtarget and target-lowering pointers.
96 const GCNSubtarget *getST() const { return ST; }
97 const AMDGPUTargetLowering *getTLI() const { return TLI; }
/// Cost of a full-rate instruction: one TCC_Basic unit.
99 static inline int getFullRateInstrCost() {
100 return TargetTransformInfo::TCC_Basic;
/// Cost of a half-rate instruction: two TCC_Basic units.
103 static inline int getHalfRateInstrCost() {
104 return 2 * TargetTransformInfo::TCC_Basic;
107 // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
108 // should be 2 or 4.
// NOTE(review): the value returned is 3 * TCC_Basic, not 4x as the name
// might suggest; the TODO above indicates the constant is a heuristic.
109 static inline int getQuarterRateInstrCost() {
110 return 3 * TargetTransformInfo::TCC_Basic;
113 // On some parts, normal fp64 operations are half rate, and others
114 // quarter. This also applies to some integer operations.
// Selects the half-rate cost when ST->hasHalfRate64Ops() is true and the
// quarter-rate cost otherwise.
115 inline int get64BitInstrCost() const {
116 return ST->hasHalfRate64Ops() ?
117 getHalfRateInstrCost() : getQuarterRateInstrCost();
120 public:
/// Initializes the base with \p TM and the data layout of the module that
/// contains \p F, fetches the subtarget for \p F from \p TM (cast to
/// GCNSubtarget), takes the target lowering from that subtarget, builds
/// CommonTTI from the same arguments, and records the result of
/// AMDGPU::isShader() on \p F's calling convention.
121 explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
122 : BaseT(TM, F.getParent()->getDataLayout()),
123 ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
124 TLI(ST->getTargetLowering()),
125 CommonTTI(TM, F),
126 IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
/// Unconditionally reports that the target has branch divergence.
128 bool hasBranchDivergence() { return true; }
/// Unrolling-preferences hook for loop \p L; \p UP is passed by mutable
/// reference. Declaration only in this header.
130 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
131 TTI::UnrollingPreferences &UP);
/// Always returns TTI::PSK_FastHardware. \p TyWidth is used only by the
/// assertion, which requires it to be a power of two.
133 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
134 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
135 return TTI::PSK_FastHardware;
// Register-count, register-width and vectorization-factor queries.
// Declarations only in this header.
138 unsigned getHardwareNumberOfRegisters(bool Vector) const;
139 unsigned getNumberOfRegisters(bool Vector) const;
140 unsigned getRegisterBitWidth(bool Vector) const;
141 unsigned getMinVectorRegisterBitWidth() const;
142 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
143 unsigned ChainSizeInBytes,
144 VectorType *VecTy) const;
145 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
146 unsigned ChainSizeInBytes,
147 VectorType *VecTy) const;
148 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
// Legality predicates for vectorizing a chain of memory accesses, each taking
// the chain size in bytes, an alignment and an address space.
// Declarations only in this header.
150 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
151 unsigned Alignment,
152 unsigned AddrSpace) const;
153 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
154 unsigned Alignment,
155 unsigned AddrSpace) const;
156 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
157 unsigned Alignment,
158 unsigned AddrSpace) const;
160 unsigned getMaxInterleaveFactor(unsigned VF);
162 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
/// Arithmetic instruction cost query. Every parameter after \p Ty has a
/// default, so callers may pass only the opcode and type.
164 int getArithmeticInstrCost(
165 unsigned Opcode, Type *Ty,
166 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
167 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
168 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
169 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
170 ArrayRef<const Value *> Args = ArrayRef<const Value *>());
172 unsigned getCFInstrCost(unsigned Opcode);
174 int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
175 bool isSourceOfDivergence(const Value *V) const;
176 bool isAlwaysUniform(const Value *V) const;
/// Returns AMDGPUAS::FLAT_ADDRESS unless IsGraphicsShader is set.
// NOTE(review): the graphics-shader path returns -1 from a function whose
// return type is unsigned, i.e. the maximum unsigned value; presumably
// callers treat that as "no flat address space" -- confirm.
178 unsigned getFlatAddressSpace() const {
179 // Don't bother running InferAddressSpaces pass on graphics shaders which
180 // don't use flat addressing.
181 if (IsGraphicsShader)
182 return -1;
183 return AMDGPUAS::FLAT_ADDRESS;
// Address-space rewriting hooks for intrinsics. Declarations only in this
// header.
186 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
187 Intrinsic::ID IID) const;
188 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
189 Value *OldV, Value *NewV) const;
/// Vector split cost is reported as zero.
191 unsigned getVectorSplitCost() { return 0; }
193 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
194 Type *SubTp);
196 bool areInlineCompatible(const Function *Caller,
197 const Function *Callee) const;
/// Fixed inlining threshold multiplier of 7.
199 unsigned getInliningThresholdMultiplier() { return 7; }
/// Inliner vector bonus is reported as 0 percent.
201 int getInlinerVectorBonusPercent() { return 0; }
// Reduction cost queries. Declarations only in this header.
203 int getArithmeticReductionCost(unsigned Opcode,
204 Type *Ty,
205 bool IsPairwise);
206 int getMinMaxReductionCost(Type *Ty, Type *CondTy,
207 bool IsPairwiseForm,
208 bool IsUnsigned);
/// TTI implementation for functions whose subtarget is an R600Subtarget.
/// At construction it caches the subtarget, that subtarget's target lowering,
/// and an AMDGPUTTIImpl instance (CommonTTI).
211 class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
212 using BaseT = BasicTTIImplBase<R600TTIImpl>;
213 using TTI = TargetTransformInfo;
// Grants the CRTP base access to this class's non-public members.
215 friend BaseT;
// Members are declared in the order the constructor initializes them;
// TLI is derived from ST, so ST must stay first.
217 const R600Subtarget *ST;
218 const AMDGPUTargetLowering *TLI;
219 AMDGPUTTIImpl CommonTTI;
221 public:
/// Initializes the base with \p TM and the data layout of the module that
/// contains \p F, fetches the subtarget for \p F from \p TM (cast to
/// R600Subtarget), takes the target lowering from that subtarget, and builds
/// CommonTTI from the same arguments.
222 explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
223 : BaseT(TM, F.getParent()->getDataLayout()),
224 ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
225 TLI(ST->getTargetLowering()),
226 CommonTTI(TM, F) {}
/// Accessors for the cached subtarget and target-lowering pointers.
228 const R600Subtarget *getST() const { return ST; }
229 const AMDGPUTargetLowering *getTLI() const { return TLI; }
/// Unrolling-preferences hook for loop \p L; \p UP is passed by mutable
/// reference. Declaration only in this header.
231 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
232 TTI::UnrollingPreferences &UP);
// Register-count and register-width queries. Declarations only in this header.
233 unsigned getHardwareNumberOfRegisters(bool Vec) const;
234 unsigned getNumberOfRegisters(bool Vec) const;
235 unsigned getRegisterBitWidth(bool Vector) const;
236 unsigned getMinVectorRegisterBitWidth() const;
237 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
// Legality predicates for vectorizing a chain of memory accesses, each taking
// the chain size in bytes, an alignment and an address space.
// Declarations only in this header.
238 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
239 unsigned AddrSpace) const;
240 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
241 unsigned Alignment,
242 unsigned AddrSpace) const;
243 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
244 unsigned Alignment,
245 unsigned AddrSpace) const;
// Remaining cost and interleave queries. Declarations only in this header.
246 unsigned getMaxInterleaveFactor(unsigned VF);
247 unsigned getCFInstrCost(unsigned Opcode);
248 int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
251 } // end namespace llvm
253 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H