//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;
// TTI logic common to the GCN and R600 implementations below; each of them
// embeds an instance of this class as its CommonTTI member.
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};
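// For orientation (not part of this header): the target machine constructs
// one of these per-function TTI implementations and hands it to the generic
// TargetTransformInfo wrapper, roughly as in this sketch of the code in
// AMDGPUTargetMachine.cpp:
//
//   TargetTransformInfo
//   GCNTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(GCNTTIImpl(this, F));
//   }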
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
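  // A sketch of how this list is consumed (the real logic lives in
  // AMDGPUTargetTransformInfo.cpp): areInlineCompatible() masks these bits
  // out of both subtargets' feature sets before comparing them, so functions
  // that differ only in the features above can still be inlined:
  //
  //   FeatureBitset CallerBits =
  //       CallerST->getFeatureBits() & ~InlineFeatureIgnoreList;
  //   FeatureBitset CalleeBits =
  //       CalleeST->getFeatureBits() & ~InlineFeatureIgnoreList;
  //   return (CallerBits & CalleeBits) == CalleeBits;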
  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // this should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and on others
  // quarter rate. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
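  // Worked example of the rate model above, assuming TCC_Basic == 1: a
  // full-rate op (e.g. a 32-bit add) costs 1; a 64-bit op costs
  // 2 * TCC_Basic == 2 on subtargets with HalfRate64Ops, and
  // 3 * TCC_Basic == 3 otherwise (the quarter-rate bucket; see the TODO).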
public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  // GCN is a SIMT architecture: branches may be divergent across the lanes
  // of a wavefront.
  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
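  // GCN has native popcount support in both the scalar and vector units
  // (S_BCNT1_I32_* and V_BCNT_U32_B32), so ctpop is reported as fast
  // hardware support for any legal power-of-two width.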
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }
  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;
  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
    if (IsGraphicsShader)
      return -1; // Deliberately wraps to UINT_MAX: "no flat address space".
    return AMDGPUAS::FLAT_ADDRESS;
  }
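  // These two hooks support the InferAddressSpaces pass: for intrinsics
  // whose flat-pointer operand indexes are reported by
  // collectFlatAddressOperands(), the pass may call
  // rewriteIntrinsicWithAddressSpace() to replace the operand OldV with
  // NewV, a pointer into the specific address space it has inferred.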
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const;
  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
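  // Function calls are expensive on AMDGPU, so inlining is strongly
  // preferred; the generic inlining threshold is scaled up accordingly. The
  // factor of 7 below is an empirical tuning value, not a hardware parameter.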
  unsigned getInliningThresholdMultiplier() { return 7; }

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H