//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
    AMDGPU::FeatureDebuggerEmitPrologue,
    AMDGPU::FeatureDebuggerInsertNops,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureTrapHandler,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
  };
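
  // Note (assumption, not verified against the .cpp): this ignore list is
  // presumably consulted by areInlineCompatible() below, so that caller/callee
  // mismatches in these subtarget features alone do not block inlining.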

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
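
  // Illustrative note: with the helpers above, a full-rate instruction is
  // modeled as TCC_Basic, a half-rate one as 2 * TCC_Basic, and a
  // quarter-rate one as 3 * TCC_Basic, so get64BitInstrCost() reports either
  // 2x or 3x the full-rate cost depending on ST->hasHalfRate64Ops().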

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
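
// Usage sketch (hedged): the target machine hands one of these implementations
// to the pass pipeline per function, along the lines of the usual
// TargetMachine::getTargetTransformInfo() pattern, e.g.
//
//   TargetTransformInfo
//   GCNTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(GCNTTIImpl(this, F));
//   }
//
// Passes then query the resulting TTI object (getUnrollingPreferences,
// getArithmeticInstrCost, ...), which dispatches to the overrides declared
// above; anything not overridden falls through to BasicTTIImplBase.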

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H