//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the NVPTX target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "NVPTXTargetMachine.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; };
  const NVPTXTargetLowering *getTLI() const { return TLI; };

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

  bool hasBranchDivergence() { return true; }

  bool isSourceOfDivergence(const Value *V);

  unsigned getFlatAddressSpace() const {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }
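  // For example, a 16-byte chain (say, four i32 accesses combined into one
  // <4 x i32> access) is only considered vectorizable by the checks above when
  // the access is at least 16-byte aligned; the same chain at 8-byte alignment
  // is rejected.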

  // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
  // We conservatively return 1 here which is just enough to enable the
  // vectorizers but disables heuristics based on the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(bool Vector) const { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 32; }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: There are
  // no incompatible functions in PTX, ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 5, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() { return 5; }
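  // For instance, with the inliner's common default threshold of 225, this
  // multiplier gives NVPTX an effective threshold of roughly 1125 (the exact
  // base value depends on the optimization level and inliner configuration).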

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H