llvm/lib/Target/NVPTX/NVPTXSubtarget.h

   1 //=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file declares the NVPTX-specific subclass of TargetSubtargetInfo.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
  14 #define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
  15
  16 #include "NVPTX.h"
  17 #include "NVPTXFrameLowering.h"
  18 #include "NVPTXISelLowering.h"
  19 #include "NVPTXInstrInfo.h"
  20 #include "NVPTXRegisterInfo.h"
  21 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  23 #include "llvm/IR/DataLayout.h"
  24 #include <string>
  25
  26 #define GET_SUBTARGETINFO_HEADER
  27 #include "NVPTXGenSubtargetInfo.inc"
  28
  29 namespace llvm {
  30
  31 class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
  32   virtual void anchor();
  33   std::string TargetName;
  34
  35   // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
  36   unsigned PTXVersion;
  37
  38   // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310
  39   // sm_90a == 901
  40   unsigned int FullSmVersion;
  41
  42   // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
  43   // FullSmVersion.
  44   unsigned int SmVersion;
  45
  46   const NVPTXTargetMachine &TM;
  47   NVPTXInstrInfo InstrInfo;
  48   NVPTXTargetLowering TLInfo;
  49   SelectionDAGTargetInfo TSInfo;
  50
  51   // NVPTX does not have any call stack frame, but need a NVPTX specific
  52   // FrameLowering class because TargetFrameLowering is abstract.
  53   NVPTXFrameLowering FrameLowering;
  54
  55 public:
  56   /// This constructor initializes the data members to match that
  57   /// of the specified module.
  58   ///
  59   NVPTXSubtarget(const Triple &TT, const std::string &CPU,
  60                  const std::string &FS, const NVPTXTargetMachine &TM);
  61
  62   const TargetFrameLowering *getFrameLowering() const override {
  63     return &FrameLowering;
  64   }
  65   const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
  66   const NVPTXRegisterInfo *getRegisterInfo() const override {
  67     return &InstrInfo.getRegisterInfo();
  68   }
  69   const NVPTXTargetLowering *getTargetLowering() const override {
  70     return &TLInfo;
  71   }
  72   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
  73     return &TSInfo;
  74   }
  75
  76   bool hasAtomAddF64() const { return SmVersion >= 60; }
  77   bool hasAtomScope() const { return SmVersion >= 60; }
  78   bool hasAtomBitwise64() const { return SmVersion >= 32; }
  79   bool hasAtomMinMax64() const { return SmVersion >= 32; }
  80   bool hasAtomCas16() const { return SmVersion >= 70 && PTXVersion >= 63; }
  81   bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
  82   bool hasLDG() const { return SmVersion >= 32; }
  83   bool hasHWROT32() const { return SmVersion >= 32; }
  84   bool hasImageHandles() const;
  85   bool hasFP16Math() const { return SmVersion >= 53; }
  86   bool hasBF16Math() const { return SmVersion >= 80; }
  87   bool allowFP16Math() const;
  88   bool hasMaskOperator() const { return PTXVersion >= 71; }
  89   bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
  90   // Does SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
  91   // release, acq_rel, sc) ?
  92   bool hasMemoryOrdering() const { return SmVersion >= 70 && PTXVersion >= 60; }
  93   // Does SM & PTX support atomic relaxed MMIO operations ?
  94   bool hasRelaxedMMIO() const { return SmVersion >= 70 && PTXVersion >= 82; }
  95   bool hasDotInstructions() const {
  96     return SmVersion >= 61 && PTXVersion >= 50;
  97   }
  98   // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
  99   // terminates a basic block. Instead, it would assume that control flow
 100   // continued to the next instruction. The next instruction could be in the
 101   // block that's lexically below it. This would lead to a phantom CFG edges
 102   // being created within ptxas. This issue was fixed in CUDA 12.3. Thus, when
 103   // PTX ISA versions 8.3+ we can confidently say that the bug will not be
 104   // present.
 105   bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
 106   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
 107   unsigned int getFullSmVersion() const { return FullSmVersion; }
 108   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
 109   // GPUs with "a" suffix have include architecture-accelerated features that
 110   // are supported on the specified architecture only, hence such targets do not
 111   // follow the onion layer model. hasAAFeatures() allows distinguishing such
 112   // GPU variants from the base GPU architecture.
 113   // - 0 represents base GPU model,
 114   // - non-zero value identifies particular architecture-accelerated variant.
 115   bool hasAAFeatures() const { return getFullSmVersion() % 10; }
 116   std::string getTargetName() const { return TargetName; }
 117
 118   // Get maximum value of required alignments among the supported data types.
 119   // From the PTX ISA doc, section 8.2.3:
 120   //  The memory consistency model relates operations executed on memory
 121   //  locations with scalar data-types, which have a maximum size and alignment
 122   //  of 64 bits. Memory operations with a vector data-type are modelled as a
 123   //  set of equivalent memory operations with a scalar data-type, executed in
 124   //  an unspecified order on the elements in the vector.
 125   unsigned getMaxRequiredAlignment() const { return 8; }
 126
 127   unsigned getPTXVersion() const { return PTXVersion; }
 128
 129   NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
 130   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
 131
 132   void failIfClustersUnsupported(std::string const &FailureMessage) const;
 133 };
 134
 135 } // End llvm namespace
 136
 137 #endif