1 //=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file declares the NVPTX specific subclass of TargetSubtarget.
11 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
14 #define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
17 #include "NVPTXFrameLowering.h"
18 #include "NVPTXISelLowering.h"
19 #include "NVPTXInstrInfo.h"
20 #include "NVPTXRegisterInfo.h"
21 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DataLayout.h"
26 #define GET_SUBTARGETINFO_HEADER
27 #include "NVPTXGenSubtargetInfo.inc"
31 class NVPTXSubtarget
: public NVPTXGenSubtargetInfo
{
32 virtual void anchor();
33 std::string TargetName
;
35 // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
38 // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310
40 unsigned int FullSmVersion
;
42 // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
44 unsigned int SmVersion
;
46 const NVPTXTargetMachine
&TM
;
47 NVPTXInstrInfo InstrInfo
;
48 NVPTXTargetLowering TLInfo
;
49 SelectionDAGTargetInfo TSInfo
;
51 // NVPTX does not have any call stack frame, but need a NVPTX specific
52 // FrameLowering class because TargetFrameLowering is abstract.
53 NVPTXFrameLowering FrameLowering
;
56 /// This constructor initializes the data members to match that
57 /// of the specified module.
59 NVPTXSubtarget(const Triple
&TT
, const std::string
&CPU
,
60 const std::string
&FS
, const NVPTXTargetMachine
&TM
);
62 const TargetFrameLowering
*getFrameLowering() const override
{
63 return &FrameLowering
;
65 const NVPTXInstrInfo
*getInstrInfo() const override
{ return &InstrInfo
; }
66 const NVPTXRegisterInfo
*getRegisterInfo() const override
{
67 return &InstrInfo
.getRegisterInfo();
69 const NVPTXTargetLowering
*getTargetLowering() const override
{
72 const SelectionDAGTargetInfo
*getSelectionDAGInfo() const override
{
76 bool hasAtomAddF64() const { return SmVersion
>= 60; }
77 bool hasAtomScope() const { return SmVersion
>= 60; }
78 bool hasAtomBitwise64() const { return SmVersion
>= 32; }
79 bool hasAtomMinMax64() const { return SmVersion
>= 32; }
80 bool hasAtomCas16() const { return SmVersion
>= 70 && PTXVersion
>= 63; }
81 bool hasClusters() const { return SmVersion
>= 90 && PTXVersion
>= 78; }
82 bool hasLDG() const { return SmVersion
>= 32; }
83 bool hasHWROT32() const { return SmVersion
>= 32; }
84 bool hasImageHandles() const;
85 bool hasFP16Math() const { return SmVersion
>= 53; }
86 bool hasBF16Math() const { return SmVersion
>= 80; }
87 bool allowFP16Math() const;
88 bool hasMaskOperator() const { return PTXVersion
>= 71; }
89 bool hasNoReturn() const { return SmVersion
>= 30 && PTXVersion
>= 64; }
90 // Does SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
91 // release, acq_rel, sc) ?
92 bool hasMemoryOrdering() const { return SmVersion
>= 70 && PTXVersion
>= 60; }
93 // Does SM & PTX support atomic relaxed MMIO operations ?
94 bool hasRelaxedMMIO() const { return SmVersion
>= 70 && PTXVersion
>= 82; }
95 bool hasDotInstructions() const {
96 return SmVersion
>= 61 && PTXVersion
>= 50;
98 // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
99 // terminates a basic block. Instead, it would assume that control flow
100 // continued to the next instruction. The next instruction could be in the
101 // block that's lexically below it. This would lead to a phantom CFG edges
102 // being created within ptxas. This issue was fixed in CUDA 12.3. Thus, when
103 // PTX ISA versions 8.3+ we can confidently say that the bug will not be
105 bool hasPTXASUnreachableBug() const { return PTXVersion
< 83; }
106 bool hasCvtaParam() const { return SmVersion
>= 70 && PTXVersion
>= 77; }
107 unsigned int getFullSmVersion() const { return FullSmVersion
; }
108 unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
109 // GPUs with "a" suffix have include architecture-accelerated features that
110 // are supported on the specified architecture only, hence such targets do not
111 // follow the onion layer model. hasAAFeatures() allows distinguishing such
112 // GPU variants from the base GPU architecture.
113 // - 0 represents base GPU model,
114 // - non-zero value identifies particular architecture-accelerated variant.
115 bool hasAAFeatures() const { return getFullSmVersion() % 10; }
116 std::string
getTargetName() const { return TargetName
; }
118 // Get maximum value of required alignments among the supported data types.
119 // From the PTX ISA doc, section 8.2.3:
120 // The memory consistency model relates operations executed on memory
121 // locations with scalar data-types, which have a maximum size and alignment
122 // of 64 bits. Memory operations with a vector data-type are modelled as a
123 // set of equivalent memory operations with a scalar data-type, executed in
124 // an unspecified order on the elements in the vector.
125 unsigned getMaxRequiredAlignment() const { return 8; }
127 unsigned getPTXVersion() const { return PTXVersion
; }
129 NVPTXSubtarget
&initializeSubtargetDependencies(StringRef CPU
, StringRef FS
);
130 void ParseSubtargetFeatures(StringRef CPU
, StringRef TuneCPU
, StringRef FS
);
132 void failIfClustersUnsupported(std::string
const &FailureMessage
) const;
135 } // End llvm namespace