1 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements NVPTX TargetInfo objects.
11 //===----------------------------------------------------------------------===//
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/MacroBuilder.h"
17 #include "clang/Basic/TargetBuiltins.h"
18 #include "llvm/ADT/StringSwitch.h"
20 using namespace clang
;
21 using namespace clang::targets
;
// Table of Builtin::Info descriptors for the NVPTX builtins. The three macros
// defined below are set up immediately before BuiltinsNVPTX.def is included,
// and each one produces a single initializer:
//   - BUILTIN:        stringified ID, TYPE, ATTRS, no feature (nullptr),
//                     HeaderDesc::NO_HEADER, ALL_LANGUAGES.
//   - LIBBUILTIN:     same, but records HeaderDesc::HEADER.
//   - TARGET_BUILTIN: same as BUILTIN, but records the FEATURE string.
// NOTE(review): the closing of this array initializer is not visible in this
// view of the file.
23 static constexpr Builtin::Info BuiltinInfo
[] = {
24 #define BUILTIN(ID, TYPE, ATTRS) \
25 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
26 #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \
27 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
28 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
29 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
30 #include "clang/Basic/BuiltinsNVPTX.def"
// Register names reported by getGCCRegNames(); the table holds the single
// entry "r0".
33 const char *const NVPTXTargetInfo::GCCRegNames
[] = {"r0"};
// Constructs the NVPTX target description.
//
// Parameters:
//   Triple             - target triple, forwarded to the TargetInfo base.
//   Opts               - target options; this constructor reads
//                        FeaturesAsWritten, NVPTXUseShortPointers and
//                        HostTriple from it.
//   TargetPointerWidth - pointer width in bits; asserted to be 32 or 64.
//
// Visible steps, in order: pick up a PTX version from a "+ptx<N>" feature,
// install the NVPTX address-space map, enable __bf16, choose a data layout,
// try to allocate a TargetInfo for the host triple, then either guess basic
// type properties from TargetPointerWidth or copy them from the host target.
//
// NOTE(review): several lines of this function (closing braces, case labels,
// else/continue/return statements) are not visible in this view of the file;
// comments below only describe what is shown.
35 NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple
&Triple
,
36 const TargetOptions
&Opts
,
37 unsigned TargetPointerWidth
)
38 : TargetInfo(Triple
) {
39 assert((TargetPointerWidth
== 32 || TargetPointerWidth
== 64) &&
40 "NVPTX only supports 32- and 64-bit modes.");
// Walk the features exactly as written, looking for "+ptx<N>" entries.
43 for (const StringRef Feature
: Opts
.FeaturesAsWritten
) {
// The condition is true when the feature does not start with "+ptx", or when
// the text after that 4-character prefix fails to parse as a base-10 integer
// into PTXV (StringRef::getAsInteger returns true on failure).
// NOTE(review): the statement guarded by this condition is not visible here;
// presumably it skips the feature - confirm.
45 if (!Feature
.startswith("+ptx") ||
46 Feature
.drop_front(4).getAsInteger(10, PTXV
))
48 PTXVersion
= PTXV
; // TODO: should it be max(PTXVersion, PTXV)?
// Use the NVPTX address-space map, and enable UseAddrSpaceMapMangling.
53 AddrSpaceMap
= &NVPTXAddrSpaceMap
;
54 UseAddrSpaceMapMangling
= true;
55 // __bf16 is always available as a load/store only type.
56 BFloat16Width
= BFloat16Align
= 16;
57 BFloat16Format
= &llvm::APFloat::BFloat();
59 // Define available target features
60 // These must be defined in sorted order!
// Initial GPU architecture set by the constructor.
62 GPU
= CudaArch::SM_20
;
// Select the data layout string: 32-bit pointers get an explicit "p:32:32"
// entry; the short-pointer variant gives address spaces 3, 4 and 5 32-bit
// pointers; the last layout string has no pointer entry at all.
64 if (TargetPointerWidth
== 32)
65 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
66 else if (Opts
.NVPTXUseShortPointers
)
// NOTE(review): the call that receives the string below, and the `else`
// introducing the final layout, are not visible in this view.
68 "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
70 resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
72 // If possible, get a TargetInfo for our host triple, so we can match its
// NOTE(review): the sentence above is cut off in this view. The code further
// down copies widths, alignments and integer type choices from HostTarget.
74 llvm::Triple
HostTriple(Opts
.HostTriple
);
// Only allocate a host target when the host triple is not itself NVPTX.
75 if (!HostTriple
.isNVPTX())
76 HostTarget
= AllocateTarget(llvm::Triple(Opts
.HostTriple
), Opts
);
78 // If no host target, make some guesses about the data layout and return.
// In this path `long` and pointers both take the requested pointer width.
80 LongWidth
= LongAlign
= TargetPointerWidth
;
81 PointerWidth
= PointerAlign
= TargetPointerWidth
;
// Choose size_t / ptrdiff_t / intptr_t by pointer width: the first group of
// assignments uses int-based types, the second group long-based types.
// NOTE(review): the case labels and breaks are not visible in this view;
// presumably the groups belong to 32 and 64 respectively - confirm.
82 switch (TargetPointerWidth
) {
84 SizeType
= TargetInfo::UnsignedInt
;
85 PtrDiffType
= TargetInfo::SignedInt
;
86 IntPtrType
= TargetInfo::SignedInt
;
89 SizeType
= TargetInfo::UnsignedLong
;
90 PtrDiffType
= TargetInfo::SignedLong
;
91 IntPtrType
= TargetInfo::SignedLong
;
94 llvm_unreachable("TargetPointerWidth must be 32 or 64");
// Without a host target, inline atomics are limited to the pointer width.
97 MaxAtomicInlineWidth
= TargetPointerWidth
;
101 // Copy properties from host target.
// Pointer, bool, integer and floating-point widths and alignments.
102 PointerWidth
= HostTarget
->getPointerWidth(LangAS::Default
);
103 PointerAlign
= HostTarget
->getPointerAlign(LangAS::Default
);
104 BoolWidth
= HostTarget
->getBoolWidth();
105 BoolAlign
= HostTarget
->getBoolAlign();
106 IntWidth
= HostTarget
->getIntWidth();
107 IntAlign
= HostTarget
->getIntAlign();
108 HalfWidth
= HostTarget
->getHalfWidth();
109 HalfAlign
= HostTarget
->getHalfAlign();
110 FloatWidth
= HostTarget
->getFloatWidth();
111 FloatAlign
= HostTarget
->getFloatAlign();
112 DoubleWidth
= HostTarget
->getDoubleWidth();
113 DoubleAlign
= HostTarget
->getDoubleAlign();
114 LongWidth
= HostTarget
->getLongWidth();
115 LongAlign
= HostTarget
->getLongAlign();
116 LongLongWidth
= HostTarget
->getLongLongWidth();
117 LongLongAlign
= HostTarget
->getLongLongAlign();
// Alignment defaults; MinGlobalAlign is queried with a type size of 0.
118 MinGlobalAlign
= HostTarget
->getMinGlobalAlign(/* TypeSize = */ 0);
119 NewAlign
= HostTarget
->getNewAlign();
120 DefaultAlignForAttributeAligned
=
121 HostTarget
->getDefaultAlignForAttributeAligned();
// Which builtin integer kinds back the standard typedef-like types.
122 SizeType
= HostTarget
->getSizeType();
123 IntMaxType
= HostTarget
->getIntMaxType();
124 PtrDiffType
= HostTarget
->getPtrDiffType(LangAS::Default
);
125 IntPtrType
= HostTarget
->getIntPtrType();
126 WCharType
= HostTarget
->getWCharType();
127 WIntType
= HostTarget
->getWIntType();
128 Char16Type
= HostTarget
->getChar16Type();
129 Char32Type
= HostTarget
->getChar32Type();
130 Int64Type
= HostTarget
->getInt64Type();
131 SigAtomicType
= HostTarget
->getSigAtomicType();
132 ProcessIDType
= HostTarget
->getProcessIDType();
// Bit-field layout rules.
134 UseBitFieldTypeAlignment
= HostTarget
->useBitFieldTypeAlignment();
135 UseZeroLengthBitfieldAlignment
= HostTarget
->useZeroLengthBitfieldAlignment();
136 UseExplicitBitFieldAlignment
= HostTarget
->useExplicitBitFieldAlignment();
137 ZeroLengthBitfieldBoundary
= HostTarget
->getZeroLengthBitfieldBoundary();
139 // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
140 // we need those macros to be identical on host and device, because (among
141 // other things) they affect which standard library classes are defined, and
142 // we need all classes to be defined on both the host and device.
143 MaxAtomicInlineWidth
= HostTarget
->getMaxAtomicInlineWidth();
145 // Properties intentionally not copied from host:
146 // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
147 // host/device boundary.
148 // - SuitableAlign: Not visible across the host/device boundary, and may
149 // correctly be different on host/device, e.g. if host has wider vector
150 // types than device.
151 // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
152 // as its double type, but that's not necessarily true on the host.
153 // TODO: nvcc emits a warning when using long double on device; we should
// NOTE(review): the TODO sentence above is cut off in this view of the file.
// Returns a non-owning ArrayRef over the static GCCRegNames table.
157 ArrayRef
<const char *> NVPTXTargetInfo::getGCCRegNames() const {
158 return llvm::ArrayRef(GCCRegNames
);
// Reports whether the named feature is available on this target. The visible
// part of the StringSwitch maps both "ptx" and "nvptx" to true.
// NOTE(review): the remainder of the switch chain (its default result) is not
// visible in this view of the file.
161 bool NVPTXTargetInfo::hasFeature(StringRef Feature
) const {
162 return llvm::StringSwitch
<bool>(Feature
)
163 .Cases("ptx", "nvptx", true)
// Adds the NVPTX-specific predefined macros to Builder.
//
// Parameters:
//   Opts    - language options; CUDAIsDevice and OpenMPIsDevice are read.
//   Builder - receives the macro definitions.
//
// __PTX__ and __NVPTX__ are always defined. __CUDA_ARCH__ is defined only
// when compiling for a CUDA device, an OpenMP device, or when there is no
// HostTarget.
//
// NOTE(review): the switch header and the value returned for each case are
// not visible in this view of the file; comments below only describe what is
// shown.
167 void NVPTXTargetInfo::getTargetDefines(const LangOptions
&Opts
,
168 MacroBuilder
&Builder
) const {
169 Builder
.defineMacro("__PTX__");
170 Builder
.defineMacro("__NVPTX__");
171 if (Opts
.CUDAIsDevice
|| Opts
.OpenMPIsDevice
|| !HostTarget
) {
172 // Set __CUDA_ARCH__ for the GPU specified.
// The code string is produced by a lambda capturing `this`.
173 std::string CUDAArchCode
= [this] {
// The AMD GFX* architectures and Generic share one run of case labels.
// NOTE(review): the statement this run reaches is not visible in this view.
175 case CudaArch::GFX600
:
176 case CudaArch::GFX601
:
177 case CudaArch::GFX602
:
178 case CudaArch::GFX700
:
179 case CudaArch::GFX701
:
180 case CudaArch::GFX702
:
181 case CudaArch::GFX703
:
182 case CudaArch::GFX704
:
183 case CudaArch::GFX705
:
184 case CudaArch::GFX801
:
185 case CudaArch::GFX802
:
186 case CudaArch::GFX803
:
187 case CudaArch::GFX805
:
188 case CudaArch::GFX810
:
189 case CudaArch::GFX900
:
190 case CudaArch::GFX902
:
191 case CudaArch::GFX904
:
192 case CudaArch::GFX906
:
193 case CudaArch::GFX908
:
194 case CudaArch::GFX909
:
195 case CudaArch::GFX90a
:
196 case CudaArch::GFX90c
:
197 case CudaArch::GFX940
:
198 case CudaArch::GFX941
:
199 case CudaArch::GFX942
:
200 case CudaArch::GFX1010
:
201 case CudaArch::GFX1011
:
202 case CudaArch::GFX1012
:
203 case CudaArch::GFX1013
:
204 case CudaArch::GFX1030
:
205 case CudaArch::GFX1031
:
206 case CudaArch::GFX1032
:
207 case CudaArch::GFX1033
:
208 case CudaArch::GFX1034
:
209 case CudaArch::GFX1035
:
210 case CudaArch::GFX1036
:
211 case CudaArch::GFX1100
:
212 case CudaArch::GFX1101
:
213 case CudaArch::GFX1102
:
214 case CudaArch::GFX1103
:
215 case CudaArch::Generic
:
// UNUSED and UNKNOWN lead to the assertion below: device code is expected to
// have a concrete GPU architecture.
218 case CudaArch::UNUSED
:
219 case CudaArch::UNKNOWN
:
220 assert(false && "No GPU arch when compiling CUDA device code.");
// Each NVIDIA SM_xx architecture has its own case label.
// NOTE(review): the per-case result is not visible in this view.
222 case CudaArch::SM_20
:
224 case CudaArch::SM_21
:
226 case CudaArch::SM_30
:
228 case CudaArch::SM_32
:
230 case CudaArch::SM_35
:
232 case CudaArch::SM_37
:
234 case CudaArch::SM_50
:
236 case CudaArch::SM_52
:
238 case CudaArch::SM_53
:
240 case CudaArch::SM_60
:
242 case CudaArch::SM_61
:
244 case CudaArch::SM_62
:
246 case CudaArch::SM_70
:
248 case CudaArch::SM_72
:
250 case CudaArch::SM_75
:
252 case CudaArch::SM_80
:
254 case CudaArch::SM_86
:
256 case CudaArch::SM_87
:
258 case CudaArch::SM_89
:
260 case CudaArch::SM_90
:
// Guard for an enumerator that none of the case labels handles.
263 llvm_unreachable("unhandled CudaArch");
// Publish the computed code string as __CUDA_ARCH__.
265 Builder
.defineMacro("__CUDA_ARCH__", CUDAArchCode
);
// Returns a non-owning ArrayRef over the BuiltinInfo table. The element count
// is the span between Builtin::FirstTSBuiltin and clang::NVPTX::LastTSBuiltin.
269 ArrayRef
<Builtin::Info
> NVPTXTargetInfo::getTargetBuiltins() const {
270 return llvm::ArrayRef(BuiltinInfo
,
271 clang::NVPTX::LastTSBuiltin
- Builtin::FirstTSBuiltin
);