1 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements NVPTX TargetInfo objects.
11 //===----------------------------------------------------------------------===//
#include "NVPTX.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
20 using namespace clang
;
21 using namespace clang::targets
;
23 static constexpr Builtin::Info BuiltinInfo
[] = {
24 #define BUILTIN(ID, TYPE, ATTRS) \
25 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
26 #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \
27 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
28 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
29 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
30 #include "clang/Basic/BuiltinsNVPTX.def"
33 const char *const NVPTXTargetInfo::GCCRegNames
[] = {"r0"};
35 NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple
&Triple
,
36 const TargetOptions
&Opts
,
37 unsigned TargetPointerWidth
)
38 : TargetInfo(Triple
) {
39 assert((TargetPointerWidth
== 32 || TargetPointerWidth
== 64) &&
40 "NVPTX only supports 32- and 64-bit modes.");
43 for (const StringRef Feature
: Opts
.FeaturesAsWritten
) {
45 if (!Feature
.starts_with("+ptx") ||
46 Feature
.drop_front(4).getAsInteger(10, PTXV
))
48 PTXVersion
= PTXV
; // TODO: should it be max(PTXVersion, PTXV)?
53 AddrSpaceMap
= &NVPTXAddrSpaceMap
;
54 UseAddrSpaceMapMangling
= true;
55 // __bf16 is always available as a load/store only type.
56 BFloat16Width
= BFloat16Align
= 16;
57 BFloat16Format
= &llvm::APFloat::BFloat();
59 // Define available target features
60 // These must be defined in sorted order!
62 GPU
= OffloadArch::UNUSED
;
64 // PTX supports f16 as a fundamental type.
65 HasLegalHalfType
= true;
68 if (TargetPointerWidth
== 32)
69 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
70 else if (Opts
.NVPTXUseShortPointers
)
72 "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
74 resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
76 // If possible, get a TargetInfo for our host triple, so we can match its
78 llvm::Triple
HostTriple(Opts
.HostTriple
);
79 if (!HostTriple
.isNVPTX())
80 HostTarget
= AllocateTarget(llvm::Triple(Opts
.HostTriple
), Opts
);
82 // If no host target, make some guesses about the data layout and return.
84 LongWidth
= LongAlign
= TargetPointerWidth
;
85 PointerWidth
= PointerAlign
= TargetPointerWidth
;
86 switch (TargetPointerWidth
) {
88 SizeType
= TargetInfo::UnsignedInt
;
89 PtrDiffType
= TargetInfo::SignedInt
;
90 IntPtrType
= TargetInfo::SignedInt
;
93 SizeType
= TargetInfo::UnsignedLong
;
94 PtrDiffType
= TargetInfo::SignedLong
;
95 IntPtrType
= TargetInfo::SignedLong
;
98 llvm_unreachable("TargetPointerWidth must be 32 or 64");
101 MaxAtomicInlineWidth
= TargetPointerWidth
;
105 // Copy properties from host target.
106 PointerWidth
= HostTarget
->getPointerWidth(LangAS::Default
);
107 PointerAlign
= HostTarget
->getPointerAlign(LangAS::Default
);
108 BoolWidth
= HostTarget
->getBoolWidth();
109 BoolAlign
= HostTarget
->getBoolAlign();
110 IntWidth
= HostTarget
->getIntWidth();
111 IntAlign
= HostTarget
->getIntAlign();
112 HalfWidth
= HostTarget
->getHalfWidth();
113 HalfAlign
= HostTarget
->getHalfAlign();
114 FloatWidth
= HostTarget
->getFloatWidth();
115 FloatAlign
= HostTarget
->getFloatAlign();
116 DoubleWidth
= HostTarget
->getDoubleWidth();
117 DoubleAlign
= HostTarget
->getDoubleAlign();
118 LongWidth
= HostTarget
->getLongWidth();
119 LongAlign
= HostTarget
->getLongAlign();
120 LongLongWidth
= HostTarget
->getLongLongWidth();
121 LongLongAlign
= HostTarget
->getLongLongAlign();
122 MinGlobalAlign
= HostTarget
->getMinGlobalAlign(/* TypeSize = */ 0,
123 /* HasNonWeakDef = */ true);
124 NewAlign
= HostTarget
->getNewAlign();
125 DefaultAlignForAttributeAligned
=
126 HostTarget
->getDefaultAlignForAttributeAligned();
127 SizeType
= HostTarget
->getSizeType();
128 IntMaxType
= HostTarget
->getIntMaxType();
129 PtrDiffType
= HostTarget
->getPtrDiffType(LangAS::Default
);
130 IntPtrType
= HostTarget
->getIntPtrType();
131 WCharType
= HostTarget
->getWCharType();
132 WIntType
= HostTarget
->getWIntType();
133 Char16Type
= HostTarget
->getChar16Type();
134 Char32Type
= HostTarget
->getChar32Type();
135 Int64Type
= HostTarget
->getInt64Type();
136 SigAtomicType
= HostTarget
->getSigAtomicType();
137 ProcessIDType
= HostTarget
->getProcessIDType();
139 UseBitFieldTypeAlignment
= HostTarget
->useBitFieldTypeAlignment();
140 UseZeroLengthBitfieldAlignment
= HostTarget
->useZeroLengthBitfieldAlignment();
141 UseExplicitBitFieldAlignment
= HostTarget
->useExplicitBitFieldAlignment();
142 ZeroLengthBitfieldBoundary
= HostTarget
->getZeroLengthBitfieldBoundary();
144 // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
145 // we need those macros to be identical on host and device, because (among
146 // other things) they affect which standard library classes are defined, and
147 // we need all classes to be defined on both the host and device.
148 MaxAtomicInlineWidth
= HostTarget
->getMaxAtomicInlineWidth();
150 // Properties intentionally not copied from host:
151 // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
152 // host/device boundary.
153 // - SuitableAlign: Not visible across the host/device boundary, and may
154 // correctly be different on host/device, e.g. if host has wider vector
155 // types than device.
156 // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
157 // as its double type, but that's not necessarily true on the host.
158 // TODO: nvcc emits a warning when using long double on device; we should
162 ArrayRef
<const char *> NVPTXTargetInfo::getGCCRegNames() const {
163 return llvm::ArrayRef(GCCRegNames
);
166 bool NVPTXTargetInfo::hasFeature(StringRef Feature
) const {
167 return llvm::StringSwitch
<bool>(Feature
)
168 .Cases("ptx", "nvptx", true)
172 void NVPTXTargetInfo::getTargetDefines(const LangOptions
&Opts
,
173 MacroBuilder
&Builder
) const {
174 Builder
.defineMacro("__PTX__");
175 Builder
.defineMacro("__NVPTX__");
177 // Skip setting architecture dependent macros if undefined.
178 if (GPU
== OffloadArch::UNUSED
&& !HostTarget
)
181 if (Opts
.CUDAIsDevice
|| Opts
.OpenMPIsTargetDevice
|| !HostTarget
) {
182 // Set __CUDA_ARCH__ for the GPU specified.
183 std::string CUDAArchCode
= [this] {
185 case OffloadArch::GFX600
:
186 case OffloadArch::GFX601
:
187 case OffloadArch::GFX602
:
188 case OffloadArch::GFX700
:
189 case OffloadArch::GFX701
:
190 case OffloadArch::GFX702
:
191 case OffloadArch::GFX703
:
192 case OffloadArch::GFX704
:
193 case OffloadArch::GFX705
:
194 case OffloadArch::GFX801
:
195 case OffloadArch::GFX802
:
196 case OffloadArch::GFX803
:
197 case OffloadArch::GFX805
:
198 case OffloadArch::GFX810
:
199 case OffloadArch::GFX9_GENERIC
:
200 case OffloadArch::GFX900
:
201 case OffloadArch::GFX902
:
202 case OffloadArch::GFX904
:
203 case OffloadArch::GFX906
:
204 case OffloadArch::GFX908
:
205 case OffloadArch::GFX909
:
206 case OffloadArch::GFX90a
:
207 case OffloadArch::GFX90c
:
208 case OffloadArch::GFX9_4_GENERIC
:
209 case OffloadArch::GFX940
:
210 case OffloadArch::GFX941
:
211 case OffloadArch::GFX942
:
212 case OffloadArch::GFX950
:
213 case OffloadArch::GFX10_1_GENERIC
:
214 case OffloadArch::GFX1010
:
215 case OffloadArch::GFX1011
:
216 case OffloadArch::GFX1012
:
217 case OffloadArch::GFX1013
:
218 case OffloadArch::GFX10_3_GENERIC
:
219 case OffloadArch::GFX1030
:
220 case OffloadArch::GFX1031
:
221 case OffloadArch::GFX1032
:
222 case OffloadArch::GFX1033
:
223 case OffloadArch::GFX1034
:
224 case OffloadArch::GFX1035
:
225 case OffloadArch::GFX1036
:
226 case OffloadArch::GFX11_GENERIC
:
227 case OffloadArch::GFX1100
:
228 case OffloadArch::GFX1101
:
229 case OffloadArch::GFX1102
:
230 case OffloadArch::GFX1103
:
231 case OffloadArch::GFX1150
:
232 case OffloadArch::GFX1151
:
233 case OffloadArch::GFX1152
:
234 case OffloadArch::GFX1153
:
235 case OffloadArch::GFX12_GENERIC
:
236 case OffloadArch::GFX1200
:
237 case OffloadArch::GFX1201
:
238 case OffloadArch::AMDGCNSPIRV
:
239 case OffloadArch::Generic
:
240 case OffloadArch::LAST
:
242 case OffloadArch::UNKNOWN
:
243 assert(false && "No GPU arch when compiling CUDA device code.");
245 case OffloadArch::UNUSED
:
246 case OffloadArch::SM_20
:
248 case OffloadArch::SM_21
:
250 case OffloadArch::SM_30
:
252 case OffloadArch::SM_32_
:
254 case OffloadArch::SM_35
:
256 case OffloadArch::SM_37
:
258 case OffloadArch::SM_50
:
260 case OffloadArch::SM_52
:
262 case OffloadArch::SM_53
:
264 case OffloadArch::SM_60
:
266 case OffloadArch::SM_61
:
268 case OffloadArch::SM_62
:
270 case OffloadArch::SM_70
:
272 case OffloadArch::SM_72
:
274 case OffloadArch::SM_75
:
276 case OffloadArch::SM_80
:
278 case OffloadArch::SM_86
:
280 case OffloadArch::SM_87
:
282 case OffloadArch::SM_89
:
284 case OffloadArch::SM_90
:
285 case OffloadArch::SM_90a
:
287 case OffloadArch::SM_100
:
290 llvm_unreachable("unhandled OffloadArch");
292 Builder
.defineMacro("__CUDA_ARCH__", CUDAArchCode
);
293 if (GPU
== OffloadArch::SM_90a
)
294 Builder
.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
298 ArrayRef
<Builtin::Info
> NVPTXTargetInfo::getTargetBuiltins() const {
299 return llvm::ArrayRef(BuiltinInfo
,
300 clang::NVPTX::LastTSBuiltin
- Builtin::FirstTSBuiltin
);