1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements AMDGPU TargetInfo objects.
11 //===----------------------------------------------------------------------===//
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 #include "llvm/ADT/SmallString.h"
21 using namespace clang
;
22 using namespace clang::targets
;
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
// Data layout description used when the target triple is not AMDGCN; the
// AMDGPUTargetInfo constructor passes it to resetDataLayout().
30 static const char *const DataLayoutStringR600
=
31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
// Data layout description used when the target triple is AMDGCN; the
// AMDGPUTargetInfo constructor passes it to resetDataLayout().
// NOTE(review): as listed, the second piece ends in "-v32:" and the next piece
// starts at "-v192", so the v32/v48/v96 entries that the R600 string carries
// are absent here, and no terminating ';' follows the last piece. This looks
// like lines lost from the listing -- confirm against the upstream file.
34 static const char *const DataLayoutStringAMDGCN
=
35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
36 "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
// Address space table installed by setAddressSpaceMap() when the default
// address space is NOT private. Entries are positional: the trailing comment on
// each entry names the language address space that slot stands for, and the
// value is the AMDGPU address space it maps to. Here both Default and
// opencl_generic map to FLAT_ADDRESS.
// NOTE(review): presumably the entry order must match the LangAS enumeration;
// confirm before adding or reordering entries.
41 const LangASMap
AMDGPUTargetInfo::AMDGPUDefIsGenMap
= {
42 llvm::AMDGPUAS::FLAT_ADDRESS
, // Default
43 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // opencl_global
44 llvm::AMDGPUAS::LOCAL_ADDRESS
, // opencl_local
45 llvm::AMDGPUAS::CONSTANT_ADDRESS
, // opencl_constant
46 llvm::AMDGPUAS::PRIVATE_ADDRESS
, // opencl_private
47 llvm::AMDGPUAS::FLAT_ADDRESS
, // opencl_generic
48 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // opencl_global_device
49 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // opencl_global_host
50 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // cuda_device
51 llvm::AMDGPUAS::CONSTANT_ADDRESS
, // cuda_constant
52 llvm::AMDGPUAS::LOCAL_ADDRESS
, // cuda_shared
53 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // sycl_global
54 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // sycl_global_device
55 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // sycl_global_host
56 llvm::AMDGPUAS::LOCAL_ADDRESS
, // sycl_local
57 llvm::AMDGPUAS::PRIVATE_ADDRESS
, // sycl_private
58 llvm::AMDGPUAS::FLAT_ADDRESS
, // ptr32_sptr
59 llvm::AMDGPUAS::FLAT_ADDRESS
, // ptr32_uptr
60 llvm::AMDGPUAS::FLAT_ADDRESS
, // ptr64
61 llvm::AMDGPUAS::FLAT_ADDRESS
, // hlsl_groupshared
// Address space table installed by setAddressSpaceMap() when the default
// address space IS private. Same positional layout as AMDGPUDefIsGenMap, but
// Default maps to PRIVATE_ADDRESS and every sycl_* slot holds FLAT_ADDRESS as a
// placeholder.
// NOTE(review): presumably the entry order must match the LangAS enumeration;
// confirm before adding or reordering entries.
64 const LangASMap
AMDGPUTargetInfo::AMDGPUDefIsPrivMap
= {
65 llvm::AMDGPUAS::PRIVATE_ADDRESS
, // Default
66 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // opencl_global
67 llvm::AMDGPUAS::LOCAL_ADDRESS
, // opencl_local
68 llvm::AMDGPUAS::CONSTANT_ADDRESS
, // opencl_constant
69 llvm::AMDGPUAS::PRIVATE_ADDRESS
, // opencl_private
70 llvm::AMDGPUAS::FLAT_ADDRESS
, // opencl_generic
71 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // opencl_global_device
72 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // opencl_global_host
73 llvm::AMDGPUAS::GLOBAL_ADDRESS
, // cuda_device
74 llvm::AMDGPUAS::CONSTANT_ADDRESS
, // cuda_constant
75 llvm::AMDGPUAS::LOCAL_ADDRESS
, // cuda_shared
76 // SYCL address space values for this map are dummy
77 llvm::AMDGPUAS::FLAT_ADDRESS
, // sycl_global
78 llvm::AMDGPUAS::FLAT_ADDRESS
, // sycl_global_device
79 llvm::AMDGPUAS::FLAT_ADDRESS
, // sycl_global_host
80 llvm::AMDGPUAS::FLAT_ADDRESS
, // sycl_local
81 llvm::AMDGPUAS::FLAT_ADDRESS
, // sycl_private
82 llvm::AMDGPUAS::FLAT_ADDRESS
, // ptr32_sptr
83 llvm::AMDGPUAS::FLAT_ADDRESS
, // ptr32_uptr
84 llvm::AMDGPUAS::FLAT_ADDRESS
, // ptr64
85 llvm::AMDGPUAS::FLAT_ADDRESS
, // hlsl_groupshared
88 } // namespace targets
// Number of AMDGPU target-specific builtins: the distance from the first
// target-specific builtin ID to the AMDGPU "last" marker. Used as the size
// parameter of BuiltinStorage.
91 static constexpr int NumBuiltins
=
92 clang::AMDGPU::LastTSBuiltin
- Builtin::FirstTSBuiltin
;
// Compile-time builtin table generated from BuiltinsAMDGPU.def. The .def file
// is included twice: first with BUILTIN/TARGET_BUILTIN defined as the
// *_STR_TABLE macros, then with them defined as the *_ENTRY macros.
// getTargetBuiltinStorage() hands out this object's StringTable and Infos.
// NOTE(review): the separator between the two passes and the closing of the
// Make(...) call are not visible in this listing -- confirm against upstream.
94 static constexpr auto BuiltinStorage
= Builtin::Storage
<NumBuiltins
>::Make(
95 #define BUILTIN CLANG_BUILTIN_STR_TABLE
96 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
97 #include "clang/Basic/BuiltinsAMDGPU.def"
99 #define BUILTIN CLANG_BUILTIN_ENTRY
100 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
101 #include "clang/Basic/BuiltinsAMDGPU.def"
// Register names exposed through getGCCRegNames(). In order: v0-v255,
// s0-s127, the special registers (exec, vcc, scc, m0, flat_scratch and the
// _lo/_hi halves of exec, vcc and flat_scratch), then a0-a255.
104 const char *const AMDGPUTargetInfo::GCCRegNames
[] = {
105 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
106 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
107 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
108 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
109 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
110 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
111 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
112 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
113 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
114 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
115 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
116 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
117 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
118 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
119 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
120 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
121 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
122 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
123 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
124 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
125 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
126 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
127 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
128 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
129 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
130 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
131 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
132 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
133 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
134 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
135 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
136 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
137 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
138 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
139 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
140 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
141 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
142 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
143 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
144 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
145 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
146 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
147 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
148 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
149 "flat_scratch_lo", "flat_scratch_hi",
150 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
151 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
152 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
153 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
154 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
155 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
156 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
157 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
158 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
159 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
160 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
161 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
162 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
163 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
164 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
165 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
166 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
167 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
168 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
169 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
170 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
171 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
172 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
173 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
174 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
175 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
176 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
177 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
178 "a252", "a253", "a254", "a255"
// Returns a non-owning view over the static GCCRegNames table.
181 ArrayRef
<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
182 return llvm::ArrayRef(GCCRegNames
);
// Builds the feature map for CPU.
//   Features   - map that is filled in.
//   Diags      - receives a diagnostic when the wave-size step reports a problem.
//   CPU        - processor name the features are derived from.
//   FeatureVec - feature list forwarded to the base TargetInfo::initFeatureMap.
// Steps: seed Features with fillAMDGPUFeatureMap(), run the base-class
// initFeatureMap(), then call insertWaveSizeFeature() and report
// err_invalid_feature_combination or err_opt_not_valid_on_target depending on
// the status it returns.
// NOTE(review): the return statements, the remaining switch labels and the
// closing braces are not visible in this listing; presumably the function
// returns false on every failure path and true otherwise -- confirm upstream.
185 bool AMDGPUTargetInfo::initFeatureMap(
186 llvm::StringMap
<bool> &Features
, DiagnosticsEngine
&Diags
, StringRef CPU
,
187 const std::vector
<std::string
> &FeatureVec
) const {
189 using namespace llvm::AMDGPU
;
190 fillAMDGPUFeatureMap(CPU
, getTriple(), Features
);
191 if (!TargetInfo::initFeatureMap(Features
, Diags
, CPU
, FeatureVec
))
194 // TODO: Should move this logic into TargetParser
// HasError is a pair: .first is the status code switched on below, .second is
// the value streamed into the diagnostic.
195 auto HasError
= insertWaveSizeFeature(CPU
, getTriple(), Features
);
196 switch (HasError
.first
) {
199 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION
:
200 Diags
.Report(diag::err_invalid_feature_combination
) << HasError
.second
;
202 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE
:
203 Diags
.Report(diag::err_opt_not_valid_on_target
) << HasError
.second
;
// Appends the valid processor names to Values, using the AMDGCN list helper
// when the triple is AMDGCN and the R600 list helper otherwise.
// NOTE(review): no `else` is visible between the two calls in this listing; as
// written here the R600 helper would run unconditionally. Presumably an `else`
// line was lost -- confirm against upstream.
210 void AMDGPUTargetInfo::fillValidCPUList(
211 SmallVectorImpl
<StringRef
> &Values
) const {
212 if (isAMDGCN(getTriple()))
213 llvm::AMDGPU::fillValidArchListAMDGCN(Values
);
215 llvm::AMDGPU::fillValidArchListR600(Values
);
// Selects the active address space table: AMDGPUDefIsPrivMap when
// DefaultIsPrivate is true, AMDGPUDefIsGenMap otherwise.
218 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate
) {
219 AddrSpaceMap
= DefaultIsPrivate
? &AMDGPUDefIsPrivMap
: &AMDGPUDefIsGenMap
;
// Constructs the AMDGPU target description for Triple using the options in
// Opts. GPUKind is parsed from Opts.CPU with the AMDGCN or R600 parser
// depending on the triple, and GPUFeatures is the attribute set looked up for
// that kind.
// NOTE(review): several lines (closing braces, part of the
// setAddressSpaceMap() argument) are not visible in this listing.
222 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple
&Triple
,
223 const TargetOptions
&Opts
)
224 : TargetInfo(Triple
),
225 GPUKind(isAMDGCN(Triple
) ?
226 llvm::AMDGPU::parseArchAMDGCN(Opts
.CPU
) :
227 llvm::AMDGPU::parseArchR600(Opts
.CPU
)),
228 GPUFeatures(isAMDGCN(Triple
) ?
229 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind
) :
230 llvm::AMDGPU::getArchAttrR600(GPUKind
)) {
// Pick the data layout string that matches the triple.
231 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
232 : DataLayoutStringR600
);
// Initial address space map; the condition starts with "OS is Mesa3D".
// NOTE(review): the right-hand operand of `||` is cut off here.
234 setAddressSpaceMap(Triple
.getOS() == llvm::Triple::Mesa3D
||
236 UseAddrSpaceMapMangling
= true;
238 if (isAMDGCN(Triple
)) {
239 // __bf16 is always available as a load/store only type on AMDGCN.
240 BFloat16Width
= BFloat16Align
= 16;
241 BFloat16Format
= &llvm::APFloat::BFloat();
244 HasLegalHalfType
= true;
// Wavefront size is 32 when the WAVE32 attribute bit is set, otherwise 64.
246 WavefrontSize
= (GPUFeatures
& llvm::AMDGPU::FEATURE_WAVE32
) ? 32 : 64;
247 AllowAMDGPUUnsafeFPAtomics
= Opts
.AllowAMDGPUUnsafeFPAtomics
;
249 // Set pointer width and alignment for the generic address space.
250 PointerWidth
= PointerAlign
= getPointerWidthV(LangAS::Default
);
// With 64-bit pointers, `long` becomes 64 bits wide and size_t, ptrdiff_t and
// intptr_t are expressed in terms of `long`.
251 if (getMaxPointerWidth() == 64) {
252 LongWidth
= LongAlign
= 64;
253 SizeType
= UnsignedLong
;
254 PtrDiffType
= SignedLong
;
255 IntPtrType
= SignedLong
;
258 MaxAtomicPromoteWidth
= MaxAtomicInlineWidth
= 64;
// CUMode is true exactly when the WGP attribute bit is not set.
259 CUMode
= !(GPUFeatures
& llvm::AMDGPU::FEATURE_WGP
);
// Register "image-insts" and "gws" in ReadOnlyFeatures.
260 for (auto F
: {"image-insts", "gws"})
261 ReadOnlyFeatures
.insert(F
);
262 HalfArgsAndReturns
= true;
// Applies language-option-dependent adjustments: runs the base
// TargetInfo::adjust() and then re-selects the address space map. The
// default-is-private map is used when compiling OpenCL without the generic
// address space, or when the triple is not AMDGCN.
265 void AMDGPUTargetInfo::adjust(DiagnosticsEngine
&Diags
, LangOptions
&Opts
) {
266 TargetInfo::adjust(Diags
, Opts
);
267 // ToDo: There are still a few places using default address space as private
268 // address space in OpenCL, which needs to be cleaned up, then the references
269 // to OpenCL can be removed from the following line.
270 setAddressSpaceMap((Opts
.OpenCL
&& !Opts
.OpenCLGenericAddressSpace
) ||
271 !isAMDGCN(getTriple()));
// Returns the builtin string table together with the builtin info records,
// both taken from the file-level BuiltinStorage object.
274 std::pair
<const char *, ArrayRef
<Builtin::Info
>>
275 AMDGPUTargetInfo::getTargetBuiltinStorage() const {
276 return {BuiltinStorage
.StringTable
, BuiltinStorage
.Infos
};
// Emits the target-specific predefined macros into Builder.
//   Opts    - language options; HIP and CUDAIsDevice decide whether this is a
//             HIP host compilation.
//   Builder - receives the macro definitions.
// NOTE(review): several guarding conditions, `else`/`return` lines and trailing
// arguments of defineMacro() calls are not visible in this listing; the notes
// below mark the places where that matters.
279 void AMDGPUTargetInfo::getTargetDefines(const LangOptions
&Opts
,
280 MacroBuilder
&Builder
) const {
281 Builder
.defineMacro("__AMD__");
282 Builder
.defineMacro("__AMDGPU__");
// NOTE(review): no `else` is visible between the two defines below; presumably
// __R600__ is the non-AMDGCN alternative -- confirm against upstream.
284 if (isAMDGCN(getTriple()))
285 Builder
.defineMacro("__AMDGCN__");
287 Builder
.defineMacro("__R600__");
289 // Legacy HIP host code relies on these default attributes to be defined.
290 bool IsHIPHost
= Opts
.HIP
&& !Opts
.CUDAIsDevice
;
// NOTE(review): the statement controlled by this `if` is not visible here.
291 if (GPUKind
== llvm::AMDGPU::GK_NONE
&& !IsHIPHost
)
// Canonical architecture name for GPUKind, from the AMDGCN or R600 helper.
294 llvm::SmallString
<16> CanonName
=
295 (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind
)
296 : getArchNameR600(GPUKind
));
298 // Sanitize the name of generic targets.
299 // e.g. gfx10-1-generic -> gfx10_1_generic
300 if (GPUKind
>= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST
&&
301 GPUKind
<= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST
) {
302 std::replace(CanonName
.begin(), CanonName
.end(), '-', '_');
// Define the per-processor macro __<CanonName>__.
305 Builder
.defineMacro(Twine("__") + Twine(CanonName
) + Twine("__"));
306 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
307 if (isAMDGCN(getTriple()) && !IsHIPHost
) {
308 assert(StringRef(CanonName
).starts_with("gfx") &&
309 "Invalid amdgcn canonical name");
310 StringRef CanonFamilyName
= getArchFamilyNameAMDGCN(GPUKind
);
// NOTE(review): the tail of this defineMacro() call is cut off in the listing.
311 Builder
.defineMacro(Twine("__") + Twine(CanonFamilyName
.upper()) +
// Processor name and target ID, each emitted as a quoted string literal.
313 Builder
.defineMacro("__amdgcn_processor__",
314 Twine("\"") + Twine(CanonName
) + Twine("\""));
315 Builder
.defineMacro("__amdgcn_target_id__",
316 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
// For each possible target-ID feature that has an entry in
// OffloadArchFeatures, define a __amdgcn_feature_<name> macro (with '-'
// replaced by '_') whose value is "1" or "0" according to the stored flag.
317 for (auto F
: getAllPossibleTargetIDFeatures(getTriple(), CanonName
)) {
318 auto Loc
= OffloadArchFeatures
.find(F
);
319 if (Loc
!= OffloadArchFeatures
.end()) {
320 std::string NewF
= F
.str();
321 std::replace(NewF
.begin(), NewF
.end(), '-', '_');
322 Builder
.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF
) +
324 Loc
->second
? "1" : "0");
329 if (AllowAMDGPUUnsafeFPAtomics
)
330 Builder
.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
332 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
333 // removed in the near future.
// NOTE(review): no guarding conditions are visible around the next five
// defineMacro() calls in this listing; confirm against upstream whether each
// one is conditional on a hardware capability.
335 Builder
.defineMacro("__HAS_FMAF__");
337 Builder
.defineMacro("FP_FAST_FMAF");
339 Builder
.defineMacro("__HAS_LDEXPF__");
341 Builder
.defineMacro("__HAS_FP64__");
343 Builder
.defineMacro("FP_FAST_FMA");
// Wavefront size macros; the third argument is the removal notice text passed
// along with the definition.
345 Builder
.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize
),
346 "compile-time-constant access to the wavefront size will "
347 "be removed in a future release");
348 Builder
.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize
),
349 "compile-time-constant access to the wavefront size will "
350 "be removed in a future release");
351 Builder
.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode
));
// Reconciles this target with the auxiliary (host) target Aux. Asserts that
// the half, float and double formats already agree, keeps this target's own
// long double / __float128 settings, and accepts __float128 when Aux has it.
// NOTE(review): nothing is visible between the save and restore sequences
// below; presumably a call that copies Aux's settings sits there. The body of
// the final `if` and the closing braces are also incomplete in this listing --
// confirm against upstream.
354 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo
*Aux
) {
355 assert(HalfFormat
== Aux
->HalfFormat
);
356 assert(FloatFormat
== Aux
->FloatFormat
);
357 assert(DoubleFormat
== Aux
->DoubleFormat
);
359 // On x86_64 long double is 80-bit extended precision format, which is
360 // not supported by AMDGPU. 128-bit floating point format is also not
361 // supported by AMDGPU. Therefore keep its own format for these two types.
362 auto SaveLongDoubleFormat
= LongDoubleFormat
;
363 auto SaveFloat128Format
= Float128Format
;
364 auto SaveLongDoubleWidth
= LongDoubleWidth
;
365 auto SaveLongDoubleAlign
= LongDoubleAlign
;
367 LongDoubleFormat
= SaveLongDoubleFormat
;
368 Float128Format
= SaveFloat128Format
;
369 LongDoubleWidth
= SaveLongDoubleWidth
;
370 LongDoubleAlign
= SaveLongDoubleAlign
;
371 // For certain builtin types supported on the host target, claim they are
372 // supported to pass the compilation of the host code during the device-side
// compilation.
374 // FIXME: As the side effect, we also accept `__float128` uses in the device
375 // code. To reject these builtin types supported in the host target but not in
376 // the device target, one approach would support `device_builtin` attribute
377 // so that we could tell the device builtin types from the host ones. That
378 // also solves the different representations of the same builtin type, such
379 // as `size_t` in the MSVC environment.
380 if (Aux
->hasFloat128Type()) {
// Represent __float128 with the double format on this target.
382 Float128Format
= DoubleFormat
;