1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements AMDGPU TargetInfo objects.
11 //===----------------------------------------------------------------------===//
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 using namespace clang
;
21 using namespace clang::targets
;
26 // If you edit the description strings, make sure you update
27 // getPointerWidthV().
/// Data layout description string used for non-amdgcn (R600) triples; the
/// AMDGPUTargetInfo constructor hands it to resetDataLayout() in that case.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
// Data layout description string used for amdgcn triples; the
// AMDGPUTargetInfo constructor hands it to resetDataLayout() in that case.
// NOTE(review): the embedded listing numbers jump from 36 to 39 and this
// declaration has no terminating ';' in this copy, so a trailing fragment of
// the string may have been lost in extraction -- confirm against upstream
// before relying on this value.
33 static const char *const DataLayoutStringAMDGCN
=
34 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
35 "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
36 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
// Address-space table that setAddressSpaceMap() installs when its
// DefaultIsPrivate argument is false. Each entry's trailing comment names the
// language address space the entry is written for.
// NOTE(review): listing lines 40, 50, 54 and 58, plus the closing "};", are
// absent from this copy, so some entries appear to be missing. Presumably the
// table is positional, in which case every entry after a gap is shifted --
// restore the full table from upstream before compiling.
39 const LangASMap
AMDGPUTargetInfo::AMDGPUDefIsGenMap
= {
41 Global
, // opencl_global
42 Local
, // opencl_local
43 Constant
, // opencl_constant
44 Private
, // opencl_private
45 Generic
, // opencl_generic
46 Global
, // opencl_global_device
47 Global
, // opencl_global_host
48 Global
, // cuda_device
49 Constant
, // cuda_constant
51 Global
, // sycl_global
52 Global
, // sycl_global_device
53 Global
, // sycl_global_host
55 Private
, // sycl_private
56 Generic
, // ptr32_sptr
57 Generic
, // ptr32_uptr
59 Generic
, // hlsl_groupshared
// Address-space table that setAddressSpaceMap() installs when its
// DefaultIsPrivate argument is true. Each entry's trailing comment names the
// language address space the entry is written for.
// NOTE(review): listing lines 63, 73 and 82, plus the closing "};", are absent
// from this copy, so some entries appear to be missing. Presumably the table
// is positional, in which case every entry after a gap is shifted -- restore
// the full table from upstream before compiling.
62 const LangASMap
AMDGPUTargetInfo::AMDGPUDefIsPrivMap
= {
64 Global
, // opencl_global
65 Local
, // opencl_local
66 Constant
, // opencl_constant
67 Private
, // opencl_private
68 Generic
, // opencl_generic
69 Global
, // opencl_global_device
70 Global
, // opencl_global_host
71 Global
, // cuda_device
72 Constant
, // cuda_constant
74 // SYCL address space values for this map are dummy
75 Generic
, // sycl_global
76 Generic
, // sycl_global_device
77 Generic
, // sycl_global_host
78 Generic
, // sycl_local
79 Generic
, // sycl_private
80 Generic
, // ptr32_sptr
81 Generic
, // ptr32_uptr
83 Generic
, // hlsl_groupshared
86 } // namespace targets
89 static constexpr Builtin::Info BuiltinInfo
[] = {
90 #define BUILTIN(ID, TYPE, ATTRS) \
91 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
93 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94 #include "clang/Basic/BuiltinsAMDGPU.def"
97 const char *const AMDGPUTargetInfo::GCCRegNames
[] = {
98 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142 "flat_scratch_lo", "flat_scratch_hi",
143 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171 "a252", "a253", "a254", "a255"
174 ArrayRef
<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175 return llvm::ArrayRef(GCCRegNames
);
// Builds the feature map for CPU: seeds Features via fillAMDGPUFeatureMap(),
// delegates to TargetInfo::initFeatureMap(), then calls
// insertWaveSizeFeature() and reports err_invalid_feature_combination with
// the returned ErrorMsg when that call fails.
// NOTE(review): listing lines 185-186 and 191-196 are absent from this copy,
// so the bodies of both `if` statements, the return statements and the closing
// braces are missing -- presumably each failure path returns false and the
// success path returns true; confirm against upstream.
178 bool AMDGPUTargetInfo::initFeatureMap(
179 llvm::StringMap
<bool> &Features
, DiagnosticsEngine
&Diags
, StringRef CPU
,
180 const std::vector
<std::string
> &FeatureVec
) const {
182 using namespace llvm::AMDGPU
;
// Populate the CPU's default features before applying the base-class logic.
183 fillAMDGPUFeatureMap(CPU
, getTriple(), Features
);
184 if (!TargetInfo::initFeatureMap(Features
, Diags
, CPU
, FeatureVec
))
187 // TODO: Should move this logic into TargetParser
188 std::string ErrorMsg
;
189 if (!insertWaveSizeFeature(CPU
, getTriple(), Features
, ErrorMsg
)) {
190 Diags
.Report(diag::err_invalid_feature_combination
) << ErrorMsg
;
// Appends the valid architecture names to Values, using the AMDGCN list or the
// R600 list from llvm::AMDGPU.
// NOTE(review): listing line 201 (between the two calls) and the closing brace
// are absent from this copy -- presumably line 201 was an `else`, making the
// R600 list the fallback for non-amdgcn triples; confirm against upstream.
197 void AMDGPUTargetInfo::fillValidCPUList(
198 SmallVectorImpl
<StringRef
> &Values
) const {
199 if (isAMDGCN(getTriple()))
200 llvm::AMDGPU::fillValidArchListAMDGCN(Values
);
202 llvm::AMDGPU::fillValidArchListR600(Values
);
205 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate
) {
206 AddrSpaceMap
= DefaultIsPrivate
? &AMDGPUDefIsPrivMap
: &AMDGPUDefIsGenMap
;
// Constructs the target description for Triple/Opts: parses Opts.CPU into
// GPUKind and GPUFeatures (AMDGCN or R600 parser depending on the triple),
// installs the matching data layout string, and initialises the wavefront
// size, pointer/long widths, atomic widths and CUMode fields.
// NOTE(review): listing lines 220, 222, 224, 229-230, 232, 235, 243-244 and
// 247-248 are absent from this copy, so several statements and closing braces
// are missing; confirm every section against upstream.
209 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple
&Triple
,
210 const TargetOptions
&Opts
)
211 : TargetInfo(Triple
),
212 GPUKind(isAMDGCN(Triple
) ?
213 llvm::AMDGPU::parseArchAMDGCN(Opts
.CPU
) :
214 llvm::AMDGPU::parseArchR600(Opts
.CPU
)),
215 GPUFeatures(isAMDGCN(Triple
) ?
216 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind
) :
217 llvm::AMDGPU::getArchAttrR600(GPUKind
)) {
218 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
219 : DataLayoutStringR600
);
// NOTE(review): the right-hand operand of `||` (listing line 222) is missing
// here, so the full condition passed to setAddressSpaceMap() cannot be read
// from this copy.
221 setAddressSpaceMap(Triple
.getOS() == llvm::Triple::Mesa3D
||
223 UseAddrSpaceMapMangling
= true;
225 if (isAMDGCN(Triple
)) {
226 // __bf16 is always available as a load/store only type on AMDGCN.
227 BFloat16Width
= BFloat16Align
= 16;
228 BFloat16Format
= &llvm::APFloat::BFloat();
231 HasLegalHalfType
= true;
// Wavefront size is 32 when the FEATURE_WAVE32 bit is set, otherwise 64.
233 WavefrontSize
= GPUFeatures
& llvm::AMDGPU::FEATURE_WAVE32
? 32 : 64;
234 AllowAMDGPUUnsafeFPAtomics
= Opts
.AllowAMDGPUUnsafeFPAtomics
;
236 // Set pointer width and alignment for the generic address space.
237 PointerWidth
= PointerAlign
= getPointerWidthV(LangAS::Default
);
// With 64-bit pointers, long is 64 bits wide and backs size_t, ptrdiff_t and
// intptr_t.
238 if (getMaxPointerWidth() == 64) {
239 LongWidth
= LongAlign
= 64;
240 SizeType
= UnsignedLong
;
241 PtrDiffType
= SignedLong
;
242 IntPtrType
= SignedLong
;
245 MaxAtomicPromoteWidth
= MaxAtomicInlineWidth
= 64;
// CUMode is true exactly when the FEATURE_WGP bit is clear.
246 CUMode
= !(GPUFeatures
& llvm::AMDGPU::FEATURE_WGP
);
249 void AMDGPUTargetInfo::adjust(DiagnosticsEngine
&Diags
, LangOptions
&Opts
) {
250 TargetInfo::adjust(Diags
, Opts
);
251 // ToDo: There are still a few places using default address space as private
252 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
253 // can be removed from the following line.
254 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts
.OpenCL
||
255 !isAMDGCN(getTriple()));
258 ArrayRef
<Builtin::Info
> AMDGPUTargetInfo::getTargetBuiltins() const {
259 return llvm::ArrayRef(BuiltinInfo
,
260 clang::AMDGPU::LastTSBuiltin
- Builtin::FirstTSBuiltin
);
// Emits the predefined macros for this target into Builder: the generic
// __AMD__/__AMDGPU__ markers, the architecture marker, per-processor and
// per-feature macros when a GPU kind is known, and the wavefront-size and
// CU-mode macros.
// NOTE(review): listing lines 267, 270, 272, 281-282, 294, 296-300, 303, 306,
// 308, 310, 312, 314, 316 and 319-320 are absent from this copy, so guards,
// an `else`, continuation lines and closing braces are missing; confirm the
// control flow against upstream.
263 void AMDGPUTargetInfo::getTargetDefines(const LangOptions
&Opts
,
264 MacroBuilder
&Builder
) const {
265 Builder
.defineMacro("__AMD__");
266 Builder
.defineMacro("__AMDGPU__");
// NOTE(review): presumably __R600__ is the `else` branch of this test (listing
// line 270 is missing) -- confirm.
268 if (isAMDGCN(getTriple()))
269 Builder
.defineMacro("__AMDGCN__");
271 Builder
.defineMacro("__R600__");
273 if (GPUKind
!= llvm::AMDGPU::GK_NONE
) {
// Canonical architecture name, looked up in the AMDGCN or R600 name table.
274 StringRef CanonName
= isAMDGCN(getTriple()) ?
275 getArchNameAMDGCN(GPUKind
) : getArchNameR600(GPUKind
);
276 Builder
.defineMacro(Twine("__") + Twine(CanonName
) + Twine("__"));
277 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
278 if (isAMDGCN(getTriple())) {
279 assert(CanonName
.startswith("gfx") && "Invalid amdgcn canonical name");
// The family name is the canonical name minus its last two characters,
// upper-cased. NOTE(review): the tail of this expression (listing lines
// 281-282) is missing.
280 Builder
.defineMacro(Twine("__") + Twine(CanonName
.drop_back(2).upper()) +
283 if (isAMDGCN(getTriple())) {
284 Builder
.defineMacro("__amdgcn_processor__",
285 Twine("\"") + Twine(CanonName
) + Twine("\""));
286 Builder
.defineMacro("__amdgcn_target_id__",
287 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
// For each possible target-ID feature that is present in OffloadArchFeatures,
// define __amdgcn_feature_<name> (with '-' replaced by '_') as "1" or "0"
// according to the stored flag.
288 for (auto F
: getAllPossibleTargetIDFeatures(getTriple(), CanonName
)) {
289 auto Loc
= OffloadArchFeatures
.find(F
);
290 if (Loc
!= OffloadArchFeatures
.end()) {
291 std::string NewF
= F
.str();
292 std::replace(NewF
.begin(), NewF
.end(), '-', '_');
293 Builder
.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF
) +
295 Loc
->second
? "1" : "0");
301 if (AllowAMDGPUUnsafeFPAtomics
)
302 Builder
.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
304 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
305 // removed in the near future.
// NOTE(review): the lines preceding each of the next five defines are missing
// from this copy; presumably each define is conditional on a capability check
// -- confirm against upstream.
307 Builder
.defineMacro("__HAS_FMAF__");
309 Builder
.defineMacro("FP_FAST_FMAF");
311 Builder
.defineMacro("__HAS_LDEXPF__");
313 Builder
.defineMacro("__HAS_FP64__");
315 Builder
.defineMacro("FP_FAST_FMA");
317 Builder
.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize
));
318 Builder
.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode
));
321 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo
*Aux
) {
322 assert(HalfFormat
== Aux
->HalfFormat
);
323 assert(FloatFormat
== Aux
->FloatFormat
);
324 assert(DoubleFormat
== Aux
->DoubleFormat
);
326 // On x86_64 long double is 80-bit extended precision format, which is
327 // not supported by AMDGPU. 128-bit floating point format is also not
328 // supported by AMDGPU. Therefore keep its own format for these two types.
329 auto SaveLongDoubleFormat
= LongDoubleFormat
;
330 auto SaveFloat128Format
= Float128Format
;
331 auto SaveLongDoubleWidth
= LongDoubleWidth
;
332 auto SaveLongDoubleAlign
= LongDoubleAlign
;
334 LongDoubleFormat
= SaveLongDoubleFormat
;
335 Float128Format
= SaveFloat128Format
;
336 LongDoubleWidth
= SaveLongDoubleWidth
;
337 LongDoubleAlign
= SaveLongDoubleAlign
;
338 // For certain builtin types support on the host target, claim they are
339 // supported to pass the compilation of the host code during the device-side
341 // FIXME: As the side effect, we also accept `__float128` uses in the device
342 // code. To reject these builtin types supported in the host target but not in
343 // the device target, one approach would support `device_builtin` attribute
344 // so that we could tell the device builtin types from the host ones. This
345 // also solves the different representations of the same builtin type, such
346 // as `size_t` in the MSVC environment.
347 if (Aux
->hasFloat128Type()) {
349 Float128Format
= DoubleFormat
;