[docs] Fix build-docs.sh
[llvm-project.git] / clang / lib / Basic / Targets / AMDGPU.cpp
blob1c42bbec837c2405b4fa43fec66512d38e5985da
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
11 //===----------------------------------------------------------------------===//
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
20 using namespace clang;
21 using namespace clang::targets;
23 namespace clang {
24 namespace targets {
26 // If you edit the description strings, make sure you update
27 // getPointerWidthV().
// Data layout string handed to resetDataLayout() for non-amdgcn (R600)
// triples. Default pointers are 32 bits wide ("p:32:32").
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
// Data layout string handed to resetDataLayout() for amdgcn triples. Default
// pointers are 64 bits wide ("p:64:64"); address spaces 2, 3, 5 and 6 use
// 32-bit pointers, and address space 7 is declared non-integral ("ni:7").
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7";
// Language-to-target address space table for the configuration in which the
// default (unqualified) language address space resolves to Generic. Each entry
// is labelled with the language address space it stands for.
// setAddressSpaceMap() installs this table when DefaultIsPrivate is false.
39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
40 Generic, // Default
41 Global, // opencl_global
42 Local, // opencl_local
43 Constant, // opencl_constant
44 Private, // opencl_private
45 Generic, // opencl_generic
46 Global, // opencl_global_device
47 Global, // opencl_global_host
48 Global, // cuda_device
49 Constant, // cuda_constant
50 Local, // cuda_shared
51 Global, // sycl_global
52 Global, // sycl_global_device
53 Global, // sycl_global_host
54 Local, // sycl_local
55 Private, // sycl_private
56 Generic, // ptr32_sptr
57 Generic, // ptr32_uptr
58 Generic // ptr64
// Language-to-target address space table for the configuration in which the
// default (unqualified) language address space resolves to Private. Each entry
// is labelled with the language address space it stands for.
// setAddressSpaceMap() installs this table when DefaultIsPrivate is true.
61 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
62 Private, // Default
63 Global, // opencl_global
64 Local, // opencl_local
65 Constant, // opencl_constant
66 Private, // opencl_private
67 Generic, // opencl_generic
68 Global, // opencl_global_device
69 Global, // opencl_global_host
70 Global, // cuda_device
71 Constant, // cuda_constant
72 Local, // cuda_shared
73 // SYCL address space values for this map are dummy
74 Generic, // sycl_global
75 Generic, // sycl_global_device
76 Generic, // sycl_global_host
77 Generic, // sycl_local
78 Generic, // sycl_private
79 Generic, // ptr32_sptr
80 Generic, // ptr32_uptr
81 Generic // ptr64
84 } // namespace targets
85 } // namespace clang
// Builtin descriptor table generated by including BuiltinsAMDGPU.def.
// Each BUILTIN(...) line of that file expands to an entry whose last field is
// nullptr; each TARGET_BUILTIN(...) line stores FEATURE in that field instead
// (presumably the target feature the builtin requires -- confirm against
// Builtin::Info). Both kinds are registered for ALL_LANGUAGES.
87 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
88 #define BUILTIN(ID, TYPE, ATTRS) \
89 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
90 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
91 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
92 #include "clang/Basic/BuiltinsAMDGPU.def"
// Register names exposed through getGCCRegNames(), in this order:
//   v0..v255, s0..s127,
//   exec, vcc, scc, m0, flat_scratch and the _lo/_hi halves of exec, vcc and
//   flat_scratch,
//   a0..a255.
95 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
96 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
97 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
98 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
99 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
100 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
101 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
102 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
103 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
104 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
105 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
106 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
107 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
108 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
109 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
110 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
111 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
112 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
113 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
114 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
115 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
116 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
117 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
118 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
119 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
120 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
121 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
122 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
123 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
124 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
125 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
126 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
127 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
128 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
129 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
130 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
131 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
132 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
133 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
134 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
135 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
136 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
137 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
138 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
139 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
140 "flat_scratch_lo", "flat_scratch_hi",
141 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
142 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
143 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
144 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
145 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
146 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
147 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
148 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
149 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
150 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
151 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
152 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
153 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
154 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
155 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
156 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
157 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
158 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
159 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
160 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
161 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
162 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
163 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
164 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
165 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
166 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
167 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
168 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
169 "a252", "a253", "a254", "a255"
// Returns the whole GCCRegNames table wrapped in an ArrayRef.
172 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
173 return llvm::makeArrayRef(GCCRegNames);
// Seeds Features with the per-GPU defaults for CPU, then returns the result of
// TargetInfo::initFeatureMap() called with the same arguments.
//
// amdgcn triples: CPU is parsed with parseArchAMDGCN() and the matching case
// sets a fixed list of feature names to true. The gfx9-and-older cases are
// chained with [[fallthrough]], so a newer GPU in that chain also receives
// every feature listed for the older groups beneath it. The gfx10 and gfx11
// groups end in `break` and therefore spell out all of their features
// themselves (gfx1011/gfx1012 fall through only into the gfx1010 group).
// GK_NONE adds nothing.
//
// Other triples: an empty CPU is replaced by "r600" and parsed with
// parseArchR600(); none of the listed R600 kinds adds a feature at present.
//
// In either switch, a kind that is not listed reaches llvm_unreachable().
176 bool AMDGPUTargetInfo::initFeatureMap(
177 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
178 const std::vector<std::string> &FeatureVec) const {
180 using namespace llvm::AMDGPU;
182 // XXX - What does the member GPU mean if device name string passed here?
183 if (isAMDGCN(getTriple())) {
184 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
185 case GK_GFX1103:
186 case GK_GFX1102:
187 case GK_GFX1101:
188 case GK_GFX1100:
189 Features["ci-insts"] = true;
190 Features["dot1-insts"] = true;
191 Features["dot5-insts"] = true;
192 Features["dot6-insts"] = true;
193 Features["dot7-insts"] = true;
194 Features["dot8-insts"] = true;
195 Features["dl-insts"] = true;
196 Features["flat-address-space"] = true;
197 Features["16-bit-insts"] = true;
198 Features["dpp"] = true;
199 Features["gfx8-insts"] = true;
200 Features["gfx9-insts"] = true;
201 Features["gfx10-insts"] = true;
202 Features["gfx10-3-insts"] = true;
203 Features["gfx11-insts"] = true;
204 break;
205 case GK_GFX1036:
206 case GK_GFX1035:
207 case GK_GFX1034:
208 case GK_GFX1033:
209 case GK_GFX1032:
210 case GK_GFX1031:
211 case GK_GFX1030:
212 Features["ci-insts"] = true;
213 Features["dot1-insts"] = true;
214 Features["dot2-insts"] = true;
215 Features["dot5-insts"] = true;
216 Features["dot6-insts"] = true;
217 Features["dot7-insts"] = true;
218 Features["dl-insts"] = true;
219 Features["flat-address-space"] = true;
220 Features["16-bit-insts"] = true;
221 Features["dpp"] = true;
222 Features["gfx8-insts"] = true;
223 Features["gfx9-insts"] = true;
224 Features["gfx10-insts"] = true;
225 Features["gfx10-3-insts"] = true;
226 Features["s-memrealtime"] = true;
227 Features["s-memtime-inst"] = true;
228 break;
229 case GK_GFX1012:
230 case GK_GFX1011:
231 Features["dot1-insts"] = true;
232 Features["dot2-insts"] = true;
233 Features["dot5-insts"] = true;
234 Features["dot6-insts"] = true;
235 Features["dot7-insts"] = true;
236 [[fallthrough]];
237 case GK_GFX1013:
238 case GK_GFX1010:
239 Features["dl-insts"] = true;
240 Features["ci-insts"] = true;
241 Features["flat-address-space"] = true;
242 Features["16-bit-insts"] = true;
243 Features["dpp"] = true;
244 Features["gfx8-insts"] = true;
245 Features["gfx9-insts"] = true;
246 Features["gfx10-insts"] = true;
247 Features["s-memrealtime"] = true;
248 Features["s-memtime-inst"] = true;
249 break;
// Start of the cumulative chain: each group below adds its own features and
// then falls through to the next older group, down to gfx600.
250 case GK_GFX940:
251 Features["gfx940-insts"] = true;
252 Features["fp8-insts"] = true;
253 [[fallthrough]];
254 case GK_GFX90A:
255 Features["gfx90a-insts"] = true;
256 [[fallthrough]];
257 case GK_GFX908:
258 Features["dot3-insts"] = true;
259 Features["dot4-insts"] = true;
260 Features["dot5-insts"] = true;
261 Features["dot6-insts"] = true;
262 Features["mai-insts"] = true;
263 [[fallthrough]];
264 case GK_GFX906:
265 Features["dl-insts"] = true;
266 Features["dot1-insts"] = true;
267 Features["dot2-insts"] = true;
268 Features["dot7-insts"] = true;
269 [[fallthrough]];
270 case GK_GFX90C:
271 case GK_GFX909:
272 case GK_GFX904:
273 case GK_GFX902:
274 case GK_GFX900:
275 Features["gfx9-insts"] = true;
276 [[fallthrough]];
277 case GK_GFX810:
278 case GK_GFX805:
279 case GK_GFX803:
280 case GK_GFX802:
281 case GK_GFX801:
282 Features["gfx8-insts"] = true;
283 Features["16-bit-insts"] = true;
284 Features["dpp"] = true;
285 Features["s-memrealtime"] = true;
286 [[fallthrough]];
287 case GK_GFX705:
288 case GK_GFX704:
289 case GK_GFX703:
290 case GK_GFX702:
291 case GK_GFX701:
292 case GK_GFX700:
293 Features["ci-insts"] = true;
294 Features["flat-address-space"] = true;
295 [[fallthrough]];
296 case GK_GFX602:
297 case GK_GFX601:
298 case GK_GFX600:
299 Features["s-memtime-inst"] = true;
300 break;
301 case GK_NONE:
302 break;
303 default:
304 llvm_unreachable("Unhandled GPU!");
306 } else {
307 if (CPU.empty())
308 CPU = "r600";
310 switch (llvm::AMDGPU::parseArchR600(CPU)) {
311 case GK_CAYMAN:
312 case GK_CYPRESS:
313 case GK_RV770:
314 case GK_RV670:
315 // TODO: Add fp64 when implemented.
316 break;
317 case GK_TURKS:
318 case GK_CAICOS:
319 case GK_BARTS:
320 case GK_SUMO:
321 case GK_REDWOOD:
322 case GK_JUNIPER:
323 case GK_CEDAR:
324 case GK_RV730:
325 case GK_RV710:
326 case GK_RS880:
327 case GK_R630:
328 case GK_R600:
329 break;
330 default:
331 llvm_unreachable("Unhandled GPU!");
335 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
// Fills Values with the valid CPU names by delegating to
// fillValidArchListAMDGCN() for amdgcn triples and to fillValidArchListR600()
// for every other triple.
338 void AMDGPUTargetInfo::fillValidCPUList(
339 SmallVectorImpl<StringRef> &Values) const {
340 if (isAMDGCN(getTriple()))
341 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
342 else
343 llvm::AMDGPU::fillValidArchListR600(Values);
// Points AddrSpaceMap at AMDGPUDefIsPrivMap when DefaultIsPrivate is true and
// at AMDGPUDefIsGenMap otherwise.
346 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
347 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
// Builds the target description for Triple using the options in Opts.
// - GPUKind is parsed from Opts.CPU with the AMDGCN or the R600 parser,
//   depending on the triple; GPUFeatures holds the arch attributes returned
//   for that kind.
// - The data layout string is chosen by the same amdgcn/R600 split.
// - The initial address space map treats the default address space as private
//   when the OS is Mesa3D or the triple is not amdgcn.
// - WavefrontSize is 32 when GPUFeatures has FEATURE_WAVE32 set, else 64.
// - When getMaxPointerWidth() reports 64, `long` becomes 64 bits wide and
//   SizeType / PtrDiffType / IntPtrType are switched to the long-based types.
350 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
351 const TargetOptions &Opts)
352 : TargetInfo(Triple),
353 GPUKind(isAMDGCN(Triple) ?
354 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
355 llvm::AMDGPU::parseArchR600(Opts.CPU)),
356 GPUFeatures(isAMDGCN(Triple) ?
357 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
358 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
359 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
360 : DataLayoutStringR600);
362 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
363 !isAMDGCN(Triple));
364 UseAddrSpaceMapMangling = true;
366 HasLegalHalfType = true;
367 HasFloat16 = true;
368 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
369 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
371 // Set pointer width and alignment for target address space 0.
372 PointerWidth = PointerAlign = getPointerWidthV(Generic);
373 if (getMaxPointerWidth() == 64) {
374 LongWidth = LongAlign = 64;
375 SizeType = UnsignedLong;
376 PtrDiffType = SignedLong;
377 IntPtrType = SignedLong;
380 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
// Runs the base-class TargetInfo::adjust() and then re-selects the address
// space map from the language options: the default address space is treated
// as private when compiling OpenCL or when the triple is not amdgcn.
383 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
384 TargetInfo::adjust(Diags, Opts);
385 // TODO: There are still a few places using default address space as private
386 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
387 // can be removed from the following line.
388 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
389 !isAMDGCN(getTriple()));
// Returns the BuiltinInfo table as an ArrayRef whose length is
// clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin.
392 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
393 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
394 Builtin::FirstTSBuiltin);
// Registers this target's predefined macros on Builder:
// - __AMD__ and __AMDGPU__ always, plus __AMDGCN__ or __R600__ by triple.
// - When GPUKind is not GK_NONE: __<canonical arch name>__; for amdgcn also
//   the family macro, __amdgcn_processor__, __amdgcn_target_id__, and one
//   __amdgcn_feature_<name>__ macro (value "1" or "0") for each possible
//   target-ID feature that is present in OffloadArchFeatures, with every '-'
//   in the feature name replaced by '_'.
// - __AMDGCN_UNSAFE_FP_ATOMICS__ when AllowAMDGPUUnsafeFPAtomics is set.
// - __HAS_FMAF__, FP_FAST_FMAF, __HAS_LDEXPF__, __HAS_FP64__ and FP_FAST_FMA
//   according to the corresponding has*() queries.
// - __AMDGCN_WAVEFRONT_SIZE, defined to WavefrontSize.
397 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
398 MacroBuilder &Builder) const {
399 Builder.defineMacro("__AMD__");
400 Builder.defineMacro("__AMDGPU__");
402 if (isAMDGCN(getTriple()))
403 Builder.defineMacro("__AMDGCN__");
404 else
405 Builder.defineMacro("__R600__");
407 if (GPUKind != llvm::AMDGPU::GK_NONE) {
408 StringRef CanonName = isAMDGCN(getTriple()) ?
409 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
410 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
411 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
412 if (isAMDGCN(getTriple())) {
413 assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
// The family name is the canonical name minus its last two characters,
// upper-cased.
414 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
415 Twine("__"));
417 if (isAMDGCN(getTriple())) {
418 Builder.defineMacro("__amdgcn_processor__",
419 Twine("\"") + Twine(CanonName) + Twine("\""));
420 Builder.defineMacro("__amdgcn_target_id__",
421 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
422 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
423 auto Loc = OffloadArchFeatures.find(F);
424 if (Loc != OffloadArchFeatures.end()) {
425 std::string NewF = F.str();
426 std::replace(NewF.begin(), NewF.end(), '-', '_');
427 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
428 Twine("__"),
429 Loc->second ? "1" : "0");
435 if (AllowAMDGPUUnsafeFPAtomics)
436 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
438 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
439 // removed in the near future.
440 if (hasFMAF())
441 Builder.defineMacro("__HAS_FMAF__");
442 if (hasFastFMAF())
443 Builder.defineMacro("FP_FAST_FMAF");
444 if (hasLDEXPF())
445 Builder.defineMacro("__HAS_LDEXPF__");
446 if (hasFP64())
447 Builder.defineMacro("__HAS_FP64__");
448 if (hasFastFMA())
449 Builder.defineMacro("FP_FAST_FMA");
451 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
454 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
455 assert(HalfFormat == Aux->HalfFormat);
456 assert(FloatFormat == Aux->FloatFormat);
457 assert(DoubleFormat == Aux->DoubleFormat);
459 // On x86_64 long double is 80-bit extended precision format, which is
460 // not supported by AMDGPU. 128-bit floating point format is also not
461 // supported by AMDGPU. Therefore keep its own format for these two types.
462 auto SaveLongDoubleFormat = LongDoubleFormat;
463 auto SaveFloat128Format = Float128Format;
464 auto SaveLongDoubleWidth = LongDoubleWidth;
465 auto SaveLongDoubleAlign = LongDoubleAlign;
466 copyAuxTarget(Aux);
467 LongDoubleFormat = SaveLongDoubleFormat;
468 Float128Format = SaveFloat128Format;
469 LongDoubleWidth = SaveLongDoubleWidth;
470 LongDoubleAlign = SaveLongDoubleAlign;
471 // For certain builtin types supported on the host target, claim they are
472 // supported here as well, so that host code passes compilation during the
473 // device-side compilation.
474 // FIXME: As a side effect, we also accept `__float128` uses in the device
475 // code. To reject these builtin types supported in the host target but not in
476 // the device target, one approach would be to support a `device_builtin`
477 // attribute so that we could tell the device builtin types from the host ones.
478 // This would also solve the different representations of the same builtin
479 // type, such as `size_t` in the MSVC environment.
480 if (Aux->hasFloat128Type()) {
481 HasFloat128 = true;
482 Float128Format = DoubleFormat;