[clang-format] Fix a bug in aligning comments above PPDirective (#72791)
[llvm-project.git] / clang / lib / Basic / Targets / AMDGPU.cpp
blob409ae32ab4242151c22e223a94e2909d8ae5bd05
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
11 //===----------------------------------------------------------------------===//
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 using namespace clang;
21 using namespace clang::targets;
23 namespace clang {
24 namespace targets {
// Data layout description strings for the two target flavours handled in this
// file. The AMDGPUTargetInfo constructor passes exactly one of them to
// resetDataLayout(), chosen by isAMDGCN() on the triple.
26 // If you edit the description strings, make sure you update
27 // getPointerWidthV().
29 static const char *const DataLayoutStringR600 =
30 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
31 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
33 static const char *const DataLayoutStringAMDGCN =
34 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
35 "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
36 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
37 "-ni:7:8";
// Address space table installed by setAddressSpaceMap() when DefaultIsPrivate
// is false: the Default language address space maps to Generic. The
// initializer is positional; the trailing comment on each entry names the
// language address space that slot stands for, so entries must not be
// reordered.
39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
40 Generic, // Default
41 Global, // opencl_global
42 Local, // opencl_local
43 Constant, // opencl_constant
44 Private, // opencl_private
45 Generic, // opencl_generic
46 Global, // opencl_global_device
47 Global, // opencl_global_host
48 Global, // cuda_device
49 Constant, // cuda_constant
50 Local, // cuda_shared
51 Global, // sycl_global
52 Global, // sycl_global_device
53 Global, // sycl_global_host
54 Local, // sycl_local
55 Private, // sycl_private
56 Generic, // ptr32_sptr
57 Generic, // ptr32_uptr
58 Generic, // ptr64
59 Generic, // hlsl_groupshared
// Address space table installed by setAddressSpaceMap() when DefaultIsPrivate
// is true: the Default language address space maps to Private. Same positional
// layout as AMDGPUDefIsGenMap; only the Default slot and the SYCL slots
// (placeholders here, see the note below) differ.
62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
63 Private, // Default
64 Global, // opencl_global
65 Local, // opencl_local
66 Constant, // opencl_constant
67 Private, // opencl_private
68 Generic, // opencl_generic
69 Global, // opencl_global_device
70 Global, // opencl_global_host
71 Global, // cuda_device
72 Constant, // cuda_constant
73 Local, // cuda_shared
74 // SYCL address space values for this map are dummy
75 Generic, // sycl_global
76 Generic, // sycl_global_device
77 Generic, // sycl_global_host
78 Generic, // sycl_local
79 Generic, // sycl_private
80 Generic, // ptr32_sptr
81 Generic, // ptr32_uptr
82 Generic, // ptr64
83 Generic, // hlsl_groupshared
86 } // namespace targets
87 } // namespace clang
// Table of builtin descriptors returned by getTargetBuiltins(). Every BUILTIN
// or TARGET_BUILTIN entry in BuiltinsAMDGPU.def expands to one Builtin::Info
// initializer: BUILTIN passes nullptr in the fourth field, TARGET_BUILTIN
// passes its FEATURE argument there. All entries use HeaderDesc::NO_HEADER and
// ALL_LANGUAGES.
89 static constexpr Builtin::Info BuiltinInfo[] = {
90 #define BUILTIN(ID, TYPE, ATTRS) \
91 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
93 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94 #include "clang/Basic/BuiltinsAMDGPU.def"
// Register name table exposed through getGCCRegNames(). In order: v0-v255,
// s0-s127, the named registers exec, vcc, scc, m0 and flat_scratch together
// with the _lo/_hi halves of exec, vcc and flat_scratch, and finally a0-a255.
97 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
98 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142 "flat_scratch_lo", "flat_scratch_hi",
143 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171 "a252", "a253", "a254", "a255"
174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175 return llvm::ArrayRef(GCCRegNames);
178 bool AMDGPUTargetInfo::initFeatureMap(
179 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180 const std::vector<std::string> &FeatureVec) const {
182 using namespace llvm::AMDGPU;
183 fillAMDGPUFeatureMap(CPU, getTriple(), Features);
184 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
185 return false;
187 // TODO: Should move this logic into TargetParser
188 std::string ErrorMsg;
189 if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
190 Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
191 return false;
194 return true;
197 void AMDGPUTargetInfo::fillValidCPUList(
198 SmallVectorImpl<StringRef> &Values) const {
199 if (isAMDGCN(getTriple()))
200 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
201 else
202 llvm::AMDGPU::fillValidArchListR600(Values);
205 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
206 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
209 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
210 const TargetOptions &Opts)
211 : TargetInfo(Triple),
212 GPUKind(isAMDGCN(Triple) ?
213 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
214 llvm::AMDGPU::parseArchR600(Opts.CPU)),
215 GPUFeatures(isAMDGCN(Triple) ?
216 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
217 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
218 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
219 : DataLayoutStringR600);
221 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
222 !isAMDGCN(Triple));
223 UseAddrSpaceMapMangling = true;
225 if (isAMDGCN(Triple)) {
226 // __bf16 is always available as a load/store only type on AMDGCN.
227 BFloat16Width = BFloat16Align = 16;
228 BFloat16Format = &llvm::APFloat::BFloat();
231 HasLegalHalfType = true;
232 HasFloat16 = true;
233 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
234 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
236 // Set pointer width and alignment for the generic address space.
237 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
238 if (getMaxPointerWidth() == 64) {
239 LongWidth = LongAlign = 64;
240 SizeType = UnsignedLong;
241 PtrDiffType = SignedLong;
242 IntPtrType = SignedLong;
245 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
246 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
247 for (auto F : {"image-insts", "gws"})
248 ReadOnlyFeatures.insert(F);
249 HalfArgsAndReturns = true;
252 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
253 TargetInfo::adjust(Diags, Opts);
254 // ToDo: There are still a few places using default address space as private
255 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
256 // can be removed from the following line.
257 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
258 !isAMDGCN(getTriple()));
261 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
262 return llvm::ArrayRef(BuiltinInfo,
263 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
266 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
267 MacroBuilder &Builder) const {
268 Builder.defineMacro("__AMD__");
269 Builder.defineMacro("__AMDGPU__");
271 if (isAMDGCN(getTriple()))
272 Builder.defineMacro("__AMDGCN__");
273 else
274 Builder.defineMacro("__R600__");
276 if (GPUKind != llvm::AMDGPU::GK_NONE) {
277 StringRef CanonName = isAMDGCN(getTriple()) ?
278 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
279 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
280 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
281 if (isAMDGCN(getTriple())) {
282 assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
283 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
284 Twine("__"));
286 if (isAMDGCN(getTriple())) {
287 Builder.defineMacro("__amdgcn_processor__",
288 Twine("\"") + Twine(CanonName) + Twine("\""));
289 Builder.defineMacro("__amdgcn_target_id__",
290 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
291 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
292 auto Loc = OffloadArchFeatures.find(F);
293 if (Loc != OffloadArchFeatures.end()) {
294 std::string NewF = F.str();
295 std::replace(NewF.begin(), NewF.end(), '-', '_');
296 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
297 Twine("__"),
298 Loc->second ? "1" : "0");
304 if (AllowAMDGPUUnsafeFPAtomics)
305 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
307 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
308 // removed in the near future.
309 if (hasFMAF())
310 Builder.defineMacro("__HAS_FMAF__");
311 if (hasFastFMAF())
312 Builder.defineMacro("FP_FAST_FMAF");
313 if (hasLDEXPF())
314 Builder.defineMacro("__HAS_LDEXPF__");
315 if (hasFP64())
316 Builder.defineMacro("__HAS_FP64__");
317 if (hasFastFMA())
318 Builder.defineMacro("FP_FAST_FMA");
320 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
321 // ToDo: deprecate this macro for naming consistency.
322 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
323 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
326 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
327 assert(HalfFormat == Aux->HalfFormat);
328 assert(FloatFormat == Aux->FloatFormat);
329 assert(DoubleFormat == Aux->DoubleFormat);
331 // On x86_64 long double is 80-bit extended precision format, which is
332 // not supported by AMDGPU. 128-bit floating point format is also not
333 // supported by AMDGPU. Therefore keep its own format for these two types.
334 auto SaveLongDoubleFormat = LongDoubleFormat;
335 auto SaveFloat128Format = Float128Format;
336 auto SaveLongDoubleWidth = LongDoubleWidth;
337 auto SaveLongDoubleAlign = LongDoubleAlign;
338 copyAuxTarget(Aux);
339 LongDoubleFormat = SaveLongDoubleFormat;
340 Float128Format = SaveFloat128Format;
341 LongDoubleWidth = SaveLongDoubleWidth;
342 LongDoubleAlign = SaveLongDoubleAlign;
343 // For certain builtin types support on the host target, claim they are
344 // support to pass the compilation of the host code during the device-side
345 // compilation.
346 // FIXME: As the side effect, we also accept `__float128` uses in the device
347 // code. To rejct these builtin types supported in the host target but not in
348 // the device target, one approach would support `device_builtin` attribute
349 // so that we could tell the device builtin types from the host ones. The
350 // also solves the different representations of the same builtin type, such
351 // as `size_t` in the MSVC environment.
352 if (Aux->hasFloat128Type()) {
353 HasFloat128 = true;
354 Float128Format = DoubleFormat;