clang/lib/Basic/Targets/AMDGPU.cpp

   1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements AMDGPU TargetInfo objects.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "AMDGPU.h"
  14 #include "clang/Basic/Builtins.h"
  15 #include "clang/Basic/CodeGenOptions.h"
  16 #include "clang/Basic/Diagnostic.h"
  17 #include "clang/Basic/LangOptions.h"
  18 #include "clang/Basic/MacroBuilder.h"
  19 #include "clang/Basic/TargetBuiltins.h"
  20 #include "llvm/ADT/SmallString.h"
  21 using namespace clang;
  22 using namespace clang::targets;
  23
  24 namespace clang {
  25 namespace targets {
  26
  27 // If you edit the description strings, make sure you update
  28 // getPointerWidthV().
  29
  // Data layout string used for non-amdgcn (R600) triples: the
  // AMDGPUTargetInfo constructor hands it to resetDataLayout() when isAMDGCN()
  // is false for the target triple. The two adjacent literals concatenate into
  // one string; the field syntax is LLVM's data-layout specification.
  30 static const char *const DataLayoutStringR600 =
  31     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
  32     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
  33
  // Data layout string used for amdgcn triples (selected by the
  // AMDGPUTargetInfo constructor via isAMDGCN()). Besides the default pointer
  // entry it lists explicit pointer entries for address spaces 1 through 9 and
  // ends with an "ni:7:8:9" component. The literal is split only to fit the
  // line length; the pieces concatenate into a single string.
  34 static const char *const DataLayoutStringAMDGCN =
  35     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
  36     "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
  37     "32-v48:64-v96:128"
  38     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
  39     "-ni:7:8:9";
  40
  // Address-space table installed by setAddressSpaceMap(false). In this
  // variant the Default language address space maps to FLAT_ADDRESS. Entries
  // are positional; the trailing comment on each line names the language
  // address space that slot stands for.
  // NOTE(review): the order presumably has to follow the LangAS enumeration;
  // confirm before adding or reordering entries.
  41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
  42     llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
  43     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
  44     llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
  45     llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
  46     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
  47     llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
  48     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
  49     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
  50     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
  51     llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
  52     llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
  53     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global
  54     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_device
  55     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_host
  56     llvm::AMDGPUAS::LOCAL_ADDRESS,    // sycl_local
  57     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // sycl_private
  58     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
  59     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
  60     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
  61     llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
  62 };
  63
  // Address-space table installed by setAddressSpaceMap(true). In this variant
  // the Default language address space maps to PRIVATE_ADDRESS. Entries are
  // positional; the trailing comment on each line names the language address
  // space that slot stands for.
  // NOTE(review): the order presumably has to follow the LangAS enumeration;
  // confirm before adding or reordering entries.
  64 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
  65     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // Default
  66     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
  67     llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
  68     llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
  69     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
  70     llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
  71     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
  72     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
  73     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
  74     llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
  75     llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
  76     // SYCL address space values for this map are dummy
  77     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
  78     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
  79     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
  80     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
  81     llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
  82     llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
  83     llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
  84     llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
  85     llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
  86
  87 };
  88 } // namespace targets
  89 } // namespace clang
  90
  // Number of AMDGPU builtins: the distance from the first target-specific
  // builtin ID to the last AMDGPU builtin ID.
  91 static constexpr int NumBuiltins =
  92     clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
  93
  // Compile-time builtin table handed out by getTargetBuiltinStorage().
  // BuiltinsAMDGPU.def is included twice with different BUILTIN/TARGET_BUILTIN
  // expansions: the first pass produces the first argument to Make() (the
  // *_STR_TABLE expansions), the second pass fills the braced list with the
  // *_ENTRY expansions.
  // NOTE(review): BUILTIN/TARGET_BUILTIN are redefined for the second pass
  // without an #undef in this file; presumably the .def file undefines them
  // at its end. Confirm.
  94 static constexpr auto BuiltinStorage = Builtin::Storage<NumBuiltins>::Make(
  95 #define BUILTIN CLANG_BUILTIN_STR_TABLE
  96 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
  97 #include "clang/Basic/BuiltinsAMDGPU.def"
  98     , {
  99 #define BUILTIN CLANG_BUILTIN_ENTRY
 100 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 101 #include "clang/Basic/BuiltinsAMDGPU.def"
 102       });
 103
 // Register-name table exposed through getGCCRegNames(). It lists v0-v255,
 // then s0-s127, then the named registers exec, vcc, scc, m0 and flat_scratch
 // followed by the *_lo/*_hi names of exec, vcc and flat_scratch, and finally
 // a0-a255.
 104 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
 105   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
 106   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
 107   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
 108   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
 109   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
 110   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
 111   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
 112   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
 113   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
 114   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
 115   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
 116   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
 117   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
 118   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
 119   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
 120   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
 121   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
 122   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
 123   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
 124   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
 125   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
 126   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
 127   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
 128   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
 129   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
 130   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
 131   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
 132   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
 133   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
 134   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
 135   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
 136   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
 137   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
 138   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
 139   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
 140   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
 141   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
 142   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
 143   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
 144   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
 145   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
 146   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
 147   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
 148   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
 149   "flat_scratch_lo", "flat_scratch_hi",
 150   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
 151   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
 152   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
 153   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
 154   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
 155   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
 156   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
 157   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
 158   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
 159   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
 160   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
 161   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
 162   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
 163   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
 164   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
 165   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
 166   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
 167   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
 168   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
 169   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
 170   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
 171   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
 172   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
 173   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
 174   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
 175   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
 176   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
 177   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
 178   "a252", "a253", "a254", "a255"
 179 };
 180
 181 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
 182   return llvm::ArrayRef(GCCRegNames);
 183 }
 184
 185 bool AMDGPUTargetInfo::initFeatureMap(
 186     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
 187     const std::vector<std::string> &FeatureVec) const {
 188
 189   using namespace llvm::AMDGPU;
 190   fillAMDGPUFeatureMap(CPU, getTriple(), Features);
 191   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
 192     return false;
 193
 194   // TODO: Should move this logic into TargetParser
 195   auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
 196   switch (HasError.first) {
 197   default:
 198     break;
 199   case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
 200     Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
 201     return false;
 202   case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
 203     Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
 204     return false;
 205   }
 206
 207   return true;
 208 }
 209
 210 void AMDGPUTargetInfo::fillValidCPUList(
 211     SmallVectorImpl<StringRef> &Values) const {
 212   if (isAMDGCN(getTriple()))
 213     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
 214   else
 215     llvm::AMDGPU::fillValidArchListR600(Values);
 216 }
 217
 218 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
 219   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
 220 }
 221
 222 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
 223                                    const TargetOptions &Opts)
 224     : TargetInfo(Triple),
 225       GPUKind(isAMDGCN(Triple) ?
 226               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
 227               llvm::AMDGPU::parseArchR600(Opts.CPU)),
 228       GPUFeatures(isAMDGCN(Triple) ?
 229                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
 230                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
 231   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
 232                                         : DataLayoutStringR600);
 233
 234   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
 235                      !isAMDGCN(Triple));
 236   UseAddrSpaceMapMangling = true;
 237
 238   if (isAMDGCN(Triple)) {
 239     // __bf16 is always available as a load/store only type on AMDGCN.
 240     BFloat16Width = BFloat16Align = 16;
 241     BFloat16Format = &llvm::APFloat::BFloat();
 242   }
 243
 244   HasLegalHalfType = true;
 245   HasFloat16 = true;
 246   WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
 247   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
 248
 249   // Set pointer width and alignment for the generic address space.
 250   PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
 251   if (getMaxPointerWidth() == 64) {
 252     LongWidth = LongAlign = 64;
 253     SizeType = UnsignedLong;
 254     PtrDiffType = SignedLong;
 255     IntPtrType = SignedLong;
 256   }
 257
 258   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
 259   CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
 260   for (auto F : {"image-insts", "gws"})
 261     ReadOnlyFeatures.insert(F);
 262   HalfArgsAndReturns = true;
 263 }
 264
 265 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
 266   TargetInfo::adjust(Diags, Opts);
 267   // ToDo: There are still a few places using default address space as private
 268   // address space in OpenCL, which needs to be cleaned up, then the references
 269   // to OpenCL can be removed from the following line.
 270   setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
 271                      !isAMDGCN(getTriple()));
 272 }
 273
 274 std::pair<const char *, ArrayRef<Builtin::Info>>
 275 AMDGPUTargetInfo::getTargetBuiltinStorage() const {
 276   return {BuiltinStorage.StringTable, BuiltinStorage.Infos};
 277 }
 278
 279 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
 280                                         MacroBuilder &Builder) const {
 281   Builder.defineMacro("__AMD__");
 282   Builder.defineMacro("__AMDGPU__");
 283
 284   if (isAMDGCN(getTriple()))
 285     Builder.defineMacro("__AMDGCN__");
 286   else
 287     Builder.defineMacro("__R600__");
 288
 289   // Legacy HIP host code relies on these default attributes to be defined.
 290   bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
 291   if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
 292     return;
 293
 294   llvm::SmallString<16> CanonName =
 295       (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
 296                              : getArchNameR600(GPUKind));
 297
 298   // Sanitize the name of generic targets.
 299   // e.g. gfx10-1-generic -> gfx10_1_generic
 300   if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
 301       GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
 302     std::replace(CanonName.begin(), CanonName.end(), '-', '_');
 303   }
 304
 305   Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
 306   // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
 307   if (isAMDGCN(getTriple()) && !IsHIPHost) {
 308     assert(StringRef(CanonName).starts_with("gfx") &&
 309            "Invalid amdgcn canonical name");
 310     StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
 311     Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
 312                         Twine("__"));
 313     Builder.defineMacro("__amdgcn_processor__",
 314                         Twine("\"") + Twine(CanonName) + Twine("\""));
 315     Builder.defineMacro("__amdgcn_target_id__",
 316                         Twine("\"") + Twine(*getTargetID()) + Twine("\""));
 317     for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
 318       auto Loc = OffloadArchFeatures.find(F);
 319       if (Loc != OffloadArchFeatures.end()) {
 320         std::string NewF = F.str();
 321         std::replace(NewF.begin(), NewF.end(), '-', '_');
 322         Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
 323                                 Twine("__"),
 324                             Loc->second ? "1" : "0");
 325       }
 326     }
 327   }
 328
 329   if (AllowAMDGPUUnsafeFPAtomics)
 330     Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
 331
 332   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
 333   // removed in the near future.
 334   if (hasFMAF())
 335     Builder.defineMacro("__HAS_FMAF__");
 336   if (hasFastFMAF())
 337     Builder.defineMacro("FP_FAST_FMAF");
 338   if (hasLDEXPF())
 339     Builder.defineMacro("__HAS_LDEXPF__");
 340   if (hasFP64())
 341     Builder.defineMacro("__HAS_FP64__");
 342   if (hasFastFMA())
 343     Builder.defineMacro("FP_FAST_FMA");
 344
 345   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
 346                       "compile-time-constant access to the wavefront size will "
 347                       "be removed in a future release");
 348   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
 349                       "compile-time-constant access to the wavefront size will "
 350                       "be removed in a future release");
 351   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
 352 }
 353
 354 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
 355   assert(HalfFormat == Aux->HalfFormat);
 356   assert(FloatFormat == Aux->FloatFormat);
 357   assert(DoubleFormat == Aux->DoubleFormat);
 358
 359   // On x86_64 long double is 80-bit extended precision format, which is
 360   // not supported by AMDGPU. 128-bit floating point format is also not
 361   // supported by AMDGPU. Therefore keep its own format for these two types.
 362   auto SaveLongDoubleFormat = LongDoubleFormat;
 363   auto SaveFloat128Format = Float128Format;
 364   auto SaveLongDoubleWidth = LongDoubleWidth;
 365   auto SaveLongDoubleAlign = LongDoubleAlign;
 366   copyAuxTarget(Aux);
 367   LongDoubleFormat = SaveLongDoubleFormat;
 368   Float128Format = SaveFloat128Format;
 369   LongDoubleWidth = SaveLongDoubleWidth;
 370   LongDoubleAlign = SaveLongDoubleAlign;
 371   // For certain builtin types support on the host target, claim they are
 372   // support to pass the compilation of the host code during the device-side
 373   // compilation.
 374   // FIXME: As the side effect, we also accept `__float128` uses in the device
 375   // code. To rejct these builtin types supported in the host target but not in
 376   // the device target, one approach would support `device_builtin` attribute
 377   // so that we could tell the device builtin types from the host ones. The
 378   // also solves the different representations of the same builtin type, such
 379   // as `size_t` in the MSVC environment.
 380   if (Aux->hasFloat128Type()) {
 381     HasFloat128 = true;
 382     Float128Format = DoubleFormat;
 383   }
 384 }