1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "CommonArgs.h"
11 #include "clang/Basic/TargetID.h"
12 #include "clang/Config/config.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/DriverDiagnostic.h"
15 #include "clang/Driver/InputInfo.h"
16 #include "clang/Driver/Options.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Option/ArgList.h"
19 #include "llvm/Support/Error.h"
20 #include "llvm/Support/LineIterator.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/Process.h"
23 #include "llvm/Support/VirtualFileSystem.h"
24 #include "llvm/TargetParser/Host.h"
26 #include <system_error>
28 using namespace clang::driver
;
29 using namespace clang::driver::tools
;
30 using namespace clang::driver::toolchains
;
31 using namespace clang
;
32 using namespace llvm::opt
;
34 // Look for sub-directory starts with PackageName under ROCm candidate path.
35 // If there is one and only one matching sub-directory found, append the
36 // sub-directory to Path. If there is no matching sub-directory or there are
37 // more than one matching sub-directories, diagnose them. Returns the full
38 // path of the package if there is only one matching sub-directory, otherwise
39 // returns an empty string.
41 RocmInstallationDetector::findSPACKPackage(const Candidate
&Cand
,
42 StringRef PackageName
) {
46 std::string Prefix
= Twine(PackageName
+ "-" + Cand
.SPACKReleaseStr
).str();
47 llvm::SmallVector
<llvm::SmallString
<0>> SubDirs
;
48 for (llvm::vfs::directory_iterator File
= D
.getVFS().dir_begin(Cand
.Path
, EC
),
50 File
!= FileEnd
&& !EC
; File
.increment(EC
)) {
51 llvm::StringRef FileName
= llvm::sys::path::filename(File
->path());
52 if (FileName
.starts_with(Prefix
)) {
53 SubDirs
.push_back(FileName
);
54 if (SubDirs
.size() > 1)
58 if (SubDirs
.size() == 1) {
59 auto PackagePath
= Cand
.Path
;
60 llvm::sys::path::append(PackagePath
, SubDirs
[0]);
63 if (SubDirs
.size() == 0 && Verbose
) {
64 llvm::errs() << "SPACK package " << Prefix
<< " not found at " << Cand
.Path
69 if (SubDirs
.size() > 1 && Verbose
) {
70 llvm::errs() << "Cannot use SPACK package " << Prefix
<< " at " << Cand
.Path
71 << " due to multiple installations for the same version\n";
76 void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path
) {
77 assert(!Path
.empty());
79 const StringRef
Suffix(".bc");
80 const StringRef
Suffix2(".amdgcn.bc");
83 for (llvm::vfs::directory_iterator LI
= D
.getVFS().dir_begin(Path
, EC
), LE
;
84 !EC
&& LI
!= LE
; LI
= LI
.increment(EC
)) {
85 StringRef FilePath
= LI
->path();
86 StringRef FileName
= llvm::sys::path::filename(FilePath
);
87 if (!FileName
.ends_with(Suffix
))
91 if (FileName
.ends_with(Suffix2
))
92 BaseName
= FileName
.drop_back(Suffix2
.size());
93 else if (FileName
.ends_with(Suffix
))
94 BaseName
= FileName
.drop_back(Suffix
.size());
96 const StringRef ABIVersionPrefix
= "oclc_abi_version_";
97 if (BaseName
== "ocml") {
99 } else if (BaseName
== "ockl") {
101 } else if (BaseName
== "opencl") {
103 } else if (BaseName
== "hip") {
105 } else if (BaseName
== "asanrtl") {
107 } else if (BaseName
== "oclc_finite_only_off") {
108 FiniteOnly
.Off
= FilePath
;
109 } else if (BaseName
== "oclc_finite_only_on") {
110 FiniteOnly
.On
= FilePath
;
111 } else if (BaseName
== "oclc_daz_opt_on") {
112 DenormalsAreZero
.On
= FilePath
;
113 } else if (BaseName
== "oclc_daz_opt_off") {
114 DenormalsAreZero
.Off
= FilePath
;
115 } else if (BaseName
== "oclc_correctly_rounded_sqrt_on") {
116 CorrectlyRoundedSqrt
.On
= FilePath
;
117 } else if (BaseName
== "oclc_correctly_rounded_sqrt_off") {
118 CorrectlyRoundedSqrt
.Off
= FilePath
;
119 } else if (BaseName
== "oclc_unsafe_math_on") {
120 UnsafeMath
.On
= FilePath
;
121 } else if (BaseName
== "oclc_unsafe_math_off") {
122 UnsafeMath
.Off
= FilePath
;
123 } else if (BaseName
== "oclc_wavefrontsize64_on") {
124 WavefrontSize64
.On
= FilePath
;
125 } else if (BaseName
== "oclc_wavefrontsize64_off") {
126 WavefrontSize64
.Off
= FilePath
;
127 } else if (BaseName
.starts_with(ABIVersionPrefix
)) {
128 unsigned ABIVersionNumber
;
129 if (BaseName
.drop_front(ABIVersionPrefix
.size())
130 .getAsInteger(/*Redex=*/0, ABIVersionNumber
))
132 ABIVersionMap
[ABIVersionNumber
] = FilePath
.str();
134 // Process all bitcode filenames that look like
135 // ocl_isa_version_XXX.amdgcn.bc
136 const StringRef DeviceLibPrefix
= "oclc_isa_version_";
137 if (!BaseName
.starts_with(DeviceLibPrefix
))
140 StringRef IsaVersionNumber
=
141 BaseName
.drop_front(DeviceLibPrefix
.size());
143 llvm::Twine GfxName
= Twine("gfx") + IsaVersionNumber
;
146 std::make_pair(GfxName
.toStringRef(Tmp
), FilePath
.str()));
151 // Parse and extract version numbers from `.hipVersion`. Return `true` if
152 // the parsing fails.
153 bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V
) {
154 SmallVector
<StringRef
, 4> VersionParts
;
155 V
.split(VersionParts
, '\n');
156 unsigned Major
= ~0U;
157 unsigned Minor
= ~0U;
158 for (auto Part
: VersionParts
) {
159 auto Splits
= Part
.rtrim().split('=');
160 if (Splits
.first
== "HIP_VERSION_MAJOR") {
161 if (Splits
.second
.getAsInteger(0, Major
))
163 } else if (Splits
.first
== "HIP_VERSION_MINOR") {
164 if (Splits
.second
.getAsInteger(0, Minor
))
166 } else if (Splits
.first
== "HIP_VERSION_PATCH")
167 VersionPatch
= Splits
.second
.str();
169 if (Major
== ~0U || Minor
== ~0U)
171 VersionMajorMinor
= llvm::VersionTuple(Major
, Minor
);
173 (Twine(Major
) + "." + Twine(Minor
) + "." + VersionPatch
).str();
177 /// \returns a list of candidate directories for ROCm installation, which is
178 /// cached and populated only once.
179 const SmallVectorImpl
<RocmInstallationDetector::Candidate
> &
180 RocmInstallationDetector::getInstallationPathCandidates() {
182 // Return the cached candidate list if it has already been populated.
183 if (!ROCmSearchDirs
.empty())
184 return ROCmSearchDirs
;
186 auto DoPrintROCmSearchDirs
= [&]() {
187 if (PrintROCmSearchDirs
)
188 for (auto Cand
: ROCmSearchDirs
) {
189 llvm::errs() << "ROCm installation search path";
191 llvm::errs() << " (Spack " << Cand
.SPACKReleaseStr
<< ")";
192 llvm::errs() << ": " << Cand
.Path
<< '\n';
196 // For candidate specified by --rocm-path we do not do strict check, i.e.,
197 // checking existence of HIP version file and device library files.
198 if (!RocmPathArg
.empty()) {
199 ROCmSearchDirs
.emplace_back(RocmPathArg
.str());
200 DoPrintROCmSearchDirs();
201 return ROCmSearchDirs
;
202 } else if (std::optional
<std::string
> RocmPathEnv
=
203 llvm::sys::Process::GetEnv("ROCM_PATH")) {
204 if (!RocmPathEnv
->empty()) {
205 ROCmSearchDirs
.emplace_back(std::move(*RocmPathEnv
));
206 DoPrintROCmSearchDirs();
207 return ROCmSearchDirs
;
211 // Try to find relative to the compiler binary.
212 const char *InstallDir
= D
.getInstalledDir();
214 // Check both a normal Unix prefix position of the clang binary, as well as
215 // the Windows-esque layout the ROCm packages use with the host architecture
216 // subdirectory of bin.
217 auto DeduceROCmPath
= [](StringRef ClangPath
) {
218 // Strip off directory (usually bin)
219 StringRef ParentDir
= llvm::sys::path::parent_path(ClangPath
);
220 StringRef ParentName
= llvm::sys::path::filename(ParentDir
);
222 // Some builds use bin/{host arch}, so go up again.
223 if (ParentName
== "bin") {
224 ParentDir
= llvm::sys::path::parent_path(ParentDir
);
225 ParentName
= llvm::sys::path::filename(ParentDir
);
228 // Detect ROCm packages built with SPACK.
229 // clang is installed at
230 // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory.
231 // We only consider the parent directory of llvm-amdgpu package as ROCm
232 // installation candidate for SPACK.
233 if (ParentName
.starts_with("llvm-amdgpu-")) {
235 ParentName
.drop_front(strlen("llvm-amdgpu-")).split('-');
236 auto SPACKReleaseStr
= SPACKPostfix
.first
;
237 if (!SPACKReleaseStr
.empty()) {
238 ParentDir
= llvm::sys::path::parent_path(ParentDir
);
239 return Candidate(ParentDir
.str(), /*StrictChecking=*/true,
244 // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
245 // Some versions of the aomp package install to /opt/rocm/aomp/bin
246 if (ParentName
== "llvm" || ParentName
.starts_with("aomp"))
247 ParentDir
= llvm::sys::path::parent_path(ParentDir
);
249 return Candidate(ParentDir
.str(), /*StrictChecking=*/true);
252 // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic
253 // link of clang itself.
254 ROCmSearchDirs
.emplace_back(DeduceROCmPath(InstallDir
));
256 // Deduce ROCm path by the real path of the invoked clang, resolving symbolic
257 // link of clang itself.
258 llvm::SmallString
<256> RealClangPath
;
259 llvm::sys::fs::real_path(D
.getClangProgramPath(), RealClangPath
);
260 auto ParentPath
= llvm::sys::path::parent_path(RealClangPath
);
261 if (ParentPath
!= InstallDir
)
262 ROCmSearchDirs
.emplace_back(DeduceROCmPath(ParentPath
));
264 // Device library may be installed in clang or resource directory.
265 auto ClangRoot
= llvm::sys::path::parent_path(InstallDir
);
266 auto RealClangRoot
= llvm::sys::path::parent_path(ParentPath
);
267 ROCmSearchDirs
.emplace_back(ClangRoot
.str(), /*StrictChecking=*/true);
268 if (RealClangRoot
!= ClangRoot
)
269 ROCmSearchDirs
.emplace_back(RealClangRoot
.str(), /*StrictChecking=*/true);
270 ROCmSearchDirs
.emplace_back(D
.ResourceDir
,
271 /*StrictChecking=*/true);
273 ROCmSearchDirs
.emplace_back(D
.SysRoot
+ "/opt/rocm",
274 /*StrictChecking=*/true);
276 // Find the latest /opt/rocm-{release} directory.
278 std::string LatestROCm
;
279 llvm::VersionTuple LatestVer
;
280 // Get ROCm version from ROCm directory name.
281 auto GetROCmVersion
= [](StringRef DirName
) {
282 llvm::VersionTuple V
;
283 std::string VerStr
= DirName
.drop_front(strlen("rocm-")).str();
284 // The ROCm directory name follows the format of
285 // rocm-{major}.{minor}.{subMinor}[-{build}]
286 std::replace(VerStr
.begin(), VerStr
.end(), '-', '.');
290 for (llvm::vfs::directory_iterator
291 File
= D
.getVFS().dir_begin(D
.SysRoot
+ "/opt", EC
),
293 File
!= FileEnd
&& !EC
; File
.increment(EC
)) {
294 llvm::StringRef FileName
= llvm::sys::path::filename(File
->path());
295 if (!FileName
.starts_with("rocm-"))
297 if (LatestROCm
.empty()) {
298 LatestROCm
= FileName
.str();
299 LatestVer
= GetROCmVersion(LatestROCm
);
302 auto Ver
= GetROCmVersion(FileName
);
303 if (LatestVer
< Ver
) {
304 LatestROCm
= FileName
.str();
308 if (!LatestROCm
.empty())
309 ROCmSearchDirs
.emplace_back(D
.SysRoot
+ "/opt/" + LatestROCm
,
310 /*StrictChecking=*/true);
312 ROCmSearchDirs
.emplace_back(D
.SysRoot
+ "/usr/local",
313 /*StrictChecking=*/true);
314 ROCmSearchDirs
.emplace_back(D
.SysRoot
+ "/usr",
315 /*StrictChecking=*/true);
317 DoPrintROCmSearchDirs();
318 return ROCmSearchDirs
;
321 RocmInstallationDetector::RocmInstallationDetector(
322 const Driver
&D
, const llvm::Triple
&HostTriple
,
323 const llvm::opt::ArgList
&Args
, bool DetectHIPRuntime
, bool DetectDeviceLib
)
325 Verbose
= Args
.hasArg(options::OPT_v
);
326 RocmPathArg
= Args
.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ
);
327 PrintROCmSearchDirs
=
328 Args
.hasArg(clang::driver::options::OPT_print_rocm_search_dirs
);
329 RocmDeviceLibPathArg
=
330 Args
.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ
);
331 HIPPathArg
= Args
.getLastArgValue(clang::driver::options::OPT_hip_path_EQ
);
333 Args
.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ
);
334 HasHIPStdParLibrary
=
335 !HIPStdParPathArg
.empty() && D
.getVFS().exists(HIPStdParPathArg
+
336 "/hipstdpar_lib.hpp");
337 HIPRocThrustPathArg
=
338 Args
.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ
);
339 HasRocThrustLibrary
= !HIPRocThrustPathArg
.empty() &&
340 D
.getVFS().exists(HIPRocThrustPathArg
+ "/thrust");
342 Args
.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ
);
343 HasRocPrimLibrary
= !HIPRocPrimPathArg
.empty() &&
344 D
.getVFS().exists(HIPRocPrimPathArg
+ "/rocprim");
346 if (auto *A
= Args
.getLastArg(clang::driver::options::OPT_hip_version_EQ
)) {
347 HIPVersionArg
= A
->getValue();
348 unsigned Major
= ~0U;
349 unsigned Minor
= ~0U;
350 SmallVector
<StringRef
, 3> Parts
;
351 HIPVersionArg
.split(Parts
, '.');
353 Parts
[0].getAsInteger(0, Major
);
354 if (Parts
.size() > 1)
355 Parts
[1].getAsInteger(0, Minor
);
356 if (Parts
.size() > 2)
357 VersionPatch
= Parts
[2].str();
358 if (VersionPatch
.empty())
360 if (Major
!= ~0U && Minor
== ~0U)
362 if (Major
== ~0U || Minor
== ~0U)
363 D
.Diag(diag::err_drv_invalid_value
)
364 << A
->getAsString(Args
) << HIPVersionArg
;
366 VersionMajorMinor
= llvm::VersionTuple(Major
, Minor
);
368 (Twine(Major
) + "." + Twine(Minor
) + "." + VersionPatch
).str();
370 VersionPatch
= DefaultVersionPatch
;
372 llvm::VersionTuple(DefaultVersionMajor
, DefaultVersionMinor
);
373 DetectedVersion
= (Twine(DefaultVersionMajor
) + "." +
374 Twine(DefaultVersionMinor
) + "." + VersionPatch
)
378 if (DetectHIPRuntime
)
381 detectDeviceLibrary();
384 void RocmInstallationDetector::detectDeviceLibrary() {
385 assert(LibDevicePath
.empty());
387 if (!RocmDeviceLibPathArg
.empty())
388 LibDevicePath
= RocmDeviceLibPathArg
[RocmDeviceLibPathArg
.size() - 1];
389 else if (std::optional
<std::string
> LibPathEnv
=
390 llvm::sys::Process::GetEnv("HIP_DEVICE_LIB_PATH"))
391 LibDevicePath
= std::move(*LibPathEnv
);
393 auto &FS
= D
.getVFS();
394 if (!LibDevicePath
.empty()) {
395 // Maintain compatability with HIP flag/envvar pointing directly at the
396 // bitcode library directory. This points directly at the library path instead
397 // of the rocm root installation.
398 if (!FS
.exists(LibDevicePath
))
401 scanLibDevicePath(LibDevicePath
);
402 HasDeviceLibrary
= allGenericLibsValid() && !LibDeviceMap
.empty();
406 // Check device library exists at the given path.
407 auto CheckDeviceLib
= [&](StringRef Path
, bool StrictChecking
) {
408 bool CheckLibDevice
= (!NoBuiltinLibs
|| StrictChecking
);
409 if (CheckLibDevice
&& !FS
.exists(Path
))
412 scanLibDevicePath(Path
);
414 if (!NoBuiltinLibs
) {
415 // Check that the required non-target libraries are all available.
416 if (!allGenericLibsValid())
419 // Check that we have found at least one libdevice that we can link in
420 // if -nobuiltinlib hasn't been specified.
421 if (LibDeviceMap
.empty())
427 // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode
428 LibDevicePath
= D
.ResourceDir
;
429 llvm::sys::path::append(LibDevicePath
, CLANG_INSTALL_LIBDIR_BASENAME
,
430 "amdgcn", "bitcode");
431 HasDeviceLibrary
= CheckDeviceLib(LibDevicePath
, true);
432 if (HasDeviceLibrary
)
435 // Find device libraries in a legacy ROCm directory structure
436 // ${ROCM_ROOT}/amdgcn/bitcode/*
437 auto &ROCmDirs
= getInstallationPathCandidates();
438 for (const auto &Candidate
: ROCmDirs
) {
439 LibDevicePath
= Candidate
.Path
;
440 llvm::sys::path::append(LibDevicePath
, "amdgcn", "bitcode");
441 HasDeviceLibrary
= CheckDeviceLib(LibDevicePath
, Candidate
.StrictChecking
);
442 if (HasDeviceLibrary
)
447 void RocmInstallationDetector::detectHIPRuntime() {
448 SmallVector
<Candidate
, 4> HIPSearchDirs
;
449 if (!HIPPathArg
.empty())
450 HIPSearchDirs
.emplace_back(HIPPathArg
.str());
451 else if (std::optional
<std::string
> HIPPathEnv
=
452 llvm::sys::Process::GetEnv("HIP_PATH")) {
453 if (!HIPPathEnv
->empty())
454 HIPSearchDirs
.emplace_back(std::move(*HIPPathEnv
));
456 if (HIPSearchDirs
.empty())
457 HIPSearchDirs
.append(getInstallationPathCandidates());
458 auto &FS
= D
.getVFS();
460 for (const auto &Candidate
: HIPSearchDirs
) {
461 InstallPath
= Candidate
.Path
;
462 if (InstallPath
.empty() || !FS
.exists(InstallPath
))
464 // HIP runtime built by SPACK is installed to
465 // <rocm_root>/hip-<rocm_release_string>-<hash> directory.
466 auto SPACKPath
= findSPACKPackage(Candidate
, "hip");
467 InstallPath
= SPACKPath
.empty() ? InstallPath
: SPACKPath
;
469 BinPath
= InstallPath
;
470 llvm::sys::path::append(BinPath
, "bin");
471 IncludePath
= InstallPath
;
472 llvm::sys::path::append(IncludePath
, "include");
473 LibPath
= InstallPath
;
474 llvm::sys::path::append(LibPath
, "lib");
475 SharePath
= InstallPath
;
476 llvm::sys::path::append(SharePath
, "share");
478 // Get parent of InstallPath and append "share"
479 SmallString
<0> ParentSharePath
= llvm::sys::path::parent_path(InstallPath
);
480 llvm::sys::path::append(ParentSharePath
, "share");
482 auto Append
= [](SmallString
<0> &path
, const Twine
&a
, const Twine
&b
= "",
483 const Twine
&c
= "", const Twine
&d
= "") {
484 SmallString
<0> newpath
= path
;
485 llvm::sys::path::append(newpath
, a
, b
, c
, d
);
488 // If HIP version file can be found and parsed, use HIP version from there.
489 for (const auto &VersionFilePath
:
490 {Append(SharePath
, "hip", "version"),
491 Append(ParentSharePath
, "hip", "version"),
492 Append(BinPath
, ".hipVersion")}) {
493 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> VersionFile
=
494 FS
.getBufferForFile(VersionFilePath
);
497 if (HIPVersionArg
.empty() && VersionFile
)
498 if (parseHIPVersionFile((*VersionFile
)->getBuffer()))
501 HasHIPRuntime
= true;
504 // Otherwise, if -rocm-path is specified (no strict checking), use the
505 // default HIP version or specified by --hip-version.
506 if (!Candidate
.StrictChecking
) {
507 HasHIPRuntime
= true;
511 HasHIPRuntime
= false;
514 void RocmInstallationDetector::print(raw_ostream
&OS
) const {
516 OS
<< "Found HIP installation: " << InstallPath
<< ", version "
517 << DetectedVersion
<< '\n';
520 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList
&DriverArgs
,
521 ArgStringList
&CC1Args
) const {
522 bool UsesRuntimeWrapper
= VersionMajorMinor
> llvm::VersionTuple(3, 5) &&
523 !DriverArgs
.hasArg(options::OPT_nohipwrapperinc
);
524 bool HasHipStdPar
= DriverArgs
.hasArg(options::OPT_hipstdpar
);
526 if (!DriverArgs
.hasArg(options::OPT_nobuiltininc
)) {
527 // HIP header includes standard library wrapper headers under clang
528 // cuda_wrappers directory. Since these wrapper headers include_next
529 // standard C++ headers, whereas libc++ headers include_next other clang
530 // headers. The include paths have to follow this order:
531 // - wrapper include path
532 // - standard C++ include path
533 // - other clang include path
534 // Since standard C++ and other clang include paths are added in other
535 // places after this function, here we only need to make sure wrapper
536 // include path is added.
538 // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
540 SmallString
<128> P(D
.ResourceDir
);
541 if (UsesRuntimeWrapper
)
542 llvm::sys::path::append(P
, "include", "cuda_wrappers");
543 CC1Args
.push_back("-internal-isystem");
544 CC1Args
.push_back(DriverArgs
.MakeArgString(P
));
547 const auto HandleHipStdPar
= [=, &DriverArgs
, &CC1Args
]() {
548 if (!hasHIPStdParLibrary()) {
549 D
.Diag(diag::err_drv_no_hipstdpar_lib
);
552 if (!HasRocThrustLibrary
&&
553 !D
.getVFS().exists(getIncludePath() + "/thrust")) {
554 D
.Diag(diag::err_drv_no_hipstdpar_thrust_lib
);
557 if (!HasRocPrimLibrary
&&
558 !D
.getVFS().exists(getIncludePath() + "/rocprim")) {
559 D
.Diag(diag::err_drv_no_hipstdpar_prim_lib
);
563 const char *ThrustPath
;
564 if (HasRocThrustLibrary
)
565 ThrustPath
= DriverArgs
.MakeArgString(HIPRocThrustPathArg
);
567 ThrustPath
= DriverArgs
.MakeArgString(getIncludePath() + "/thrust");
569 const char *PrimPath
;
570 if (HasRocPrimLibrary
)
571 PrimPath
= DriverArgs
.MakeArgString(HIPRocPrimPathArg
);
573 PrimPath
= DriverArgs
.MakeArgString(getIncludePath() + "/rocprim");
575 CC1Args
.append({"-idirafter", ThrustPath
, "-idirafter", PrimPath
,
576 "-idirafter", DriverArgs
.MakeArgString(HIPStdParPathArg
),
577 "-include", "hipstdpar_lib.hpp"});
580 if (DriverArgs
.hasArg(options::OPT_nogpuinc
)) {
587 if (!hasHIPRuntime()) {
588 D
.Diag(diag::err_drv_no_hip_runtime
);
592 CC1Args
.push_back("-idirafter");
593 CC1Args
.push_back(DriverArgs
.MakeArgString(getIncludePath()));
594 if (UsesRuntimeWrapper
)
595 CC1Args
.append({"-include", "__clang_hip_runtime_wrapper.h"});
600 void amdgpu::Linker::ConstructJob(Compilation
&C
, const JobAction
&JA
,
601 const InputInfo
&Output
,
602 const InputInfoList
&Inputs
,
604 const char *LinkingOutput
) const {
606 std::string Linker
= getToolChain().GetProgramPath(getShortName());
607 ArgStringList CmdArgs
;
608 CmdArgs
.push_back("--no-undefined");
609 CmdArgs
.push_back("-shared");
611 addLinkerCompressDebugSectionsOption(getToolChain(), Args
, CmdArgs
);
612 Args
.AddAllArgs(CmdArgs
, options::OPT_L
);
613 AddLinkerInputs(getToolChain(), Inputs
, Args
, CmdArgs
, JA
);
614 if (C
.getDriver().isUsingLTO())
615 addLTOOptions(getToolChain(), Args
, CmdArgs
, Output
, Inputs
[0],
616 C
.getDriver().getLTOMode() == LTOK_Thin
);
617 else if (Args
.hasArg(options::OPT_mcpu_EQ
))
618 CmdArgs
.push_back(Args
.MakeArgString(
619 "-plugin-opt=mcpu=" + Args
.getLastArgValue(options::OPT_mcpu_EQ
)));
620 CmdArgs
.push_back("-o");
621 CmdArgs
.push_back(Output
.getFilename());
622 C
.addCommand(std::make_unique
<Command
>(
623 JA
, *this, ResponseFileSupport::AtFileCurCP(), Args
.MakeArgString(Linker
),
624 CmdArgs
, Inputs
, Output
));
627 void amdgpu::getAMDGPUTargetFeatures(const Driver
&D
,
628 const llvm::Triple
&Triple
,
629 const llvm::opt::ArgList
&Args
,
630 std::vector
<StringRef
> &Features
) {
631 // Add target ID features to -target-feature options. No diagnostics should
632 // be emitted here since invalid target ID is diagnosed at other places.
633 StringRef TargetID
= Args
.getLastArgValue(options::OPT_mcpu_EQ
);
634 if (!TargetID
.empty()) {
635 llvm::StringMap
<bool> FeatureMap
;
636 auto OptionalGpuArch
= parseTargetID(Triple
, TargetID
, &FeatureMap
);
637 if (OptionalGpuArch
) {
638 StringRef GpuArch
= *OptionalGpuArch
;
639 // Iterate through all possible target ID features for the given GPU.
640 // If it is mapped to true, add +feature.
641 // If it is mapped to false, add -feature.
642 // If it is not in the map (default), do not add it
643 for (auto &&Feature
: getAllPossibleTargetIDFeatures(Triple
, GpuArch
)) {
644 auto Pos
= FeatureMap
.find(Feature
);
645 if (Pos
== FeatureMap
.end())
647 Features
.push_back(Args
.MakeArgStringRef(
648 (Twine(Pos
->second
? "+" : "-") + Feature
).str()));
653 if (Args
.hasFlag(options::OPT_mwavefrontsize64
,
654 options::OPT_mno_wavefrontsize64
, false))
655 Features
.push_back("+wavefrontsize64");
657 handleTargetFeaturesGroup(D
, Triple
, Args
, Features
,
658 options::OPT_m_amdgpu_Features_Group
);
662 AMDGPUToolChain::AMDGPUToolChain(const Driver
&D
, const llvm::Triple
&Triple
,
664 : Generic_ELF(D
, Triple
, Args
),
666 {{options::OPT_O
, "3"}, {options::OPT_cl_std_EQ
, "CL1.2"}}) {
667 // Check code object version options. Emit warnings for legacy options
668 // and errors for the last invalid code object version options.
669 // It is done here to avoid repeated warning or error messages for
670 // each tool invocation.
671 checkAMDGPUCodeObjectVersion(D
, Args
);
674 Tool
*AMDGPUToolChain::buildLinker() const {
675 return new tools::amdgpu::Linker(*this);
679 AMDGPUToolChain::TranslateArgs(const DerivedArgList
&Args
, StringRef BoundArch
,
680 Action::OffloadKind DeviceOffloadKind
) const {
682 DerivedArgList
*DAL
=
683 Generic_ELF::TranslateArgs(Args
, BoundArch
, DeviceOffloadKind
);
685 const OptTable
&Opts
= getDriver().getOpts();
688 DAL
= new DerivedArgList(Args
.getBaseArgs());
693 // Replace -mcpu=native with detected GPU.
694 Arg
*LastMCPUArg
= DAL
->getLastArg(options::OPT_mcpu_EQ
);
695 if (LastMCPUArg
&& StringRef(LastMCPUArg
->getValue()) == "native") {
696 DAL
->eraseArg(options::OPT_mcpu_EQ
);
697 auto GPUsOrErr
= getSystemGPUArchs(Args
);
699 getDriver().Diag(diag::err_drv_undetermined_gpu_arch
)
700 << llvm::Triple::getArchTypeName(getArch())
701 << llvm::toString(GPUsOrErr
.takeError()) << "-mcpu";
703 auto &GPUs
= *GPUsOrErr
;
704 if (GPUs
.size() > 1) {
705 getDriver().Diag(diag::warn_drv_multi_gpu_arch
)
706 << llvm::Triple::getArchTypeName(getArch())
707 << llvm::join(GPUs
, ", ") << "-mcpu";
709 DAL
->AddJoinedArg(nullptr, Opts
.getOption(options::OPT_mcpu_EQ
),
710 Args
.MakeArgString(GPUs
.front()));
716 if (!Args
.getLastArgValue(options::OPT_x
).equals("cl"))
719 // Phase 1 (.cl -> .bc)
720 if (Args
.hasArg(options::OPT_c
) && Args
.hasArg(options::OPT_emit_llvm
)) {
721 DAL
->AddFlagArg(nullptr, Opts
.getOption(getTriple().isArch64Bit()
723 : options::OPT_m32
));
725 // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
726 // as they defined that way in Options.td
727 if (!Args
.hasArg(options::OPT_O
, options::OPT_O0
, options::OPT_O4
,
729 DAL
->AddJoinedArg(nullptr, Opts
.getOption(options::OPT_O
),
730 getOptionDefault(options::OPT_O
));
736 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
737 llvm::AMDGPU::GPUKind Kind
) {
739 // Assume nothing without a specific target.
740 if (Kind
== llvm::AMDGPU::GK_NONE
)
743 const unsigned ArchAttr
= llvm::AMDGPU::getArchAttrAMDGCN(Kind
);
745 // Default to enabling f32 denormals by default on subtargets where fma is
746 // fast with denormals
747 const bool BothDenormAndFMAFast
=
748 (ArchAttr
& llvm::AMDGPU::FEATURE_FAST_FMA_F32
) &&
749 (ArchAttr
& llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32
);
750 return !BothDenormAndFMAFast
;
753 llvm::DenormalMode
AMDGPUToolChain::getDefaultDenormalModeForType(
754 const llvm::opt::ArgList
&DriverArgs
, const JobAction
&JA
,
755 const llvm::fltSemantics
*FPType
) const {
756 // Denormals should always be enabled for f16 and f64.
757 if (!FPType
|| FPType
!= &llvm::APFloat::IEEEsingle())
758 return llvm::DenormalMode::getIEEE();
760 if (JA
.getOffloadingDeviceKind() == Action::OFK_HIP
||
761 JA
.getOffloadingDeviceKind() == Action::OFK_Cuda
) {
762 auto Arch
= getProcessorFromTargetID(getTriple(), JA
.getOffloadingArch());
763 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(Arch
);
764 if (FPType
&& FPType
== &llvm::APFloat::IEEEsingle() &&
765 DriverArgs
.hasFlag(options::OPT_fgpu_flush_denormals_to_zero
,
766 options::OPT_fno_gpu_flush_denormals_to_zero
,
767 getDefaultDenormsAreZeroForTarget(Kind
)))
768 return llvm::DenormalMode::getPreserveSign();
770 return llvm::DenormalMode::getIEEE();
773 const StringRef GpuArch
= getGPUArch(DriverArgs
);
774 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(GpuArch
);
776 // TODO: There are way too many flags that change this. Do we need to check
778 bool DAZ
= DriverArgs
.hasArg(options::OPT_cl_denorms_are_zero
) ||
779 getDefaultDenormsAreZeroForTarget(Kind
);
781 // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
782 // also implicit treated as zero (DAZ).
783 return DAZ
? llvm::DenormalMode::getPreserveSign() :
784 llvm::DenormalMode::getIEEE();
787 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList
&DriverArgs
,
788 llvm::AMDGPU::GPUKind Kind
) {
789 const unsigned ArchAttr
= llvm::AMDGPU::getArchAttrAMDGCN(Kind
);
790 bool HasWave32
= (ArchAttr
& llvm::AMDGPU::FEATURE_WAVE32
);
792 return !HasWave32
|| DriverArgs
.hasFlag(
793 options::OPT_mwavefrontsize64
, options::OPT_mno_wavefrontsize64
, false);
798 ROCMToolChain::ROCMToolChain(const Driver
&D
, const llvm::Triple
&Triple
,
800 : AMDGPUToolChain(D
, Triple
, Args
) {
801 RocmInstallation
->detectDeviceLibrary();
804 void AMDGPUToolChain::addClangTargetOptions(
805 const llvm::opt::ArgList
&DriverArgs
,
806 llvm::opt::ArgStringList
&CC1Args
,
807 Action::OffloadKind DeviceOffloadingKind
) const {
808 // Default to "hidden" visibility, as object level linking will not be
809 // supported for the foreseeable future.
810 if (!DriverArgs
.hasArg(options::OPT_fvisibility_EQ
,
811 options::OPT_fvisibility_ms_compat
)) {
812 CC1Args
.push_back("-fvisibility=hidden");
813 CC1Args
.push_back("-fapply-global-visibility-to-externs");
818 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList
&DriverArgs
) const {
819 return getProcessorFromTargetID(
820 getTriple(), DriverArgs
.getLastArgValue(options::OPT_mcpu_EQ
));
823 AMDGPUToolChain::ParsedTargetIDType
824 AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList
&DriverArgs
) const {
825 StringRef TargetID
= DriverArgs
.getLastArgValue(options::OPT_mcpu_EQ
);
826 if (TargetID
.empty())
827 return {std::nullopt
, std::nullopt
, std::nullopt
};
829 llvm::StringMap
<bool> FeatureMap
;
830 auto OptionalGpuArch
= parseTargetID(getTriple(), TargetID
, &FeatureMap
);
831 if (!OptionalGpuArch
)
832 return {TargetID
.str(), std::nullopt
, std::nullopt
};
834 return {TargetID
.str(), OptionalGpuArch
->str(), FeatureMap
};
837 void AMDGPUToolChain::checkTargetID(
838 const llvm::opt::ArgList
&DriverArgs
) const {
839 auto PTID
= getParsedTargetID(DriverArgs
);
840 if (PTID
.OptionalTargetID
&& !PTID
.OptionalGPUArch
) {
841 getDriver().Diag(clang::diag::err_drv_bad_target_id
)
842 << *PTID
.OptionalTargetID
;
846 Expected
<SmallVector
<std::string
>>
847 AMDGPUToolChain::getSystemGPUArchs(const ArgList
&Args
) const {
848 // Detect AMD GPUs availible on the system.
850 if (Arg
*A
= Args
.getLastArg(options::OPT_amdgpu_arch_tool_EQ
))
851 Program
= A
->getValue();
853 Program
= GetProgramPath("amdgpu-arch");
855 auto StdoutOrErr
= executeToolChainProgram(Program
);
857 return StdoutOrErr
.takeError();
859 SmallVector
<std::string
, 1> GPUArchs
;
860 for (StringRef Arch
: llvm::split((*StdoutOrErr
)->getBuffer(), "\n"))
862 GPUArchs
.push_back(Arch
.str());
864 if (GPUArchs
.empty())
865 return llvm::createStringError(std::error_code(),
866 "No AMD GPU detected in the system");
868 return std::move(GPUArchs
);
871 void ROCMToolChain::addClangTargetOptions(
872 const llvm::opt::ArgList
&DriverArgs
, llvm::opt::ArgStringList
&CC1Args
,
873 Action::OffloadKind DeviceOffloadingKind
) const {
874 AMDGPUToolChain::addClangTargetOptions(DriverArgs
, CC1Args
,
875 DeviceOffloadingKind
);
877 // For the OpenCL case where there is no offload target, accept -nostdlib to
878 // disable bitcode linking.
879 if (DeviceOffloadingKind
== Action::OFK_None
&&
880 DriverArgs
.hasArg(options::OPT_nostdlib
))
883 if (DriverArgs
.hasArg(options::OPT_nogpulib
))
886 // Get the device name and canonicalize it
887 const StringRef GpuArch
= getGPUArch(DriverArgs
);
888 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(GpuArch
);
889 const StringRef CanonArch
= llvm::AMDGPU::getArchNameAMDGCN(Kind
);
890 StringRef LibDeviceFile
= RocmInstallation
->getLibDeviceFile(CanonArch
);
891 auto ABIVer
= DeviceLibABIVersion::fromCodeObjectVersion(
892 getAMDGPUCodeObjectVersion(getDriver(), DriverArgs
));
893 if (!RocmInstallation
->checkCommonBitcodeLibs(CanonArch
, LibDeviceFile
,
897 bool Wave64
= isWave64(DriverArgs
, Kind
);
899 // TODO: There are way too many flags that change this. Do we need to check
901 bool DAZ
= DriverArgs
.hasArg(options::OPT_cl_denorms_are_zero
) ||
902 getDefaultDenormsAreZeroForTarget(Kind
);
903 bool FiniteOnly
= DriverArgs
.hasArg(options::OPT_cl_finite_math_only
);
906 DriverArgs
.hasArg(options::OPT_cl_unsafe_math_optimizations
);
907 bool FastRelaxedMath
= DriverArgs
.hasArg(options::OPT_cl_fast_relaxed_math
);
909 DriverArgs
.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt
);
911 // Add the OpenCL specific bitcode library.
912 llvm::SmallVector
<std::string
, 12> BCLibs
;
913 BCLibs
.push_back(RocmInstallation
->getOpenCLPath().str());
915 // Add the generic set of libraries.
916 BCLibs
.append(RocmInstallation
->getCommonBitcodeLibs(
917 DriverArgs
, LibDeviceFile
, Wave64
, DAZ
, FiniteOnly
, UnsafeMathOpt
,
918 FastRelaxedMath
, CorrectSqrt
, ABIVer
, false));
920 for (StringRef BCFile
: BCLibs
) {
921 CC1Args
.push_back("-mlink-builtin-bitcode");
922 CC1Args
.push_back(DriverArgs
.MakeArgString(BCFile
));
926 bool RocmInstallationDetector::checkCommonBitcodeLibs(
927 StringRef GPUArch
, StringRef LibDeviceFile
,
928 DeviceLibABIVersion ABIVer
) const {
929 if (!hasDeviceLibrary()) {
930 D
.Diag(diag::err_drv_no_rocm_device_lib
) << 0;
933 if (LibDeviceFile
.empty()) {
934 D
.Diag(diag::err_drv_no_rocm_device_lib
) << 1 << GPUArch
;
937 if (ABIVer
.requiresLibrary() && getABIVersionPath(ABIVer
).empty()) {
938 D
.Diag(diag::err_drv_no_rocm_device_lib
) << 2 << ABIVer
.toString();
944 llvm::SmallVector
<std::string
, 12>
945 RocmInstallationDetector::getCommonBitcodeLibs(
946 const llvm::opt::ArgList
&DriverArgs
, StringRef LibDeviceFile
, bool Wave64
,
947 bool DAZ
, bool FiniteOnly
, bool UnsafeMathOpt
, bool FastRelaxedMath
,
948 bool CorrectSqrt
, DeviceLibABIVersion ABIVer
, bool isOpenMP
= false) const {
949 llvm::SmallVector
<std::string
, 12> BCLibs
;
951 auto AddBCLib
= [&](StringRef BCFile
) { BCLibs
.push_back(BCFile
.str()); };
953 AddBCLib(getOCMLPath());
955 AddBCLib(getOCKLPath());
956 AddBCLib(getDenormalsAreZeroPath(DAZ
));
957 AddBCLib(getUnsafeMathPath(UnsafeMathOpt
|| FastRelaxedMath
));
958 AddBCLib(getFiniteOnlyPath(FiniteOnly
|| FastRelaxedMath
));
959 AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt
));
960 AddBCLib(getWavefrontSize64Path(Wave64
));
961 AddBCLib(LibDeviceFile
);
962 auto ABIVerPath
= getABIVersionPath(ABIVer
);
963 if (!ABIVerPath
.empty())
964 AddBCLib(ABIVerPath
);
969 llvm::SmallVector
<std::string
, 12>
970 ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList
&DriverArgs
,
971 const std::string
&GPUArch
,
972 bool isOpenMP
) const {
973 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(GPUArch
);
974 const StringRef CanonArch
= llvm::AMDGPU::getArchNameAMDGCN(Kind
);
976 StringRef LibDeviceFile
= RocmInstallation
->getLibDeviceFile(CanonArch
);
977 auto ABIVer
= DeviceLibABIVersion::fromCodeObjectVersion(
978 getAMDGPUCodeObjectVersion(getDriver(), DriverArgs
));
979 if (!RocmInstallation
->checkCommonBitcodeLibs(CanonArch
, LibDeviceFile
,
983 // If --hip-device-lib is not set, add the default bitcode libraries.
984 // TODO: There are way too many flags that change this. Do we need to check
986 bool DAZ
= DriverArgs
.hasFlag(options::OPT_fgpu_flush_denormals_to_zero
,
987 options::OPT_fno_gpu_flush_denormals_to_zero
,
988 getDefaultDenormsAreZeroForTarget(Kind
));
989 bool FiniteOnly
= DriverArgs
.hasFlag(
990 options::OPT_ffinite_math_only
, options::OPT_fno_finite_math_only
, false);
992 DriverArgs
.hasFlag(options::OPT_funsafe_math_optimizations
,
993 options::OPT_fno_unsafe_math_optimizations
, false);
994 bool FastRelaxedMath
= DriverArgs
.hasFlag(options::OPT_ffast_math
,
995 options::OPT_fno_fast_math
, false);
996 bool CorrectSqrt
= DriverArgs
.hasFlag(
997 options::OPT_fhip_fp32_correctly_rounded_divide_sqrt
,
998 options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt
, true);
999 bool Wave64
= isWave64(DriverArgs
, Kind
);
1001 return RocmInstallation
->getCommonBitcodeLibs(
1002 DriverArgs
, LibDeviceFile
, Wave64
, DAZ
, FiniteOnly
, UnsafeMathOpt
,
1003 FastRelaxedMath
, CorrectSqrt
, ABIVer
, isOpenMP
);