1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "CommonArgs.h"
11 #include "clang/Basic/TargetID.h"
12 #include "clang/Driver/Compilation.h"
13 #include "clang/Driver/DriverDiagnostic.h"
14 #include "clang/Driver/InputInfo.h"
15 #include "clang/Driver/Options.h"
16 #include "llvm/Option/ArgList.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/FileUtilities.h"
19 #include "llvm/Support/LineIterator.h"
20 #include "llvm/Support/Path.h"
21 #include "llvm/Support/VirtualFileSystem.h"
22 #include <system_error>
24 #define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
26 using namespace clang::driver
;
27 using namespace clang::driver::tools
;
28 using namespace clang::driver::toolchains
;
29 using namespace clang
;
30 using namespace llvm::opt
;
32 // Look for a sub-directory whose name starts with PackageName under the ROCm
33 // candidate path. If exactly one matching sub-directory is found, append that
34 // sub-directory to Path. If there is no matching sub-directory, or there is
35 // more than one matching sub-directory, diagnose it. Returns the full
36 // path of the package if there is only one matching sub-directory, otherwise
37 // returns an empty string.
39 RocmInstallationDetector::findSPACKPackage(const Candidate
&Cand
,
40 StringRef PackageName
) {
44 std::string Prefix
= Twine(PackageName
+ "-" + Cand
.SPACKReleaseStr
).str();
45 llvm::SmallVector
<llvm::SmallString
<0>> SubDirs
;
46 for (llvm::vfs::directory_iterator File
= D
.getVFS().dir_begin(Cand
.Path
, EC
),
48 File
!= FileEnd
&& !EC
; File
.increment(EC
)) {
49 llvm::StringRef FileName
= llvm::sys::path::filename(File
->path());
50 if (FileName
.startswith(Prefix
)) {
51 SubDirs
.push_back(FileName
);
52 if (SubDirs
.size() > 1)
56 if (SubDirs
.size() == 1) {
57 auto PackagePath
= Cand
.Path
;
58 llvm::sys::path::append(PackagePath
, SubDirs
[0]);
61 if (SubDirs
.size() == 0 && Verbose
) {
62 llvm::errs() << "SPACK package " << Prefix
<< " not found at " << Cand
.Path
67 if (SubDirs
.size() > 1 && Verbose
) {
68 llvm::errs() << "Cannot use SPACK package " << Prefix
<< " at " << Cand
.Path
69 << " due to multiple installations for the same version\n";
74 void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path
) {
75 assert(!Path
.empty());
77 const StringRef
Suffix(".bc");
78 const StringRef
Suffix2(".amdgcn.bc");
81 for (llvm::vfs::directory_iterator LI
= D
.getVFS().dir_begin(Path
, EC
), LE
;
82 !EC
&& LI
!= LE
; LI
= LI
.increment(EC
)) {
83 StringRef FilePath
= LI
->path();
84 StringRef FileName
= llvm::sys::path::filename(FilePath
);
85 if (!FileName
.endswith(Suffix
))
89 if (FileName
.endswith(Suffix2
))
90 BaseName
= FileName
.drop_back(Suffix2
.size());
91 else if (FileName
.endswith(Suffix
))
92 BaseName
= FileName
.drop_back(Suffix
.size());
94 const StringRef ABIVersionPrefix
= "oclc_abi_version_";
95 if (BaseName
== "ocml") {
97 } else if (BaseName
== "ockl") {
99 } else if (BaseName
== "opencl") {
101 } else if (BaseName
== "hip") {
103 } else if (BaseName
== "asanrtl") {
105 } else if (BaseName
== "oclc_finite_only_off") {
106 FiniteOnly
.Off
= FilePath
;
107 } else if (BaseName
== "oclc_finite_only_on") {
108 FiniteOnly
.On
= FilePath
;
109 } else if (BaseName
== "oclc_daz_opt_on") {
110 DenormalsAreZero
.On
= FilePath
;
111 } else if (BaseName
== "oclc_daz_opt_off") {
112 DenormalsAreZero
.Off
= FilePath
;
113 } else if (BaseName
== "oclc_correctly_rounded_sqrt_on") {
114 CorrectlyRoundedSqrt
.On
= FilePath
;
115 } else if (BaseName
== "oclc_correctly_rounded_sqrt_off") {
116 CorrectlyRoundedSqrt
.Off
= FilePath
;
117 } else if (BaseName
== "oclc_unsafe_math_on") {
118 UnsafeMath
.On
= FilePath
;
119 } else if (BaseName
== "oclc_unsafe_math_off") {
120 UnsafeMath
.Off
= FilePath
;
121 } else if (BaseName
== "oclc_wavefrontsize64_on") {
122 WavefrontSize64
.On
= FilePath
;
123 } else if (BaseName
== "oclc_wavefrontsize64_off") {
124 WavefrontSize64
.Off
= FilePath
;
125 } else if (BaseName
.startswith(ABIVersionPrefix
)) {
126 unsigned ABIVersionNumber
;
127 if (BaseName
.drop_front(ABIVersionPrefix
.size())
128 .getAsInteger(/*Redex=*/0, ABIVersionNumber
))
130 ABIVersionMap
[ABIVersionNumber
] = FilePath
.str();
132 // Process all bitcode filenames that look like
133 // oclc_isa_version_XXX.amdgcn.bc
134 const StringRef DeviceLibPrefix
= "oclc_isa_version_";
135 if (!BaseName
.startswith(DeviceLibPrefix
))
138 StringRef IsaVersionNumber
=
139 BaseName
.drop_front(DeviceLibPrefix
.size());
141 llvm::Twine GfxName
= Twine("gfx") + IsaVersionNumber
;
144 std::make_pair(GfxName
.toStringRef(Tmp
), FilePath
.str()));
149 // Parse and extract version numbers from `.hipVersion`. Return `true` if
150 // the parsing fails.
151 bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V
) {
152 SmallVector
<StringRef
, 4> VersionParts
;
153 V
.split(VersionParts
, '\n');
154 unsigned Major
= ~0U;
155 unsigned Minor
= ~0U;
156 for (auto Part
: VersionParts
) {
157 auto Splits
= Part
.rtrim().split('=');
158 if (Splits
.first
== "HIP_VERSION_MAJOR") {
159 if (Splits
.second
.getAsInteger(0, Major
))
161 } else if (Splits
.first
== "HIP_VERSION_MINOR") {
162 if (Splits
.second
.getAsInteger(0, Minor
))
164 } else if (Splits
.first
== "HIP_VERSION_PATCH")
165 VersionPatch
= Splits
.second
.str();
167 if (Major
== ~0U || Minor
== ~0U)
169 VersionMajorMinor
= llvm::VersionTuple(Major
, Minor
);
171 (Twine(Major
) + "." + Twine(Minor
) + "." + VersionPatch
).str();
175 /// \returns a list of candidate directories for ROCm installation, which is
176 /// cached and populated only once.
177 const SmallVectorImpl
<RocmInstallationDetector::Candidate
> &
178 RocmInstallationDetector::getInstallationPathCandidates() {
180 // Return the cached candidate list if it has already been populated.
181 if (!ROCmSearchDirs
.empty())
182 return ROCmSearchDirs
;
184 auto DoPrintROCmSearchDirs
= [&]() {
185 if (PrintROCmSearchDirs
)
186 for (auto Cand
: ROCmSearchDirs
) {
187 llvm::errs() << "ROCm installation search path";
189 llvm::errs() << " (Spack " << Cand
.SPACKReleaseStr
<< ")";
190 llvm::errs() << ": " << Cand
.Path
<< '\n';
194 // For candidate specified by --rocm-path we do not do strict check, i.e.,
195 // checking existence of HIP version file and device library files.
196 if (!RocmPathArg
.empty()) {
197 ROCmSearchDirs
.emplace_back(RocmPathArg
.str());
198 DoPrintROCmSearchDirs();
199 return ROCmSearchDirs
;
200 } else if (const char *RocmPathEnv
= ::getenv("ROCM_PATH")) {
201 if (!StringRef(RocmPathEnv
).empty()) {
202 ROCmSearchDirs
.emplace_back(RocmPathEnv
);
203 DoPrintROCmSearchDirs();
204 return ROCmSearchDirs
;
208 // Try to find relative to the compiler binary.
209 const char *InstallDir
= D
.getInstalledDir();
211 // Check both a normal Unix prefix position of the clang binary, as well as
212 // the Windows-esque layout the ROCm packages use with the host architecture
213 // subdirectory of bin.
214 auto DeduceROCmPath
= [](StringRef ClangPath
) {
215 // Strip off directory (usually bin)
216 StringRef ParentDir
= llvm::sys::path::parent_path(ClangPath
);
217 StringRef ParentName
= llvm::sys::path::filename(ParentDir
);
219 // Some builds use bin/{host arch}, so go up again.
220 if (ParentName
== "bin") {
221 ParentDir
= llvm::sys::path::parent_path(ParentDir
);
222 ParentName
= llvm::sys::path::filename(ParentDir
);
225 // Detect ROCm packages built with SPACK.
226 // clang is installed at
227 // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory.
228 // We only consider the parent directory of llvm-amdgpu package as ROCm
229 // installation candidate for SPACK.
230 if (ParentName
.startswith("llvm-amdgpu-")) {
232 ParentName
.drop_front(strlen("llvm-amdgpu-")).split('-');
233 auto SPACKReleaseStr
= SPACKPostfix
.first
;
234 if (!SPACKReleaseStr
.empty()) {
235 ParentDir
= llvm::sys::path::parent_path(ParentDir
);
236 return Candidate(ParentDir
.str(), /*StrictChecking=*/true,
241 // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
242 // Some versions of the aomp package install to /opt/rocm/aomp/bin
243 if (ParentName
== "llvm" || ParentName
.startswith("aomp"))
244 ParentDir
= llvm::sys::path::parent_path(ParentDir
);
246 return Candidate(ParentDir
.str(), /*StrictChecking=*/true);
249 // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic
250 // link of clang itself.
251 ROCmSearchDirs
.emplace_back(DeduceROCmPath(InstallDir
));
253 // Deduce ROCm path by the real path of the invoked clang, resolving symbolic
254 // link of clang itself.
255 llvm::SmallString
<256> RealClangPath
;
256 llvm::sys::fs::real_path(D
.getClangProgramPath(), RealClangPath
);
257 auto ParentPath
= llvm::sys::path::parent_path(RealClangPath
);
258 if (ParentPath
!= InstallDir
)
259 ROCmSearchDirs
.emplace_back(DeduceROCmPath(ParentPath
));
261 // Device library may be installed in clang or resource directory.
262 auto ClangRoot
= llvm::sys::path::parent_path(InstallDir
);
263 auto RealClangRoot
= llvm::sys::path::parent_path(ParentPath
);
264 ROCmSearchDirs
.emplace_back(ClangRoot
.str(), /*StrictChecking=*/true);
265 if (RealClangRoot
!= ClangRoot
)
266 ROCmSearchDirs
.emplace_back(RealClangRoot
.str(), /*StrictChecking=*/true);
267 ROCmSearchDirs
.emplace_back(D
.ResourceDir
,
268 /*StrictChecking=*/true);
270 ROCmSearchDirs
.emplace_back(D
.SysRoot
+ "/opt/rocm",
271 /*StrictChecking=*/true);
273 // Find the latest /opt/rocm-{release} directory.
275 std::string LatestROCm
;
276 llvm::VersionTuple LatestVer
;
277 // Get ROCm version from ROCm directory name.
278 auto GetROCmVersion
= [](StringRef DirName
) {
279 llvm::VersionTuple V
;
280 std::string VerStr
= DirName
.drop_front(strlen("rocm-")).str();
281 // The ROCm directory name follows the format of
282 // rocm-{major}.{minor}.{subMinor}[-{build}]
283 std::replace(VerStr
.begin(), VerStr
.end(), '-', '.');
287 for (llvm::vfs::directory_iterator
288 File
= D
.getVFS().dir_begin(D
.SysRoot
+ "/opt", EC
),
290 File
!= FileEnd
&& !EC
; File
.increment(EC
)) {
291 llvm::StringRef FileName
= llvm::sys::path::filename(File
->path());
292 if (!FileName
.startswith("rocm-"))
294 if (LatestROCm
.empty()) {
295 LatestROCm
= FileName
.str();
296 LatestVer
= GetROCmVersion(LatestROCm
);
299 auto Ver
= GetROCmVersion(FileName
);
300 if (LatestVer
< Ver
) {
301 LatestROCm
= FileName
.str();
305 if (!LatestROCm
.empty())
306 ROCmSearchDirs
.emplace_back(D
.SysRoot
+ "/opt/" + LatestROCm
,
307 /*StrictChecking=*/true);
309 DoPrintROCmSearchDirs();
310 return ROCmSearchDirs
;
313 RocmInstallationDetector::RocmInstallationDetector(
314 const Driver
&D
, const llvm::Triple
&HostTriple
,
315 const llvm::opt::ArgList
&Args
, bool DetectHIPRuntime
, bool DetectDeviceLib
)
317 Verbose
= Args
.hasArg(options::OPT_v
);
318 RocmPathArg
= Args
.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ
);
319 PrintROCmSearchDirs
=
320 Args
.hasArg(clang::driver::options::OPT_print_rocm_search_dirs
);
321 RocmDeviceLibPathArg
=
322 Args
.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ
);
323 HIPPathArg
= Args
.getLastArgValue(clang::driver::options::OPT_hip_path_EQ
);
324 if (auto *A
= Args
.getLastArg(clang::driver::options::OPT_hip_version_EQ
)) {
325 HIPVersionArg
= A
->getValue();
326 unsigned Major
= ~0U;
327 unsigned Minor
= ~0U;
328 SmallVector
<StringRef
, 3> Parts
;
329 HIPVersionArg
.split(Parts
, '.');
331 Parts
[0].getAsInteger(0, Major
);
332 if (Parts
.size() > 1)
333 Parts
[1].getAsInteger(0, Minor
);
334 if (Parts
.size() > 2)
335 VersionPatch
= Parts
[2].str();
336 if (VersionPatch
.empty())
338 if (Major
!= ~0U && Minor
== ~0U)
340 if (Major
== ~0U || Minor
== ~0U)
341 D
.Diag(diag::err_drv_invalid_value
)
342 << A
->getAsString(Args
) << HIPVersionArg
;
344 VersionMajorMinor
= llvm::VersionTuple(Major
, Minor
);
346 (Twine(Major
) + "." + Twine(Minor
) + "." + VersionPatch
).str();
348 VersionPatch
= DefaultVersionPatch
;
350 llvm::VersionTuple(DefaultVersionMajor
, DefaultVersionMinor
);
351 DetectedVersion
= (Twine(DefaultVersionMajor
) + "." +
352 Twine(DefaultVersionMinor
) + "." + VersionPatch
)
356 if (DetectHIPRuntime
)
359 detectDeviceLibrary();
362 void RocmInstallationDetector::detectDeviceLibrary() {
363 assert(LibDevicePath
.empty());
365 if (!RocmDeviceLibPathArg
.empty())
366 LibDevicePath
= RocmDeviceLibPathArg
[RocmDeviceLibPathArg
.size() - 1];
367 else if (const char *LibPathEnv
= ::getenv("HIP_DEVICE_LIB_PATH"))
368 LibDevicePath
= LibPathEnv
;
370 auto &FS
= D
.getVFS();
371 if (!LibDevicePath
.empty()) {
372 // Maintain compatibility with HIP flag/envvar pointing directly at the
373 // bitcode library directory. This points directly at the library path instead
374 // of the rocm root installation.
375 if (!FS
.exists(LibDevicePath
))
378 scanLibDevicePath(LibDevicePath
);
379 HasDeviceLibrary
= allGenericLibsValid() && !LibDeviceMap
.empty();
383 // The install path situation in old versions of ROCm is a real mess, and
384 // they use a different install layout. Multiple copies of the device libraries
385 // exist for each frontend project, and differ depending on which build
386 // system produced the packages. Standalone OpenCL builds also have a
387 // different directory structure from the ROCm OpenCL package.
388 auto &ROCmDirs
= getInstallationPathCandidates();
389 for (const auto &Candidate
: ROCmDirs
) {
390 auto CandidatePath
= Candidate
.Path
;
392 // Check device library exists at the given path.
393 auto CheckDeviceLib
= [&](StringRef Path
) {
394 bool CheckLibDevice
= (!NoBuiltinLibs
|| Candidate
.StrictChecking
);
395 if (CheckLibDevice
&& !FS
.exists(Path
))
398 scanLibDevicePath(Path
);
400 if (!NoBuiltinLibs
) {
401 // Check that the required non-target libraries are all available.
402 if (!allGenericLibsValid())
405 // Check that we have found at least one libdevice that we can link in
406 // if -nobuiltinlib hasn't been specified.
407 if (LibDeviceMap
.empty())
413 // The possible structures are:
414 // - ${ROCM_ROOT}/amdgcn/bitcode/*
415 // - ${ROCM_ROOT}/lib/*
416 // - ${ROCM_ROOT}/lib/bitcode/*
417 // so try to detect these layouts.
418 static constexpr std::array
<const char *, 2> SubDirsList
[] = {
419 {"amdgcn", "bitcode"},
424 // Make a path by appending sub-directories to InstallPath.
425 auto MakePath
= [&](const llvm::ArrayRef
<const char *> &SubDirs
) {
426 auto Path
= CandidatePath
;
427 for (auto SubDir
: SubDirs
)
428 llvm::sys::path::append(Path
, SubDir
);
432 for (auto SubDirs
: SubDirsList
) {
433 LibDevicePath
= MakePath(SubDirs
);
434 HasDeviceLibrary
= CheckDeviceLib(LibDevicePath
);
435 if (HasDeviceLibrary
)
441 void RocmInstallationDetector::detectHIPRuntime() {
442 SmallVector
<Candidate
, 4> HIPSearchDirs
;
443 if (!HIPPathArg
.empty())
444 HIPSearchDirs
.emplace_back(HIPPathArg
.str(), /*StrictChecking=*/true);
446 HIPSearchDirs
.append(getInstallationPathCandidates());
447 auto &FS
= D
.getVFS();
449 for (const auto &Candidate
: HIPSearchDirs
) {
450 InstallPath
= Candidate
.Path
;
451 if (InstallPath
.empty() || !FS
.exists(InstallPath
))
453 // HIP runtime built by SPACK is installed to
454 // <rocm_root>/hip-<rocm_release_string>-<hash> directory.
455 auto SPACKPath
= findSPACKPackage(Candidate
, "hip");
456 InstallPath
= SPACKPath
.empty() ? InstallPath
: SPACKPath
;
458 BinPath
= InstallPath
;
459 llvm::sys::path::append(BinPath
, "bin");
460 IncludePath
= InstallPath
;
461 llvm::sys::path::append(IncludePath
, "include");
462 LibPath
= InstallPath
;
463 llvm::sys::path::append(LibPath
, "lib");
465 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> VersionFile
=
466 FS
.getBufferForFile(BinPath
+ "/.hipVersion");
467 if (!VersionFile
&& Candidate
.StrictChecking
)
470 if (HIPVersionArg
.empty() && VersionFile
)
471 if (parseHIPVersionFile((*VersionFile
)->getBuffer()))
474 HasHIPRuntime
= true;
477 HasHIPRuntime
= false;
480 void RocmInstallationDetector::print(raw_ostream
&OS
) const {
482 OS
<< "Found HIP installation: " << InstallPath
<< ", version "
483 << DetectedVersion
<< '\n';
486 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList
&DriverArgs
,
487 ArgStringList
&CC1Args
) const {
488 bool UsesRuntimeWrapper
= VersionMajorMinor
> llvm::VersionTuple(3, 5) &&
489 !DriverArgs
.hasArg(options::OPT_nohipwrapperinc
);
491 if (!DriverArgs
.hasArg(options::OPT_nobuiltininc
)) {
492 // HIP header includes standard library wrapper headers under clang
493 // cuda_wrappers directory. Since these wrapper headers include_next
494 // standard C++ headers, whereas libc++ headers include_next other clang
495 // headers. The include paths have to follow this order:
496 // - wrapper include path
497 // - standard C++ include path
498 // - other clang include path
499 // Since standard C++ and other clang include paths are added in other
500 // places after this function, here we only need to make sure wrapper
501 // include path is added.
503 // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
505 SmallString
<128> P(D
.ResourceDir
);
506 if (UsesRuntimeWrapper
)
507 llvm::sys::path::append(P
, "include", "cuda_wrappers");
508 CC1Args
.push_back("-internal-isystem");
509 CC1Args
.push_back(DriverArgs
.MakeArgString(P
));
512 if (DriverArgs
.hasArg(options::OPT_nogpuinc
))
515 if (!hasHIPRuntime()) {
516 D
.Diag(diag::err_drv_no_hip_runtime
);
520 CC1Args
.push_back("-idirafter");
521 CC1Args
.push_back(DriverArgs
.MakeArgString(getIncludePath()));
522 if (UsesRuntimeWrapper
)
523 CC1Args
.append({"-include", "__clang_hip_runtime_wrapper.h"});
526 void amdgpu::Linker::ConstructJob(Compilation
&C
, const JobAction
&JA
,
527 const InputInfo
&Output
,
528 const InputInfoList
&Inputs
,
530 const char *LinkingOutput
) const {
532 std::string Linker
= getToolChain().GetProgramPath(getShortName());
533 ArgStringList CmdArgs
;
534 addLinkerCompressDebugSectionsOption(getToolChain(), Args
, CmdArgs
);
535 AddLinkerInputs(getToolChain(), Inputs
, Args
, CmdArgs
, JA
);
536 CmdArgs
.push_back("-shared");
537 CmdArgs
.push_back("-o");
538 CmdArgs
.push_back(Output
.getFilename());
539 C
.addCommand(std::make_unique
<Command
>(
540 JA
, *this, ResponseFileSupport::AtFileCurCP(), Args
.MakeArgString(Linker
),
541 CmdArgs
, Inputs
, Output
));
544 void amdgpu::getAMDGPUTargetFeatures(const Driver
&D
,
545 const llvm::Triple
&Triple
,
546 const llvm::opt::ArgList
&Args
,
547 std::vector
<StringRef
> &Features
) {
548 // Add target ID features to -target-feature options. No diagnostics should
549 // be emitted here since invalid target ID is diagnosed at other places.
550 StringRef TargetID
= Args
.getLastArgValue(options::OPT_mcpu_EQ
);
551 if (!TargetID
.empty()) {
552 llvm::StringMap
<bool> FeatureMap
;
553 auto OptionalGpuArch
= parseTargetID(Triple
, TargetID
, &FeatureMap
);
554 if (OptionalGpuArch
) {
555 StringRef GpuArch
= *OptionalGpuArch
;
556 // Iterate through all possible target ID features for the given GPU.
557 // If it is mapped to true, add +feature.
558 // If it is mapped to false, add -feature.
559 // If it is not in the map (default), do not add it
560 for (auto &&Feature
: getAllPossibleTargetIDFeatures(Triple
, GpuArch
)) {
561 auto Pos
= FeatureMap
.find(Feature
);
562 if (Pos
== FeatureMap
.end())
564 Features
.push_back(Args
.MakeArgStringRef(
565 (Twine(Pos
->second
? "+" : "-") + Feature
).str()));
570 if (Args
.hasFlag(options::OPT_mwavefrontsize64
,
571 options::OPT_mno_wavefrontsize64
, false))
572 Features
.push_back("+wavefrontsize64");
574 handleTargetFeaturesGroup(
575 Args
, Features
, options::OPT_m_amdgpu_Features_Group
);
579 AMDGPUToolChain::AMDGPUToolChain(const Driver
&D
, const llvm::Triple
&Triple
,
581 : Generic_ELF(D
, Triple
, Args
),
583 {{options::OPT_O
, "3"}, {options::OPT_cl_std_EQ
, "CL1.2"}}) {
584 // Check code object version options. Emit warnings for legacy options
585 // and errors for the last invalid code object version options.
586 // It is done here to avoid repeated warning or error messages for
587 // each tool invocation.
588 checkAMDGPUCodeObjectVersion(D
, Args
);
591 Tool
*AMDGPUToolChain::buildLinker() const {
592 return new tools::amdgpu::Linker(*this);
596 AMDGPUToolChain::TranslateArgs(const DerivedArgList
&Args
, StringRef BoundArch
,
597 Action::OffloadKind DeviceOffloadKind
) const {
599 DerivedArgList
*DAL
=
600 Generic_ELF::TranslateArgs(Args
, BoundArch
, DeviceOffloadKind
);
602 const OptTable
&Opts
= getDriver().getOpts();
605 DAL
= new DerivedArgList(Args
.getBaseArgs());
607 for (Arg
*A
: Args
) {
608 if (!shouldSkipArgument(A
))
614 if (!Args
.getLastArgValue(options::OPT_x
).equals("cl"))
617 // Phase 1 (.cl -> .bc)
618 if (Args
.hasArg(options::OPT_c
) && Args
.hasArg(options::OPT_emit_llvm
)) {
619 DAL
->AddFlagArg(nullptr, Opts
.getOption(getTriple().isArch64Bit()
621 : options::OPT_m32
));
623 // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
624 // as they are defined that way in Options.td
625 if (!Args
.hasArg(options::OPT_O
, options::OPT_O0
, options::OPT_O4
,
627 DAL
->AddJoinedArg(nullptr, Opts
.getOption(options::OPT_O
),
628 getOptionDefault(options::OPT_O
));
634 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
635 llvm::AMDGPU::GPUKind Kind
) {
637 // Assume nothing without a specific target.
638 if (Kind
== llvm::AMDGPU::GK_NONE
)
641 const unsigned ArchAttr
= llvm::AMDGPU::getArchAttrAMDGCN(Kind
);
643 // Enable f32 denormals by default on subtargets where fma is
644 // fast with denormals
645 const bool BothDenormAndFMAFast
=
646 (ArchAttr
& llvm::AMDGPU::FEATURE_FAST_FMA_F32
) &&
647 (ArchAttr
& llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32
);
648 return !BothDenormAndFMAFast
;
651 llvm::DenormalMode
AMDGPUToolChain::getDefaultDenormalModeForType(
652 const llvm::opt::ArgList
&DriverArgs
, const JobAction
&JA
,
653 const llvm::fltSemantics
*FPType
) const {
654 // Denormals should always be enabled for f16 and f64.
655 if (!FPType
|| FPType
!= &llvm::APFloat::IEEEsingle())
656 return llvm::DenormalMode::getIEEE();
658 if (JA
.getOffloadingDeviceKind() == Action::OFK_HIP
||
659 JA
.getOffloadingDeviceKind() == Action::OFK_Cuda
) {
660 auto Arch
= getProcessorFromTargetID(getTriple(), JA
.getOffloadingArch());
661 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(Arch
);
662 if (FPType
&& FPType
== &llvm::APFloat::IEEEsingle() &&
663 DriverArgs
.hasFlag(options::OPT_fgpu_flush_denormals_to_zero
,
664 options::OPT_fno_gpu_flush_denormals_to_zero
,
665 getDefaultDenormsAreZeroForTarget(Kind
)))
666 return llvm::DenormalMode::getPreserveSign();
668 return llvm::DenormalMode::getIEEE();
671 const StringRef GpuArch
= getGPUArch(DriverArgs
);
672 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(GpuArch
);
674 // TODO: There are way too many flags that change this. Do we need to check
676 bool DAZ
= DriverArgs
.hasArg(options::OPT_cl_denorms_are_zero
) ||
677 getDefaultDenormsAreZeroForTarget(Kind
);
679 // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
680 // also implicit treated as zero (DAZ).
681 return DAZ
? llvm::DenormalMode::getPreserveSign() :
682 llvm::DenormalMode::getIEEE();
685 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList
&DriverArgs
,
686 llvm::AMDGPU::GPUKind Kind
) {
687 const unsigned ArchAttr
= llvm::AMDGPU::getArchAttrAMDGCN(Kind
);
688 bool HasWave32
= (ArchAttr
& llvm::AMDGPU::FEATURE_WAVE32
);
690 return !HasWave32
|| DriverArgs
.hasFlag(
691 options::OPT_mwavefrontsize64
, options::OPT_mno_wavefrontsize64
, false);
696 ROCMToolChain::ROCMToolChain(const Driver
&D
, const llvm::Triple
&Triple
,
698 : AMDGPUToolChain(D
, Triple
, Args
) {
699 RocmInstallation
.detectDeviceLibrary();
702 void AMDGPUToolChain::addClangTargetOptions(
703 const llvm::opt::ArgList
&DriverArgs
,
704 llvm::opt::ArgStringList
&CC1Args
,
705 Action::OffloadKind DeviceOffloadingKind
) const {
706 // Default to "hidden" visibility, as object level linking will not be
707 // supported for the foreseeable future.
708 if (!DriverArgs
.hasArg(options::OPT_fvisibility_EQ
,
709 options::OPT_fvisibility_ms_compat
)) {
710 CC1Args
.push_back("-fvisibility=hidden");
711 CC1Args
.push_back("-fapply-global-visibility-to-externs");
716 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList
&DriverArgs
) const {
717 return getProcessorFromTargetID(
718 getTriple(), DriverArgs
.getLastArgValue(options::OPT_mcpu_EQ
));
721 AMDGPUToolChain::ParsedTargetIDType
722 AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList
&DriverArgs
) const {
723 StringRef TargetID
= DriverArgs
.getLastArgValue(options::OPT_mcpu_EQ
);
724 if (TargetID
.empty())
725 return {None
, None
, None
};
727 llvm::StringMap
<bool> FeatureMap
;
728 auto OptionalGpuArch
= parseTargetID(getTriple(), TargetID
, &FeatureMap
);
729 if (!OptionalGpuArch
)
730 return {TargetID
.str(), None
, None
};
732 return {TargetID
.str(), OptionalGpuArch
->str(), FeatureMap
};
735 void AMDGPUToolChain::checkTargetID(
736 const llvm::opt::ArgList
&DriverArgs
) const {
737 auto PTID
= getParsedTargetID(DriverArgs
);
738 if (PTID
.OptionalTargetID
&& !PTID
.OptionalGPUArch
) {
739 getDriver().Diag(clang::diag::err_drv_bad_target_id
)
740 << *PTID
.OptionalTargetID
;
745 AMDGPUToolChain::detectSystemGPUs(const ArgList
&Args
,
746 SmallVector
<std::string
, 1> &GPUArchs
) const {
748 if (Arg
*A
= Args
.getLastArg(options::OPT_amdgpu_arch_tool_EQ
))
749 Program
= A
->getValue();
751 Program
= GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME
);
752 llvm::SmallString
<64> OutputFile
;
753 llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */,
755 llvm::FileRemover
OutputRemover(OutputFile
.c_str());
756 llvm::Optional
<llvm::StringRef
> Redirects
[] = {
762 std::string ErrorMessage
;
763 if (int Result
= llvm::sys::ExecuteAndWait(
764 Program
, {}, {}, Redirects
, /* SecondsToWait */ 0,
765 /*MemoryLimit*/ 0, &ErrorMessage
)) {
767 ErrorMessage
= "Exited with error code " + std::to_string(Result
);
768 } else if (Result
== -1) {
769 ErrorMessage
= "Execute failed: " + ErrorMessage
;
771 ErrorMessage
= "Crashed: " + ErrorMessage
;
774 return llvm::createStringError(std::error_code(),
775 Program
+ ": " + ErrorMessage
);
778 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> OutputBuf
=
779 llvm::MemoryBuffer::getFile(OutputFile
.c_str());
781 return llvm::createStringError(OutputBuf
.getError(),
782 "Failed to read stdout of " + Program
+
783 ": " + OutputBuf
.getError().message());
786 for (llvm::line_iterator
LineIt(**OutputBuf
); !LineIt
.is_at_end(); ++LineIt
) {
787 GPUArchs
.push_back(LineIt
->str());
789 return llvm::Error::success();
792 llvm::Error
AMDGPUToolChain::getSystemGPUArch(const ArgList
&Args
,
793 std::string
&GPUArch
) const {
794 // Detect the AMD GPUs installed in the system.
795 SmallVector
<std::string
, 1> GPUArchs
;
796 auto Err
= detectSystemGPUs(Args
, GPUArchs
);
800 if (GPUArchs
.empty()) {
801 return llvm::createStringError(std::error_code(),
802 "No AMD GPU detected in the system");
804 GPUArch
= GPUArchs
[0];
805 if (GPUArchs
.size() > 1) {
806 if (!llvm::all_equal(GPUArchs
))
807 return llvm::createStringError(
808 std::error_code(), "Multiple AMD GPUs found with different archs");
810 return llvm::Error::success();
813 void ROCMToolChain::addClangTargetOptions(
814 const llvm::opt::ArgList
&DriverArgs
, llvm::opt::ArgStringList
&CC1Args
,
815 Action::OffloadKind DeviceOffloadingKind
) const {
816 AMDGPUToolChain::addClangTargetOptions(DriverArgs
, CC1Args
,
817 DeviceOffloadingKind
);
819 // For the OpenCL case where there is no offload target, accept -nostdlib to
820 // disable bitcode linking.
821 if (DeviceOffloadingKind
== Action::OFK_None
&&
822 DriverArgs
.hasArg(options::OPT_nostdlib
))
825 if (DriverArgs
.hasArg(options::OPT_nogpulib
))
828 // Get the device name and canonicalize it
829 const StringRef GpuArch
= getGPUArch(DriverArgs
);
830 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(GpuArch
);
831 const StringRef CanonArch
= llvm::AMDGPU::getArchNameAMDGCN(Kind
);
832 std::string LibDeviceFile
= RocmInstallation
.getLibDeviceFile(CanonArch
);
833 auto ABIVer
= DeviceLibABIVersion::fromCodeObjectVersion(
834 getAMDGPUCodeObjectVersion(getDriver(), DriverArgs
));
835 if (!RocmInstallation
.checkCommonBitcodeLibs(CanonArch
, LibDeviceFile
,
839 bool Wave64
= isWave64(DriverArgs
, Kind
);
841 // TODO: There are way too many flags that change this. Do we need to check
843 bool DAZ
= DriverArgs
.hasArg(options::OPT_cl_denorms_are_zero
) ||
844 getDefaultDenormsAreZeroForTarget(Kind
);
845 bool FiniteOnly
= DriverArgs
.hasArg(options::OPT_cl_finite_math_only
);
848 DriverArgs
.hasArg(options::OPT_cl_unsafe_math_optimizations
);
849 bool FastRelaxedMath
= DriverArgs
.hasArg(options::OPT_cl_fast_relaxed_math
);
851 DriverArgs
.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt
);
853 // Add the OpenCL specific bitcode library.
854 llvm::SmallVector
<std::string
, 12> BCLibs
;
855 BCLibs
.push_back(RocmInstallation
.getOpenCLPath().str());
857 // Add the generic set of libraries.
858 BCLibs
.append(RocmInstallation
.getCommonBitcodeLibs(
859 DriverArgs
, LibDeviceFile
, Wave64
, DAZ
, FiniteOnly
, UnsafeMathOpt
,
860 FastRelaxedMath
, CorrectSqrt
, ABIVer
, false));
862 for (StringRef BCFile
: BCLibs
) {
863 CC1Args
.push_back("-mlink-builtin-bitcode");
864 CC1Args
.push_back(DriverArgs
.MakeArgString(BCFile
));
868 bool RocmInstallationDetector::checkCommonBitcodeLibs(
869 StringRef GPUArch
, StringRef LibDeviceFile
,
870 DeviceLibABIVersion ABIVer
) const {
871 if (!hasDeviceLibrary()) {
872 D
.Diag(diag::err_drv_no_rocm_device_lib
) << 0;
875 if (LibDeviceFile
.empty()) {
876 D
.Diag(diag::err_drv_no_rocm_device_lib
) << 1 << GPUArch
;
879 if (ABIVer
.requiresLibrary() && getABIVersionPath(ABIVer
).empty()) {
880 D
.Diag(diag::err_drv_no_rocm_device_lib
) << 2 << ABIVer
.toString();
886 llvm::SmallVector
<std::string
, 12>
887 RocmInstallationDetector::getCommonBitcodeLibs(
888 const llvm::opt::ArgList
&DriverArgs
, StringRef LibDeviceFile
, bool Wave64
,
889 bool DAZ
, bool FiniteOnly
, bool UnsafeMathOpt
, bool FastRelaxedMath
,
890 bool CorrectSqrt
, DeviceLibABIVersion ABIVer
, bool isOpenMP
= false) const {
891 llvm::SmallVector
<std::string
, 12> BCLibs
;
893 auto AddBCLib
= [&](StringRef BCFile
) { BCLibs
.push_back(BCFile
.str()); };
895 AddBCLib(getOCMLPath());
896 AddBCLib(getOCKLPath());
897 AddBCLib(getDenormalsAreZeroPath(DAZ
));
898 AddBCLib(getUnsafeMathPath(UnsafeMathOpt
|| FastRelaxedMath
));
899 AddBCLib(getFiniteOnlyPath(FiniteOnly
|| FastRelaxedMath
));
900 AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt
));
901 AddBCLib(getWavefrontSize64Path(Wave64
));
902 AddBCLib(LibDeviceFile
);
903 auto ABIVerPath
= getABIVersionPath(ABIVer
);
904 if (!ABIVerPath
.empty())
905 AddBCLib(ABIVerPath
);
910 bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg
*A
) const {
911 Option O
= A
->getOption();
912 if (O
.matches(options::OPT_fPIE
) || O
.matches(options::OPT_fpie
))
917 llvm::SmallVector
<std::string
, 12>
918 ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList
&DriverArgs
,
919 const std::string
&GPUArch
,
920 bool isOpenMP
) const {
921 auto Kind
= llvm::AMDGPU::parseArchAMDGCN(GPUArch
);
922 const StringRef CanonArch
= llvm::AMDGPU::getArchNameAMDGCN(Kind
);
924 std::string LibDeviceFile
= RocmInstallation
.getLibDeviceFile(CanonArch
);
925 auto ABIVer
= DeviceLibABIVersion::fromCodeObjectVersion(
926 getAMDGPUCodeObjectVersion(getDriver(), DriverArgs
));
927 if (!RocmInstallation
.checkCommonBitcodeLibs(CanonArch
, LibDeviceFile
,
931 // If --hip-device-lib is not set, add the default bitcode libraries.
932 // TODO: There are way too many flags that change this. Do we need to check
934 bool DAZ
= DriverArgs
.hasFlag(options::OPT_fgpu_flush_denormals_to_zero
,
935 options::OPT_fno_gpu_flush_denormals_to_zero
,
936 getDefaultDenormsAreZeroForTarget(Kind
));
937 bool FiniteOnly
= DriverArgs
.hasFlag(
938 options::OPT_ffinite_math_only
, options::OPT_fno_finite_math_only
, false);
940 DriverArgs
.hasFlag(options::OPT_funsafe_math_optimizations
,
941 options::OPT_fno_unsafe_math_optimizations
, false);
942 bool FastRelaxedMath
= DriverArgs
.hasFlag(options::OPT_ffast_math
,
943 options::OPT_fno_fast_math
, false);
944 bool CorrectSqrt
= DriverArgs
.hasFlag(
945 options::OPT_fhip_fp32_correctly_rounded_divide_sqrt
,
946 options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt
, true);
947 bool Wave64
= isWave64(DriverArgs
, Kind
);
949 return RocmInstallation
.getCommonBitcodeLibs(
950 DriverArgs
, LibDeviceFile
, Wave64
, DAZ
, FiniteOnly
, UnsafeMathOpt
,
951 FastRelaxedMath
, CorrectSqrt
, ABIVer
, isOpenMP
);