1 //===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "CommonArgs.h"
12 #include "HIPUtility.h"
14 #include "clang/Basic/Cuda.h"
15 #include "clang/Basic/TargetID.h"
16 #include "clang/Driver/Compilation.h"
17 #include "clang/Driver/Driver.h"
18 #include "clang/Driver/DriverDiagnostic.h"
19 #include "clang/Driver/InputInfo.h"
20 #include "clang/Driver/Options.h"
21 #include "clang/Driver/SanitizerArgs.h"
22 #include "llvm/Support/Alignment.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/TargetParser/TargetParser.h"
27 using namespace clang::driver
;
28 using namespace clang::driver::toolchains
;
29 using namespace clang::driver::tools
;
30 using namespace clang
;
31 using namespace llvm::opt
;
// Path of the platform's null device: "nul" on Windows, "/dev/null"
// everywhere else. Exactly one definition is active per platform.
#if defined(_WIN32) || defined(_WIN64)
#define NULL_FILE "nul"
#else
#define NULL_FILE "/dev/null"
#endif
39 void AMDGCN::Linker::constructLlvmLinkCommand(Compilation
&C
,
41 const InputInfoList
&Inputs
,
42 const InputInfo
&Output
,
43 const llvm::opt::ArgList
&Args
) const {
44 // Construct llvm-link command.
45 // The output from llvm-link is a bitcode file.
46 ArgStringList LlvmLinkArgs
;
48 assert(!Inputs
.empty() && "Must have at least one input.");
50 LlvmLinkArgs
.append({"-o", Output
.getFilename()});
51 for (auto Input
: Inputs
)
52 LlvmLinkArgs
.push_back(Input
.getFilename());
54 // Look for archive of bundled bitcode in arguments, and add temporary files
55 // for the extracted archive of bitcode to inputs.
56 auto TargetID
= Args
.getLastArgValue(options::OPT_mcpu_EQ
);
57 AddStaticDeviceLibsLinking(C
, *this, JA
, Inputs
, Args
, LlvmLinkArgs
, "amdgcn",
58 TargetID
, /*IsBitCodeSDL=*/true);
60 const char *LlvmLink
=
61 Args
.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
62 C
.addCommand(std::make_unique
<Command
>(JA
, *this, ResponseFileSupport::None(),
63 LlvmLink
, LlvmLinkArgs
, Inputs
,
/// Queue the lld job that links the device inputs into the file named by
/// \p Output (an HSA code object, per the comment below).
///
/// The argument list is assembled in this order: the fixed "-flavor ..."
/// prefix, the optional hipstdpar plugin option, LTO options, target features
/// ("-plugin-opt=-mattr=..."), "-plugin-opt=-force-import-all", rewritten
/// -mllvm values, "-save-temps" when enabled, debug-section compression
/// options, and finally a "--whole-archive" ... "--no-whole-archive" group
/// holding the OPT_Xoffload_linker arguments, "-o <output>", the inputs and
/// the static device libraries.
///
/// NOTE(review): this listing has gaps (part of the LldArgs initializer, some
/// statement heads and all closing braces are not visible). Confirm details
/// against the complete file before relying on this summary.
67 void AMDGCN::Linker::constructLldCommand(Compilation
&C
, const JobAction
&JA
,
68 const InputInfoList
&Inputs
,
69 const InputInfo
&Output
,
70 const llvm::opt::ArgList
&Args
) const {
71 // Construct lld command.
72 // The output from ld.lld is an HSA code object file.
// NOTE(review): the initializer elements between "-flavor" and the
// internalize-symbols option are not visible in this listing.
73 ArgStringList LldArgs
{"-flavor",
79 "-plugin-opt=-amdgpu-internalize-symbols"};
// When OPT_hipstdpar is present, pass the matching backend switch as a
// -plugin-opt.
80 if (Args
.hasArg(options::OPT_hipstdpar
))
81 LldArgs
.push_back("-plugin-opt=-amdgpu-enable-hipstdpar");
83 auto &TC
= getToolChain();
84 auto &D
= TC
.getDriver();
85 assert(!Inputs
.empty() && "Must have at least one input.");
// The driver's offload LTO mode decides the IsThinLTO flag given to
// addLTOOptions; the first input is passed along with the output.
86 bool IsThinLTO
= D
.getOffloadLTOMode() == LTOK_Thin
;
87 addLTOOptions(TC
, Args
, LldArgs
, Output
, Inputs
[0], IsThinLTO
);
89 // Extract all the -m options
90 std::vector
<llvm::StringRef
> Features
;
91 amdgpu::getAMDGPUTargetFeatures(D
, TC
.getTriple(), Args
, Features
);
93 // Add features to mattr such as cumode
// Each unified feature is appended, followed by ',' unless it compares equal
// to Features.back(). The option is only emitted when Features is non-empty.
94 std::string MAttrString
= "-plugin-opt=-mattr=";
95 for (auto OneFeature
: unifyTargetFeatures(Features
)) {
96 MAttrString
.append(Args
.MakeArgString(OneFeature
));
97 if (OneFeature
!= Features
.back())
98 MAttrString
.append(",");
100 if (!Features
.empty())
101 LldArgs
.push_back(Args
.MakeArgString(MAttrString
));
103 // ToDo: Remove this option after AMDGPU backend supports ISA-level linking.
104 // Since AMDGPU backend currently does not support ISA-level linking, all
105 // called functions need to be imported.
107 LldArgs
.push_back(Args
.MakeArgString("-plugin-opt=-force-import-all"));
// Every -mllvm value is rewritten as "-plugin-opt=<value>".
// NOTE(review): the statement that consumes the string is not visible here;
// presumably it is pushed onto LldArgs. Confirm.
109 for (const Arg
*A
: Args
.filtered(options::OPT_mllvm
)) {
111 Args
.MakeArgString(Twine("-plugin-opt=") + A
->getValue(0)));
114 if (C
.getDriver().isSaveTempsEnabled())
115 LldArgs
.push_back("-save-temps");
117 addLinkerCompressDebugSectionsOption(TC
, Args
, LldArgs
);
119 // Given that host and device linking happen in separate processes, the device
120 // linker doesn't always have the visibility as to which device symbols are
121 // needed by a program, especially for the device symbol dependencies that are
122 // introduced through the host symbol resolution.
123 // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B()
124 // (B.obj) In this case, the device linker doesn't know that A.obj actually
125 // depends on the kernel functions in B.obj. When linking to static device
126 // library, the device linker may drop some of the device global symbols if
127 // they aren't referenced. As a workaround, we are adding the
128 // --whole-archive flag such that all global symbols would be linked in.
129 LldArgs
.push_back("--whole-archive");
// Value 1 of each OPT_Xoffload_linker argument is examined: text following
// "-mllvm=" is rewritten as "-plugin-opt=<text>", and the unmodified value is
// also seen being pushed below.
// NOTE(review): the branch structure (an else, presumably) and the end of the
// loop are not visible in this listing. Confirm.
131 for (auto *Arg
: Args
.filtered(options::OPT_Xoffload_linker
)) {
132 StringRef ArgVal
= Arg
->getValue(1);
133 auto SplitArg
= ArgVal
.split("-mllvm=");
134 if (!SplitArg
.second
.empty()) {
136 Args
.MakeArgString(Twine("-plugin-opt=") + SplitArg
.second
));
138 LldArgs
.push_back(Args
.MakeArgString(ArgVal
));
143 LldArgs
.append({"-o", Output
.getFilename()});
144 for (auto Input
: Inputs
)
145 LldArgs
.push_back(Input
.getFilename());
147 // Look for archive of bundled bitcode in arguments, and add temporary files
148 // for the extracted archive of bitcode to inputs.
149 auto TargetID
= Args
.getLastArgValue(options::OPT_mcpu_EQ
);
150 AddStaticDeviceLibsLinking(C
, *this, JA
, Inputs
, Args
, LldArgs
, "amdgcn",
151 TargetID
, /*IsBitCodeSDL=*/true);
153 LldArgs
.push_back("--no-whole-archive");
// Resolve "lld" through the toolchain's program paths and queue the job with
// ResponseFileSupport::None().
155 const char *Lld
= Args
.MakeArgString(getToolChain().GetProgramPath("lld"));
156 C
.addCommand(std::make_unique
<Command
>(JA
, *this, ResponseFileSupport::None(),
157 Lld
, LldArgs
, Inputs
, Output
));
160 // For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode
161 // and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It
162 // calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE is
163 // promoted from experimental, we will switch to using that. TODO: consider
164 // if we want to run any targeted optimisations over IR here, over generic SPIR-V.
//
// Two commands are queued: constructLlvmLinkCommand writes the linked bitcode
// to Output, then SPIRV::constructTranslateCommand is invoked with Output
// passed twice (presumably as both the translator's output and its input, so
// the file is translated in place; confirm against the helper's signature).
166 void AMDGCN::Linker::constructLinkAndEmitSpirvCommand(
167 Compilation
&C
, const JobAction
&JA
, const InputInfoList
&Inputs
,
168 const InputInfo
&Output
, const llvm::opt::ArgList
&Args
) const {
169 assert(!Inputs
.empty() && "Must have at least one input.");
171 constructLlvmLinkCommand(C
, JA
, Inputs
, Output
, Args
);
173 // Linked BC is now in Output
175 // Emit SPIR-V binary.
// NOTE(review): one translator argument between the max-version and the
// extra-diexpressions entries is not visible in this listing.
176 llvm::opt::ArgStringList TrArgs
{
177 "--spirv-max-version=1.6",
179 "--spirv-allow-extra-diexpressions",
180 "--spirv-allow-unknown-intrinsics",
181 "--spirv-lower-const-expr",
182 "--spirv-preserve-auxdata",
183 "--spirv-debug-info-version=nonsemantic-shader-200"};
184 SPIRV::constructTranslateCommand(C
, *this, JA
, Output
, Output
, TrArgs
);
187 // For amdgcn the inputs of the linker job are device bitcode and output is
188 // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
189 // llc, then lld steps.
//
// The body visible here only dispatches, in this order:
//   1. first input is TY_Image and the job produces TY_Object
//        -> HIP::constructGenerateObjFileFromHIPFatBinary
//   2. job produces TY_HIP_FATBIN -> HIP::constructHIPFatbinCommand
//   3. job produces TY_LLVM_BC    -> constructLlvmLinkCommand
//   4. effective triple is SPIR-V -> constructLinkAndEmitSpirvCommand
//   5. otherwise                  -> constructLldCommand
// NOTE(review): no opt or llc invocation is visible in this function, so the
// sentence above may be stale. Confirm.
// NOTE(review): `Args` is used below but its parameter line, and the trailing
// arguments of the two HIP:: calls, are not visible in this listing.
190 void AMDGCN::Linker::ConstructJob(Compilation
&C
, const JobAction
&JA
,
191 const InputInfo
&Output
,
192 const InputInfoList
&Inputs
,
194 const char *LinkingOutput
) const {
195 if (Inputs
.size() > 0 &&
196 Inputs
[0].getType() == types::TY_Image
&&
197 JA
.getType() == types::TY_Object
)
198 return HIP::constructGenerateObjFileFromHIPFatBinary(C
, Output
, Inputs
,
201 if (JA
.getType() == types::TY_HIP_FATBIN
)
202 return HIP::constructHIPFatbinCommand(C
, JA
, Output
.getFilename(), Inputs
,
205 if (JA
.getType() == types::TY_LLVM_BC
)
206 return constructLlvmLinkCommand(C
, JA
, Inputs
, Output
, Args
);
208 if (getToolChain().getEffectiveTriple().isSPIRV())
209 return constructLinkAndEmitSpirvCommand(C
, JA
, Inputs
, Output
, Args
);
211 return constructLldCommand(C
, JA
, Inputs
, Output
, Args
);
/// Construct the HIP device toolchain on top of ROCMToolChain, remembering the
/// host toolchain \p HostTC.
///
/// Besides initialising the base and HostTC, the constructor adds the driver's
/// directory to the program search paths and warns
/// (warn_drv_unsupported_option_for_target) about every -fsanitize= value that
/// parseSanitizerValue does not map to SanitizerKind::Address.
///
/// NOTE(review): the tail of the hasFlag(...) condition and the statement it
/// guards are not visible in this listing. Presumably the sanitizer diagnosis
/// is skipped when GPU sanitizing is disabled. Confirm.
214 HIPAMDToolChain::HIPAMDToolChain(const Driver
&D
, const llvm::Triple
&Triple
,
215 const ToolChain
&HostTC
, const ArgList
&Args
)
216 : ROCMToolChain(D
, Triple
, Args
), HostTC(HostTC
) {
217 // Lookup binaries into the driver directory, this is used to
218 // discover the clang-offload-bundler executable.
219 getProgramPaths().push_back(getDriver().Dir
);
221 // Diagnose unsupported sanitizer options only once.
222 if (!Args
.hasFlag(options::OPT_fgpu_sanitize
, options::OPT_fno_gpu_sanitize
,
// Each -fsanitize= value is parsed without group expansion
// (AllowGroups=false); anything other than Address is reported together with
// the target triple.
225 for (auto *A
: Args
.filtered(options::OPT_fsanitize_EQ
)) {
226 SanitizerMask K
= parseSanitizerValue(A
->getValue(), /*AllowGroups=*/false);
227 if (K
!= SanitizerKind::Address
)
228 D
.getDiags().Report(clang::diag::warn_drv_unsupported_option_for_target
)
229 << A
->getAsString(Args
) << getTriple().str();
/// Append the cc1 options needed to compile HIP device code.
///
/// The host toolchain contributes its target options first. This function
/// then adds "-fcuda-is-device" and "-fno-threadsafe-statics", the AMDGPU
/// -mllvm switches, an optional "--gpu-max-threads-per-block=<N>",
/// "-fcuda-allow-variadic-functions", hidden default visibility (unless the
/// user chose a visibility option) and, for non-SPIR-V targets, one
/// "-mlink-builtin-bitcode" / "-mlink-bitcode-file" pair per library returned
/// by getDeviceLibs.
///
/// \param DriverArgs           Parsed driver arguments.
/// \param CC1Args              cc1 argument list that is appended to.
/// \param DeviceOffloadingKind Must be Action::OFK_HIP (asserted).
///
/// NOTE(review): several lines are not visible in this listing (the end of the
/// -fgpu-rdc hasFlag condition, the declaration of ArgStr, closing braces).
233 void HIPAMDToolChain::addClangTargetOptions(
234 const llvm::opt::ArgList
&DriverArgs
, llvm::opt::ArgStringList
&CC1Args
,
235 Action::OffloadKind DeviceOffloadingKind
) const {
236 HostTC
.addClangTargetOptions(DriverArgs
, CC1Args
, DeviceOffloadingKind
);
238 assert(DeviceOffloadingKind
== Action::OFK_HIP
&&
239 "Only HIP offloading kinds are supported for GPUs.");
241 CC1Args
.append({"-fcuda-is-device", "-fno-threadsafe-statics"});
// NOTE(review): the default value passed to hasFlag is not visible here. As
// written, symbol internalisation is requested when the rdc flag evaluates to
// false.
243 if (!DriverArgs
.hasFlag(options::OPT_fgpu_rdc
, options::OPT_fno_gpu_rdc
,
245 CC1Args
.append({"-mllvm", "-amdgpu-internalize-symbols"});
// hasArgNoClaim: the option is only queried here, not claimed.
246 if (DriverArgs
.hasArgNoClaim(options::OPT_hipstdpar
))
247 CC1Args
.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
249 StringRef MaxThreadsPerBlock
=
250 DriverArgs
.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ
);
// NOTE(review): the declaration that receives the Twine result (ArgStr, used
// two lines below) is not visible in this listing.
251 if (!MaxThreadsPerBlock
.empty()) {
253 (Twine("--gpu-max-threads-per-block=") + MaxThreadsPerBlock
).str();
254 CC1Args
.push_back(DriverArgs
.MakeArgStringRef(ArgStr
));
257 CC1Args
.push_back("-fcuda-allow-variadic-functions");
259 // Default to "hidden" visibility, as object level linking will not be
260 // supported for the foreseeable future.
261 if (!DriverArgs
.hasArg(options::OPT_fvisibility_EQ
,
262 options::OPT_fvisibility_ms_compat
)) {
263 CC1Args
.append({"-fvisibility=hidden"});
264 CC1Args
.push_back("-fapply-global-visibility-to-externs");
267 if (getEffectiveTriple().isSPIRV()) {
268 // For SPIR-V we embed the command-line into the generated binary, in order
269 // to retrieve it at JIT time and be able to do target specific compilation
270 // with options that match the user-supplied ones.
271 if (!DriverArgs
.hasArg(options::OPT_fembed_bitcode_marker
))
272 CC1Args
.push_back("-fembed-bitcode=marker");
273 return; // No DeviceLibs for SPIR-V.
// ShouldInternalize selects which of the two link options precedes the
// library path.
276 for (auto BCFile
: getDeviceLibs(DriverArgs
)) {
277 CC1Args
.push_back(BCFile
.ShouldInternalize
? "-mlink-builtin-bitcode"
278 : "-mlink-bitcode-file");
279 CC1Args
.push_back(DriverArgs
.MakeArgString(BCFile
.Path
));
/// Build the argument list used for device compilation of one bound
/// architecture.
///
/// The host toolchain's TranslateArgs is consulted first. The arguments of
/// \p Args are then filtered through shouldSkipSanitizeOption, and when
/// BoundArch is non-empty any existing -mcpu= is erased and replaced by
/// "-mcpu=<BoundArch>".
///
/// NOTE(review): this listing has gaps. The BoundArch parameter line, the
/// statement guarded by the shouldSkipSanitizeOption check, the end of the
/// BoundArch block and the return are not visible. As shown, the `new
/// DerivedArgList` assignment directly follows the host call; presumably it is
/// guarded (e.g. only when the host returned null). Confirm against the
/// complete file.
283 llvm::opt::DerivedArgList
*
284 HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList
&Args
,
286 Action::OffloadKind DeviceOffloadKind
) const {
287 DerivedArgList
*DAL
=
288 HostTC
.TranslateArgs(Args
, BoundArch
, DeviceOffloadKind
);
290 DAL
= new DerivedArgList(Args
.getBaseArgs());
292 const OptTable
&Opts
= getDriver().getOpts();
294 for (Arg
*A
: Args
) {
295 if (!shouldSkipSanitizeOption(*this, Args
, BoundArch
, A
))
// Pin the device CPU to the architecture this job is bound to.
299 if (!BoundArch
.empty()) {
300 DAL
->eraseArg(options::OPT_mcpu_EQ
);
301 DAL
->AddJoinedArg(nullptr, Opts
.getOption(options::OPT_mcpu_EQ
), BoundArch
);
308 Tool
*HIPAMDToolChain::buildLinker() const {
309 assert(getTriple().getArch() == llvm::Triple::amdgcn
||
310 getTriple().getArch() == llvm::Triple::spirv64
);
311 return new tools::AMDGCN::Linker(*this);
314 void HIPAMDToolChain::addClangWarningOptions(ArgStringList
&CC1Args
) const {
315 AMDGPUToolChain::addClangWarningOptions(CC1Args
);
316 HostTC
.addClangWarningOptions(CC1Args
);
319 ToolChain::CXXStdlibType
320 HIPAMDToolChain::GetCXXStdlibType(const ArgList
&Args
) const {
321 return HostTC
.GetCXXStdlibType(Args
);
324 void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList
&DriverArgs
,
325 ArgStringList
&CC1Args
) const {
326 HostTC
.AddClangSystemIncludeArgs(DriverArgs
, CC1Args
);
329 void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs(
330 const ArgList
&Args
, ArgStringList
&CC1Args
) const {
331 HostTC
.AddClangCXXStdlibIncludeArgs(Args
, CC1Args
);
334 void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList
&Args
,
335 ArgStringList
&CC1Args
) const {
336 HostTC
.AddIAMCUIncludeArgs(Args
, CC1Args
);
339 void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList
&DriverArgs
,
340 ArgStringList
&CC1Args
) const {
341 RocmInstallation
->AddHIPIncludeArgs(DriverArgs
, CC1Args
);
344 SanitizerMask
HIPAMDToolChain::getSupportedSanitizers() const {
345 // The HIPAMDToolChain only supports sanitizers in the sense that it allows
346 // sanitizer arguments on the command line if they are supported by the host
347 // toolchain. The HIPAMDToolChain will actually ignore any command line
348 // arguments for any of these "supported" sanitizers. That means that no
349 // sanitization of device code is actually supported at this time.
351 // This behavior is necessary because the host and device toolchains
352 // invocations often share the command line, so the device toolchain must
353 // tolerate flags meant only for the host toolchain.
354 return HostTC
.getSupportedSanitizers();
357 VersionTuple
HIPAMDToolChain::computeMSVCVersion(const Driver
*D
,
358 const ArgList
&Args
) const {
359 return HostTC
.computeMSVCVersion(D
, Args
);
/// Collect the device bitcode libraries to link for HIP device compilation.
///
/// Two modes are visible: when --hip-device-lib values are given, each name is
/// looked up in the library search paths and err_drv_no_such_file is reported
/// for names that are not found. Otherwise the default set is assembled: the
/// ASan runtime when GPU sanitizing needs it, the HIP library, the common
/// device libraries for the GPU arch, and an optional instrumentation library.
///
/// NOTE(review): this listing has gaps. The early-return statements, the
/// declarations of FullName and InstLib, the end of the custom diagnostic
/// string, the else branches and closing braces are not visible. Presumably
/// an empty list is returned for -nogpulib / "amdgcnspirv" and after the
/// error diagnostics. Confirm against the complete file.
362 llvm::SmallVector
<ToolChain::BitCodeLibraryInfo
, 12>
363 HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList
&DriverArgs
) const {
364 llvm::SmallVector
<BitCodeLibraryInfo
, 12> BCLibs
;
365 if (DriverArgs
.hasArg(options::OPT_nogpulib
) ||
366 getGPUArch(DriverArgs
) == "amdgcnspirv")
368 ArgStringList LibraryPaths
;
370 // Find in --hip-device-lib-path and HIP_DEVICE_LIB_PATH.
371 for (StringRef Path
: RocmInstallation
->getRocmDeviceLibPathArg())
372 LibraryPaths
.push_back(DriverArgs
.MakeArgString(Path
));
374 addDirectoryList(DriverArgs
, LibraryPaths
, "", "HIP_DEVICE_LIB_PATH");
376 // Maintain compatibility with --hip-device-lib.
377 auto BCLibArgs
= DriverArgs
.getAllArgValues(options::OPT_hip_device_lib_EQ
);
378 if (!BCLibArgs
.empty()) {
// For every requested library name, each search path is tried in order by
// joining path and name and testing for existence.
379 llvm::for_each(BCLibArgs
, [&](StringRef BCName
) {
381 for (StringRef LibraryPath
: LibraryPaths
) {
382 SmallString
<128> Path(LibraryPath
);
383 llvm::sys::path::append(Path
, BCName
);
385 if (llvm::sys::fs::exists(FullName
)) {
386 BCLibs
.push_back(FullName
);
390 getDriver().Diag(diag::err_drv_no_such_file
) << BCName
;
393 if (!RocmInstallation
->hasDeviceLibrary()) {
394 getDriver().Diag(diag::err_drv_no_rocm_device_lib
) << 0;
397 StringRef GpuArch
= getGPUArch(DriverArgs
);
398 assert(!GpuArch
.empty() && "Must have an explicit GPU arch.");
400 // If --hip-device-lib is not set, add the default bitcode libraries.
401 if (DriverArgs
.hasFlag(options::OPT_fgpu_sanitize
,
402 options::OPT_fno_gpu_sanitize
, true) &&
403 getSanitizerArgs(DriverArgs
).needsAsanRt()) {
404 auto AsanRTL
= RocmInstallation
->getAsanRTLPath();
// A missing ASan runtime is reported through a custom error diagnostic.
405 if (AsanRTL
.empty()) {
406 unsigned DiagID
= getDriver().getDiags().getCustomDiagID(
407 DiagnosticsEngine::Error
,
408 "AMDGPU address sanitizer runtime library (asanrtl) is not found. "
409 "Please install ROCm device library which supports address "
411 getDriver().Diag(DiagID
);
// The ASan runtime is added with ShouldInternalize=false.
414 BCLibs
.emplace_back(AsanRTL
, /*ShouldInternalize=*/false);
417 // Add the HIP specific bitcode library.
418 BCLibs
.push_back(RocmInstallation
->getHIPPath());
420 // Add common device libraries like ocml etc.
421 for (StringRef N
: getCommonDeviceLibNames(DriverArgs
, GpuArch
.str()))
422 BCLibs
.emplace_back(N
);
424 // Add instrument lib.
// NOTE(review): the variable receiving this value (InstLib, used below) and
// the handling of an empty value are not visible in this listing.
426 DriverArgs
.getLastArgValue(options::OPT_gpu_instrument_lib_EQ
);
429 if (llvm::sys::fs::exists(InstLib
))
430 BCLibs
.push_back(InstLib
);
432 getDriver().Diag(diag::err_drv_no_such_file
) << InstLib
;
438 void HIPAMDToolChain::checkTargetID(
439 const llvm::opt::ArgList
&DriverArgs
) const {
440 auto PTID
= getParsedTargetID(DriverArgs
);
441 if (PTID
.OptionalTargetID
&& !PTID
.OptionalGPUArch
&&
442 PTID
.OptionalTargetID
!= "amdgcnspirv")
443 getDriver().Diag(clang::diag::err_drv_bad_target_id
)
444 << *PTID
.OptionalTargetID
;