1 //===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===---------------------------------------------------------------------===//
9 // This tool works as a wrapper over a linking job. This tool is used to create
10 // linked device images for offloading. It scans the linker's input for embedded
11 // device offloading data stored in sections `.llvm.offloading` and extracts it
12 // as a temporary file. The extracted device files will then be passed to a
13 // device linking job to create a final device image.
15 //===---------------------------------------------------------------------===//
17 #include "OffloadWrapper.h"
18 #include "clang/Basic/Version.h"
19 #include "llvm/BinaryFormat/Magic.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/CodeGen/CommandFlags.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DiagnosticPrinter.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/IRReader/IRReader.h"
26 #include "llvm/LTO/LTO.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Object/Archive.h"
29 #include "llvm/Object/ArchiveWriter.h"
30 #include "llvm/Object/Binary.h"
31 #include "llvm/Object/ELFObjectFile.h"
32 #include "llvm/Object/IRObjectFile.h"
33 #include "llvm/Object/ObjectFile.h"
34 #include "llvm/Object/OffloadBinary.h"
35 #include "llvm/Option/ArgList.h"
36 #include "llvm/Option/OptTable.h"
37 #include "llvm/Option/Option.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Errc.h"
40 #include "llvm/Support/FileOutputBuffer.h"
41 #include "llvm/Support/FileSystem.h"
42 #include "llvm/Support/InitLLVM.h"
43 #include "llvm/Support/MemoryBuffer.h"
44 #include "llvm/Support/Parallel.h"
45 #include "llvm/Support/Path.h"
46 #include "llvm/Support/Program.h"
47 #include "llvm/Support/Signals.h"
48 #include "llvm/Support/SourceMgr.h"
49 #include "llvm/Support/StringSaver.h"
50 #include "llvm/Support/TargetSelect.h"
51 #include "llvm/Support/WithColor.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Target/TargetMachine.h"
54 #include "llvm/TargetParser/Host.h"
59 using namespace llvm::opt
;
60 using namespace llvm::object
;
62 /// Path of the current binary.
63 static const char *LinkerExecutable
;
65 /// Save intermediary results.
66 static bool SaveTemps
= false;
68 /// Print arguments without executing.
69 static bool DryRun
= false;
71 /// Print verbose output.
72 static bool Verbose
= false;
74 /// Filename of the executable being created.
75 static StringRef ExecutableName
;
77 /// Binary path for the CUDA installation.
78 static std::string CudaBinaryPath
;
80 /// Mutex lock to protect writes to shared TempFiles in parallel.
81 static std::mutex TempFilesMutex
;
83 /// Temporary files created by the linker wrapper.
84 static std::list
<SmallString
<128>> TempFiles
;
86 /// Codegen flags for LTO backend.
87 static codegen::RegisterCodeGenFlags CodeGenFlags
;
89 /// Global flag to indicate that the LTO pipeline threw an error.
90 static std::atomic
<bool> LTOError
;
92 using OffloadingImage
= OffloadBinary::OffloadingImage
;
95 // Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
96 template <> struct DenseMapInfo
<OffloadKind
> {
97 static inline OffloadKind
getEmptyKey() { return OFK_LAST
; }
98 static inline OffloadKind
getTombstoneKey() {
99 return static_cast<OffloadKind
>(OFK_LAST
+ 1);
101 static unsigned getHashValue(const OffloadKind
&Val
) { return Val
; }
103 static bool isEqual(const OffloadKind
&LHS
, const OffloadKind
&RHS
) {
110 using std::error_code
;
112 /// Must not overlap with llvm::opt::DriverFlag.
114 WrapperOnlyOption
= (1 << 4), // Options only used by the linker wrapper.
115 DeviceOnlyOption
= (1 << 5), // Options only used for device linking.
119 OPT_INVALID
= 0, // This is not an option ID.
120 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
121 #include "LinkerWrapperOpts.inc"
126 #define PREFIX(NAME, VALUE) \
127 static constexpr StringLiteral NAME##_init[] = VALUE; \
128 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
129 std::size(NAME##_init) - 1);
130 #include "LinkerWrapperOpts.inc"
133 static constexpr OptTable::Info InfoTable
[] = {
134 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
135 #include "LinkerWrapperOpts.inc"
139 class WrapperOptTable
: public opt::GenericOptTable
{
141 WrapperOptTable() : opt::GenericOptTable(InfoTable
) {}
144 const OptTable
&getOptTable() {
145 static const WrapperOptTable
*Table
= []() {
146 auto Result
= std::make_unique
<WrapperOptTable
>();
147 return Result
.release();
152 void printCommands(ArrayRef
<StringRef
> CmdArgs
) {
156 llvm::errs() << " \"" << CmdArgs
.front() << "\" ";
157 for (auto IC
= std::next(CmdArgs
.begin()), IE
= CmdArgs
.end(); IC
!= IE
; ++IC
)
158 llvm::errs() << *IC
<< (std::next(IC
) != IE
? " " : "\n");
161 [[noreturn
]] void reportError(Error E
) {
163 logAllUnhandledErrors(std::move(E
),
164 WithColor::error(errs(), LinkerExecutable
));
168 /// Create an extra user-specified \p OffloadFile.
169 /// TODO: We should find a way to wrap these as libraries instead.
170 Expected
<OffloadFile
> getInputBitcodeLibrary(StringRef Input
) {
171 auto [Device
, Path
] = StringRef(Input
).split('=');
172 auto [String
, Arch
] = Device
.rsplit('-');
173 auto [Kind
, Triple
] = String
.split('-');
175 llvm::ErrorOr
<std::unique_ptr
<MemoryBuffer
>> ImageOrError
=
176 llvm::MemoryBuffer::getFileOrSTDIN(Path
);
177 if (std::error_code EC
= ImageOrError
.getError())
178 return createFileError(Path
, EC
);
180 OffloadingImage Image
{};
181 Image
.TheImageKind
= IMG_Bitcode
;
182 Image
.TheOffloadKind
= getOffloadKind(Kind
);
183 Image
.StringData
["triple"] = Triple
;
184 Image
.StringData
["arch"] = Arch
;
185 Image
.Image
= std::move(*ImageOrError
);
187 std::unique_ptr
<MemoryBuffer
> Binary
=
188 MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image
));
189 auto NewBinaryOrErr
= OffloadBinary::create(*Binary
);
191 return NewBinaryOrErr
.takeError();
192 return OffloadFile(std::move(*NewBinaryOrErr
), std::move(Binary
));
195 std::string
getMainExecutable(const char *Name
) {
196 void *Ptr
= (void *)(intptr_t)&getMainExecutable
;
197 auto COWPath
= sys::fs::getMainExecutable(Name
, Ptr
);
198 return sys::path::parent_path(COWPath
).str();
201 /// Get a temporary filename suitable for output.
202 Expected
<StringRef
> createOutputFile(const Twine
&Prefix
, StringRef Extension
) {
203 std::scoped_lock
<decltype(TempFilesMutex
)> Lock(TempFilesMutex
);
204 SmallString
<128> OutputFile
;
206 (Prefix
+ "." + Extension
).toNullTerminatedStringRef(OutputFile
);
208 if (std::error_code EC
=
209 sys::fs::createTemporaryFile(Prefix
, Extension
, OutputFile
))
210 return createFileError(OutputFile
, EC
);
213 TempFiles
.emplace_back(std::move(OutputFile
));
214 return TempFiles
.back();
217 /// Execute the command \p ExecutablePath with the arguments \p Args.
218 Error
executeCommands(StringRef ExecutablePath
, ArrayRef
<StringRef
> Args
) {
219 if (Verbose
|| DryRun
)
223 if (sys::ExecuteAndWait(ExecutablePath
, Args
))
224 return createStringError(inconvertibleErrorCode(),
225 "'" + sys::path::filename(ExecutablePath
) + "'" +
227 return Error::success();
230 Expected
<std::string
> findProgram(StringRef Name
, ArrayRef
<StringRef
> Paths
) {
232 ErrorOr
<std::string
> Path
= sys::findProgramByName(Name
, Paths
);
234 Path
= sys::findProgramByName(Name
);
238 return createStringError(Path
.getError(),
239 "Unable to find '" + Name
+ "' in path");
243 /// Runs the wrapped linker job with the newly created input.
244 Error
runLinker(ArrayRef
<StringRef
> Files
, const ArgList
&Args
) {
245 llvm::TimeTraceScope
TimeScope("Execute host linker");
247 // Render the linker arguments and add the newly created image. We add it
248 // after the output file to ensure it is linked with the correct libraries.
249 StringRef LinkerPath
= Args
.getLastArgValue(OPT_linker_path_EQ
);
250 ArgStringList NewLinkerArgs
;
251 for (const opt::Arg
*Arg
: Args
) {
252 // Do not forward arguments only intended for the linker wrapper.
253 if (Arg
->getOption().hasFlag(WrapperOnlyOption
))
256 Arg
->render(Args
, NewLinkerArgs
);
257 if (Arg
->getOption().matches(OPT_o
))
258 llvm::transform(Files
, std::back_inserter(NewLinkerArgs
),
259 [&](StringRef Arg
) { return Args
.MakeArgString(Arg
); });
262 SmallVector
<StringRef
> LinkerArgs({LinkerPath
});
263 for (StringRef Arg
: NewLinkerArgs
)
264 LinkerArgs
.push_back(Arg
);
265 if (Error Err
= executeCommands(LinkerPath
, LinkerArgs
))
267 return Error::success();
270 void printVersion(raw_ostream
&OS
) {
271 OS
<< clang::getClangToolFullVersion("clang-linker-wrapper") << '\n';
276 fatbinary(ArrayRef
<std::pair
<StringRef
, StringRef
>> InputFiles
,
277 const ArgList
&Args
) {
278 llvm::TimeTraceScope
TimeScope("NVPTX fatbinary");
279 // NVPTX uses the fatbinary program to bundle the linked images.
280 Expected
<std::string
> FatBinaryPath
=
281 findProgram("fatbinary", {CudaBinaryPath
+ "/bin"});
283 return FatBinaryPath
.takeError();
286 Args
.getLastArgValue(OPT_host_triple_EQ
, sys::getDefaultTargetTriple()));
288 // Create a new file to write the linked device image to.
290 createOutputFile(sys::path::filename(ExecutableName
), "fatbin");
292 return TempFileOrErr
.takeError();
294 SmallVector
<StringRef
, 16> CmdArgs
;
295 CmdArgs
.push_back(*FatBinaryPath
);
296 CmdArgs
.push_back(Triple
.isArch64Bit() ? "-64" : "-32");
297 CmdArgs
.push_back("--create");
298 CmdArgs
.push_back(*TempFileOrErr
);
299 for (const auto &[File
, Arch
] : InputFiles
)
301 Args
.MakeArgString("--image=profile=" + Arch
+ ",file=" + File
));
303 if (Error Err
= executeCommands(*FatBinaryPath
, CmdArgs
))
304 return std::move(Err
);
306 return *TempFileOrErr
;
312 fatbinary(ArrayRef
<std::pair
<StringRef
, StringRef
>> InputFiles
,
313 const ArgList
&Args
) {
314 llvm::TimeTraceScope
TimeScope("AMDGPU Fatbinary");
316 // AMDGPU uses the clang-offload-bundler to bundle the linked images.
317 Expected
<std::string
> OffloadBundlerPath
= findProgram(
318 "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
319 if (!OffloadBundlerPath
)
320 return OffloadBundlerPath
.takeError();
323 Args
.getLastArgValue(OPT_host_triple_EQ
, sys::getDefaultTargetTriple()));
325 // Create a new file to write the linked device image to.
327 createOutputFile(sys::path::filename(ExecutableName
), "hipfb");
329 return TempFileOrErr
.takeError();
331 BumpPtrAllocator Alloc
;
332 StringSaver
Saver(Alloc
);
334 SmallVector
<StringRef
, 16> CmdArgs
;
335 CmdArgs
.push_back(*OffloadBundlerPath
);
336 CmdArgs
.push_back("-type=o");
337 CmdArgs
.push_back("-bundle-align=4096");
339 SmallVector
<StringRef
> Targets
= {"-targets=host-x86_64-unknown-linux"};
340 for (const auto &[File
, Arch
] : InputFiles
)
341 Targets
.push_back(Saver
.save("hipv4-amdgcn-amd-amdhsa--" + Arch
));
342 CmdArgs
.push_back(Saver
.save(llvm::join(Targets
, ",")));
344 CmdArgs
.push_back("-input=/dev/null");
345 for (const auto &[File
, Arch
] : InputFiles
)
346 CmdArgs
.push_back(Saver
.save("-input=" + File
));
348 CmdArgs
.push_back(Saver
.save("-output=" + *TempFileOrErr
));
350 if (Error Err
= executeCommands(*OffloadBundlerPath
, CmdArgs
))
351 return std::move(Err
);
353 return *TempFileOrErr
;
355 } // namespace amdgcn
358 Expected
<StringRef
> clang(ArrayRef
<StringRef
> InputFiles
, const ArgList
&Args
) {
359 llvm::TimeTraceScope
TimeScope("Clang");
360 // Use `clang` to invoke the appropriate device tools.
361 Expected
<std::string
> ClangPath
=
362 findProgram("clang", {getMainExecutable("clang")});
364 return ClangPath
.takeError();
366 const llvm::Triple
Triple(Args
.getLastArgValue(OPT_triple_EQ
));
367 StringRef Arch
= Args
.getLastArgValue(OPT_arch_EQ
);
370 // Create a new file to write the linked device image to. Assume that the
371 // input filename already has the device and architecture.
373 createOutputFile(sys::path::filename(ExecutableName
) + "." +
374 Triple
.getArchName() + "." + Arch
,
377 return TempFileOrErr
.takeError();
379 StringRef OptLevel
= Args
.getLastArgValue(OPT_opt_level
, "O2");
380 SmallVector
<StringRef
, 16> CmdArgs
{
384 Args
.MakeArgString("--target=" + Triple
.getTriple()),
385 Triple
.isAMDGPU() ? Args
.MakeArgString("-mcpu=" + Arch
)
386 : Args
.MakeArgString("-march=" + Arch
),
387 Args
.MakeArgString("-" + OptLevel
),
388 "-Wl,--no-undefined",
391 for (StringRef InputFile
: InputFiles
)
392 CmdArgs
.push_back(InputFile
);
394 // If this is CPU offloading we copy the input libraries.
395 if (!Triple
.isAMDGPU() && !Triple
.isNVPTX()) {
396 CmdArgs
.push_back("-Wl,-Bsymbolic");
397 CmdArgs
.push_back("-shared");
398 ArgStringList LinkerArgs
;
399 for (const opt::Arg
*Arg
: Args
.filtered(OPT_library
, OPT_library_path
))
400 Arg
->render(Args
, LinkerArgs
);
401 for (const opt::Arg
*Arg
: Args
.filtered(OPT_rpath
))
402 LinkerArgs
.push_back(
403 Args
.MakeArgString("-Wl,-rpath," + StringRef(Arg
->getValue())));
404 llvm::copy(LinkerArgs
, std::back_inserter(CmdArgs
));
407 // Pass on -mllvm options to the clang invocation.
408 for (const opt::Arg
*Arg
: Args
.filtered(OPT_mllvm
)) {
409 CmdArgs
.push_back("-mllvm");
410 CmdArgs
.push_back(Arg
->getValue());
413 if (Args
.hasArg(OPT_debug
))
414 CmdArgs
.push_back("-g");
417 CmdArgs
.push_back("-save-temps");
420 CmdArgs
.push_back("-v");
422 if (!CudaBinaryPath
.empty())
423 CmdArgs
.push_back(Args
.MakeArgString("--cuda-path=" + CudaBinaryPath
));
425 for (StringRef Arg
: Args
.getAllArgValues(OPT_ptxas_arg
))
426 llvm::copy(SmallVector
<StringRef
>({"-Xcuda-ptxas", Arg
}),
427 std::back_inserter(CmdArgs
));
429 for (StringRef Arg
: Args
.getAllArgValues(OPT_linker_arg_EQ
))
430 CmdArgs
.push_back(Args
.MakeArgString("-Wl," + Arg
));
432 for (StringRef Arg
: Args
.getAllArgValues(OPT_builtin_bitcode_EQ
)) {
433 if (llvm::Triple(Arg
.split('=').first
) == Triple
)
434 CmdArgs
.append({"-Xclang", "-mlink-builtin-bitcode", "-Xclang",
435 Args
.MakeArgString(Arg
.split('=').second
)});
438 // The OpenMPOpt pass can introduce new calls and is expensive, we do not want
439 // this when running CodeGen through clang.
440 if (Args
.hasArg(OPT_clang_backend
) || Args
.hasArg(OPT_builtin_bitcode_EQ
))
441 CmdArgs
.append({"-mllvm", "-openmp-opt-disable"});
443 if (Error Err
= executeCommands(*ClangPath
, CmdArgs
))
444 return std::move(Err
);
446 return *TempFileOrErr
;
448 } // namespace generic
450 Expected
<StringRef
> linkDevice(ArrayRef
<StringRef
> InputFiles
,
451 const ArgList
&Args
) {
452 const llvm::Triple
Triple(Args
.getLastArgValue(OPT_triple_EQ
));
453 switch (Triple
.getArch()) {
455 case Triple::nvptx64
:
459 case Triple::aarch64
:
460 case Triple::aarch64_be
:
462 case Triple::ppc64le
:
463 return generic::clang(InputFiles
, Args
);
465 return createStringError(inconvertibleErrorCode(),
466 Triple
.getArchName() +
467 " linking is not supported");
471 void diagnosticHandler(const DiagnosticInfo
&DI
) {
472 std::string ErrStorage
;
473 raw_string_ostream
OS(ErrStorage
);
474 DiagnosticPrinterRawOStream
DP(OS
);
477 switch (DI
.getSeverity()) {
479 WithColor::error(errs(), LinkerExecutable
) << ErrStorage
<< "\n";
483 WithColor::warning(errs(), LinkerExecutable
) << ErrStorage
<< "\n";
486 WithColor::note(errs(), LinkerExecutable
) << ErrStorage
<< "\n";
489 WithColor::remark(errs()) << ErrStorage
<< "\n";
494 // Get the list of target features from the input file and unify them such that
495 // if there are multiple +xxx or -xxx features we only keep the last one.
496 std::vector
<std::string
> getTargetFeatures(ArrayRef
<OffloadFile
> InputFiles
) {
497 SmallVector
<StringRef
> Features
;
498 for (const OffloadFile
&File
: InputFiles
) {
499 for (auto Arg
: llvm::split(File
.getBinary()->getString("feature"), ","))
500 Features
.emplace_back(Arg
);
503 // Only add a feature if it hasn't been seen before starting from the end.
504 std::vector
<std::string
> UnifiedFeatures
;
505 DenseSet
<StringRef
> UsedFeatures
;
506 for (StringRef Feature
: llvm::reverse(Features
)) {
507 if (UsedFeatures
.insert(Feature
.drop_front()).second
)
508 UnifiedFeatures
.push_back(Feature
.str());
511 return UnifiedFeatures
;
514 template <typename ModuleHook
= function_ref
<bool(size_t, const Module
&)>>
515 std::unique_ptr
<lto::LTO
> createLTO(
516 const ArgList
&Args
, const std::vector
<std::string
> &Features
,
517 ModuleHook Hook
= [](size_t, const Module
&) { return true; }) {
518 const llvm::Triple
Triple(Args
.getLastArgValue(OPT_triple_EQ
));
519 StringRef Arch
= Args
.getLastArgValue(OPT_arch_EQ
);
521 lto::ThinBackend Backend
;
522 // TODO: Handle index-only thin-LTO
524 lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
526 Conf
.CPU
= Arch
.str();
527 Conf
.Options
= codegen::InitTargetOptionsFromCodeGenFlags(Triple
);
529 StringRef OptLevel
= Args
.getLastArgValue(OPT_opt_level
, "O2");
530 Conf
.MAttrs
= Features
;
531 std::optional
<CodeGenOpt::Level
> CGOptLevelOrNone
=
532 CodeGenOpt::parseLevel(OptLevel
[1]);
533 assert(CGOptLevelOrNone
&& "Invalid optimization level");
534 Conf
.CGOptLevel
= *CGOptLevelOrNone
;
535 Conf
.OptLevel
= OptLevel
[1] - '0';
536 Conf
.DefaultTriple
= Triple
.getTriple();
539 Conf
.DiagHandler
= diagnosticHandler
;
541 Conf
.PTO
.LoopVectorization
= Conf
.OptLevel
> 1;
542 Conf
.PTO
.SLPVectorization
= Conf
.OptLevel
> 1;
545 std::string TempName
= (sys::path::filename(ExecutableName
) + "." +
546 Triple
.getTriple() + "." + Arch
)
548 Conf
.PostInternalizeModuleHook
= [=](size_t Task
, const Module
&M
) {
550 !Task
? TempName
+ ".postlink.bc"
551 : TempName
+ "." + std::to_string(Task
) + ".postlink.bc";
553 raw_fd_ostream
LinkedBitcode(File
, EC
, sys::fs::OF_None
);
555 reportError(errorCodeToError(EC
));
556 WriteBitcodeToFile(M
, LinkedBitcode
);
559 Conf
.PreCodeGenModuleHook
= [=](size_t Task
, const Module
&M
) {
561 !Task
? TempName
+ ".postopt.bc"
562 : TempName
+ "." + std::to_string(Task
) + ".postopt.bc";
564 raw_fd_ostream
LinkedBitcode(File
, EC
, sys::fs::OF_None
);
566 reportError(errorCodeToError(EC
));
567 WriteBitcodeToFile(M
, LinkedBitcode
);
571 Conf
.PostOptModuleHook
= Hook
;
573 (Triple
.isNVPTX() || SaveTemps
) ? CGFT_AssemblyFile
: CGFT_ObjectFile
;
575 // TODO: Handle remark files
576 Conf
.HasWholeProgramVisibility
= Args
.hasArg(OPT_whole_program
);
578 return std::make_unique
<lto::LTO
>(std::move(Conf
), Backend
);
581 // Returns true if \p S is valid as a C language identifier and will be given
582 // `__start_` and `__stop_` symbols.
583 bool isValidCIdentifier(StringRef S
) {
584 return !S
.empty() && (isAlpha(S
[0]) || S
[0] == '_') &&
585 llvm::all_of(llvm::drop_begin(S
),
586 [](char C
) { return C
== '_' || isAlnum(C
); });
589 Error
linkBitcodeFiles(SmallVectorImpl
<OffloadFile
> &InputFiles
,
590 SmallVectorImpl
<StringRef
> &OutputFiles
,
591 const ArgList
&Args
) {
592 llvm::TimeTraceScope
TimeScope("Link bitcode files");
593 const llvm::Triple
Triple(Args
.getLastArgValue(OPT_triple_EQ
));
594 StringRef Arch
= Args
.getLastArgValue(OPT_arch_EQ
);
596 SmallVector
<OffloadFile
, 4> BitcodeInputFiles
;
597 DenseSet
<StringRef
> UsedInRegularObj
;
598 DenseSet
<StringRef
> UsedInSharedLib
;
599 BumpPtrAllocator Alloc
;
600 StringSaver
Saver(Alloc
);
602 // Search for bitcode files in the input and create an LTO input file. If it
603 // is not a bitcode file, scan its symbol table for symbols we need to save.
604 for (OffloadFile
&File
: InputFiles
) {
605 MemoryBufferRef Buffer
= MemoryBufferRef(File
.getBinary()->getImage(), "");
607 file_magic Type
= identify_magic(Buffer
.getBuffer());
609 case file_magic::bitcode
: {
610 BitcodeInputFiles
.emplace_back(std::move(File
));
613 case file_magic::elf_relocatable
:
614 case file_magic::elf_shared_object
: {
615 Expected
<std::unique_ptr
<ObjectFile
>> ObjFile
=
616 ObjectFile::createObjectFile(Buffer
);
620 for (SymbolRef Sym
: (*ObjFile
)->symbols()) {
621 Expected
<StringRef
> Name
= Sym
.getName();
623 return Name
.takeError();
625 // Record if we've seen these symbols in any object or shared libraries.
626 if ((*ObjFile
)->isRelocatableObject())
627 UsedInRegularObj
.insert(Saver
.save(*Name
));
629 UsedInSharedLib
.insert(Saver
.save(*Name
));
638 if (BitcodeInputFiles
.empty())
639 return Error::success();
641 // Remove all the bitcode files that we moved from the original input.
642 llvm::erase_if(InputFiles
, [](OffloadFile
&F
) { return !F
.getBinary(); });
644 // LTO Module hook to output bitcode without running the backend.
645 SmallVector
<StringRef
> BitcodeOutput
;
646 auto OutputBitcode
= [&](size_t, const Module
&M
) {
647 auto TempFileOrErr
= createOutputFile(sys::path::filename(ExecutableName
) +
648 "-jit-" + Triple
.getTriple(),
651 reportError(TempFileOrErr
.takeError());
654 raw_fd_ostream
LinkedBitcode(*TempFileOrErr
, EC
, sys::fs::OF_None
);
656 reportError(errorCodeToError(EC
));
657 WriteBitcodeToFile(M
, LinkedBitcode
);
658 BitcodeOutput
.push_back(*TempFileOrErr
);
662 // We assume visibility of the whole program if every input file was bitcode.
663 auto Features
= getTargetFeatures(BitcodeInputFiles
);
664 auto LTOBackend
= Args
.hasArg(OPT_embed_bitcode
) ||
665 Args
.hasArg(OPT_builtin_bitcode_EQ
) ||
666 Args
.hasArg(OPT_clang_backend
)
667 ? createLTO(Args
, Features
, OutputBitcode
)
668 : createLTO(Args
, Features
);
670 // We need to resolve the symbols so the LTO backend knows which symbols need
671 // to be kept or can be internalized. This is a simplified symbol resolution
672 // scheme to approximate the full resolution a linker would do.
674 DenseSet
<StringRef
> PrevailingSymbols
;
675 for (auto &BitcodeInput
: BitcodeInputFiles
) {
676 // Get a semi-unique buffer identifier for Thin-LTO.
677 StringRef Identifier
= Saver
.save(
678 std::to_string(Idx
++) + "." +
679 BitcodeInput
.getBinary()->getMemoryBufferRef().getBufferIdentifier());
680 MemoryBufferRef Buffer
=
681 MemoryBufferRef(BitcodeInput
.getBinary()->getImage(), Identifier
);
682 Expected
<std::unique_ptr
<lto::InputFile
>> BitcodeFileOrErr
=
683 llvm::lto::InputFile::create(Buffer
);
684 if (!BitcodeFileOrErr
)
685 return BitcodeFileOrErr
.takeError();
687 // Save the input file and the buffer associated with its memory.
688 const auto Symbols
= (*BitcodeFileOrErr
)->symbols();
689 SmallVector
<lto::SymbolResolution
, 16> Resolutions(Symbols
.size());
691 for (auto &Sym
: Symbols
) {
692 lto::SymbolResolution
&Res
= Resolutions
[Idx
++];
694 // We will use this as the prevailing symbol definition in LTO unless
695 // it is undefined or another definition has already been used.
697 !Sym
.isUndefined() &&
698 PrevailingSymbols
.insert(Saver
.save(Sym
.getName())).second
;
700 // We need LTO to preserve the following global symbols:
701 // 1) Symbols used in regular objects.
702 // 2) Sections that will be given a __start/__stop symbol.
703 // 3) Prevailing symbols that are needed visible to external libraries.
704 Res
.VisibleToRegularObj
=
705 UsedInRegularObj
.contains(Sym
.getName()) ||
706 isValidCIdentifier(Sym
.getSectionName()) ||
708 (Sym
.getVisibility() != GlobalValue::HiddenVisibility
&&
709 !Sym
.canBeOmittedFromSymbolTable()));
711 // Identify symbols that must be exported dynamically and can be
712 // referenced by other files.
714 Sym
.getVisibility() != GlobalValue::HiddenVisibility
&&
715 (UsedInSharedLib
.contains(Sym
.getName()) ||
716 !Sym
.canBeOmittedFromSymbolTable());
718 // The final definition will reside in this linkage unit if the symbol is
719 // defined and local to the module. This only checks for bitcode files,
720 // full assertion will require complete symbol resolution.
721 Res
.FinalDefinitionInLinkageUnit
=
722 Sym
.getVisibility() != GlobalValue::DefaultVisibility
&&
723 (!Sym
.isUndefined() && !Sym
.isCommon());
725 // We do not support linker redefined symbols (e.g. --wrap) for device
726 // image linking, so the symbols will not be changed after LTO.
727 Res
.LinkerRedefined
= false;
730 // Add the bitcode file with its resolved symbols to the LTO job.
731 if (Error Err
= LTOBackend
->add(std::move(*BitcodeFileOrErr
), Resolutions
))
735 // Run the LTO job to compile the bitcode.
736 size_t MaxTasks
= LTOBackend
->getMaxTasks();
737 SmallVector
<StringRef
> Files(MaxTasks
);
740 const Twine
&ModuleName
) -> std::unique_ptr
<CachedFileStream
> {
742 auto &TempFile
= Files
[Task
];
743 StringRef Extension
= (Triple
.isNVPTX() || SaveTemps
) ? "s" : "o";
744 std::string TaskStr
= Task
? "." + std::to_string(Task
) : "";
746 createOutputFile(sys::path::filename(ExecutableName
) + "." +
747 Triple
.getTriple() + "." + Arch
+ TaskStr
,
750 reportError(TempFileOrErr
.takeError());
751 TempFile
= *TempFileOrErr
;
752 if (std::error_code EC
= sys::fs::openFileForWrite(TempFile
, FD
))
753 reportError(errorCodeToError(EC
));
754 return std::make_unique
<CachedFileStream
>(
755 std::make_unique
<llvm::raw_fd_ostream
>(FD
, true));
758 if (Error Err
= LTOBackend
->run(AddStream
))
762 return createStringError(inconvertibleErrorCode(),
763 "Errors encountered inside the LTO pipeline.");
765 // If we are embedding bitcode we only need the intermediate output.
766 bool SingleOutput
= Files
.size() == 1;
767 if (Args
.hasArg(OPT_embed_bitcode
)) {
768 if (BitcodeOutput
.size() != 1 || !SingleOutput
)
769 return createStringError(inconvertibleErrorCode(),
770 "Cannot embed bitcode with multiple files.");
771 OutputFiles
.push_back(Args
.MakeArgString(BitcodeOutput
.front()));
772 return Error::success();
775 // Append the new inputs to the device linker input. If the user requested an
776 // internalizing link we need to pass the bitcode to clang.
777 for (StringRef File
:
778 Args
.hasArg(OPT_clang_backend
) || Args
.hasArg(OPT_builtin_bitcode_EQ
)
781 OutputFiles
.push_back(File
);
783 return Error::success();
786 Expected
<StringRef
> writeOffloadFile(const OffloadFile
&File
) {
787 const OffloadBinary
&Binary
= *File
.getBinary();
790 sys::path::stem(Binary
.getMemoryBufferRef().getBufferIdentifier());
791 StringRef Suffix
= getImageKindName(Binary
.getImageKind());
793 auto TempFileOrErr
= createOutputFile(
794 Prefix
+ "-" + Binary
.getTriple() + "-" + Binary
.getArch(), Suffix
);
796 return TempFileOrErr
.takeError();
798 Expected
<std::unique_ptr
<FileOutputBuffer
>> OutputOrErr
=
799 FileOutputBuffer::create(*TempFileOrErr
, Binary
.getImage().size());
801 return OutputOrErr
.takeError();
802 std::unique_ptr
<FileOutputBuffer
> Output
= std::move(*OutputOrErr
);
803 llvm::copy(Binary
.getImage(), Output
->getBufferStart());
804 if (Error E
= Output
->commit())
807 return *TempFileOrErr
;
810 // Compile the module to an object file using the appropriate target machine for
812 Expected
<StringRef
> compileModule(Module
&M
) {
813 llvm::TimeTraceScope
TimeScope("Compile module");
815 const Target
*T
= TargetRegistry::lookupTarget(M
.getTargetTriple(), Msg
);
817 return createStringError(inconvertibleErrorCode(), Msg
);
820 codegen::InitTargetOptionsFromCodeGenFlags(Triple(M
.getTargetTriple()));
822 StringRef Features
= "";
823 std::unique_ptr
<TargetMachine
> TM(
824 T
->createTargetMachine(M
.getTargetTriple(), CPU
, Features
, Options
,
825 Reloc::PIC_
, M
.getCodeModel()));
827 if (M
.getDataLayout().isDefault())
828 M
.setDataLayout(TM
->createDataLayout());
831 auto TempFileOrErr
= createOutputFile(
832 sys::path::filename(ExecutableName
) + ".image.wrapper", "o");
834 return TempFileOrErr
.takeError();
835 if (std::error_code EC
= sys::fs::openFileForWrite(*TempFileOrErr
, FD
))
836 return errorCodeToError(EC
);
838 auto OS
= std::make_unique
<llvm::raw_fd_ostream
>(FD
, true);
840 legacy::PassManager CodeGenPasses
;
841 TargetLibraryInfoImpl
TLII(Triple(M
.getTargetTriple()));
842 CodeGenPasses
.add(new TargetLibraryInfoWrapperPass(TLII
));
843 if (TM
->addPassesToEmitFile(CodeGenPasses
, *OS
, nullptr, CGFT_ObjectFile
))
844 return createStringError(inconvertibleErrorCode(),
845 "Failed to execute host backend");
846 CodeGenPasses
.run(M
);
848 return *TempFileOrErr
;
851 /// Creates the object file containing the device image and runtime
852 /// registration code from the device images stored in \p Buffers.
854 wrapDeviceImages(ArrayRef
<std::unique_ptr
<MemoryBuffer
>> Buffers
,
855 const ArgList
&Args
, OffloadKind Kind
) {
856 llvm::TimeTraceScope
TimeScope("Wrap bundled images");
858 SmallVector
<ArrayRef
<char>, 4> BuffersToWrap
;
859 for (const auto &Buffer
: Buffers
)
860 BuffersToWrap
.emplace_back(
861 ArrayRef
<char>(Buffer
->getBufferStart(), Buffer
->getBufferSize()));
864 Module
M("offload.wrapper.module", Context
);
866 Args
.getLastArgValue(OPT_host_triple_EQ
, sys::getDefaultTargetTriple()));
870 if (Error Err
= wrapOpenMPBinaries(M
, BuffersToWrap
))
871 return std::move(Err
);
874 if (Error Err
= wrapCudaBinary(M
, BuffersToWrap
.front()))
875 return std::move(Err
);
878 if (Error Err
= wrapHIPBinary(M
, BuffersToWrap
.front()))
879 return std::move(Err
);
882 return createStringError(inconvertibleErrorCode(),
883 getOffloadKindName(Kind
) +
884 " wrapping is not supported");
887 if (Args
.hasArg(OPT_print_wrapped_module
))
889 if (Args
.hasArg(OPT_save_temps
)) {
892 createOutputFile(sys::path::filename(ExecutableName
) + "." +
893 getOffloadKindName(Kind
) + ".image.wrapper",
896 return TempFileOrErr
.takeError();
897 if (std::error_code EC
= sys::fs::openFileForWrite(*TempFileOrErr
, FD
))
898 return errorCodeToError(EC
);
899 llvm::raw_fd_ostream
OS(FD
, true);
900 WriteBitcodeToFile(M
, OS
);
903 auto FileOrErr
= compileModule(M
);
905 return FileOrErr
.takeError();
909 Expected
<SmallVector
<std::unique_ptr
<MemoryBuffer
>>>
910 bundleOpenMP(ArrayRef
<OffloadingImage
> Images
) {
911 SmallVector
<std::unique_ptr
<MemoryBuffer
>> Buffers
;
912 for (const OffloadingImage
&Image
: Images
)
913 Buffers
.emplace_back(
914 MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image
)));
916 return std::move(Buffers
);
919 Expected
<SmallVector
<std::unique_ptr
<MemoryBuffer
>>>
920 bundleCuda(ArrayRef
<OffloadingImage
> Images
, const ArgList
&Args
) {
921 SmallVector
<std::pair
<StringRef
, StringRef
>, 4> InputFiles
;
922 for (const OffloadingImage
&Image
: Images
)
923 InputFiles
.emplace_back(std::make_pair(Image
.Image
->getBufferIdentifier(),
924 Image
.StringData
.lookup("arch")));
926 Triple TheTriple
= Triple(Images
.front().StringData
.lookup("triple"));
927 auto FileOrErr
= nvptx::fatbinary(InputFiles
, Args
);
929 return FileOrErr
.takeError();
931 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> ImageOrError
=
932 llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr
);
934 SmallVector
<std::unique_ptr
<MemoryBuffer
>> Buffers
;
935 if (std::error_code EC
= ImageOrError
.getError())
936 return createFileError(*FileOrErr
, EC
);
937 Buffers
.emplace_back(std::move(*ImageOrError
));
939 return std::move(Buffers
);
942 Expected
<SmallVector
<std::unique_ptr
<MemoryBuffer
>>>
943 bundleHIP(ArrayRef
<OffloadingImage
> Images
, const ArgList
&Args
) {
944 SmallVector
<std::pair
<StringRef
, StringRef
>, 4> InputFiles
;
945 for (const OffloadingImage
&Image
: Images
)
946 InputFiles
.emplace_back(std::make_pair(Image
.Image
->getBufferIdentifier(),
947 Image
.StringData
.lookup("arch")));
949 Triple TheTriple
= Triple(Images
.front().StringData
.lookup("triple"));
950 auto FileOrErr
= amdgcn::fatbinary(InputFiles
, Args
);
952 return FileOrErr
.takeError();
954 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> ImageOrError
=
955 llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr
);
957 SmallVector
<std::unique_ptr
<MemoryBuffer
>> Buffers
;
958 if (std::error_code EC
= ImageOrError
.getError())
959 return createFileError(*FileOrErr
, EC
);
960 Buffers
.emplace_back(std::move(*ImageOrError
));
962 return std::move(Buffers
);
965 /// Transforms the input \p Images into the binary format the runtime expects
966 /// for the given \p Kind.
967 Expected
<SmallVector
<std::unique_ptr
<MemoryBuffer
>>>
968 bundleLinkedOutput(ArrayRef
<OffloadingImage
> Images
, const ArgList
&Args
,
970 llvm::TimeTraceScope
TimeScope("Bundle linked output");
973 return bundleOpenMP(Images
);
975 return bundleCuda(Images
, Args
);
977 return bundleHIP(Images
, Args
);
979 return createStringError(inconvertibleErrorCode(),
980 getOffloadKindName(Kind
) +
981 " bundling is not supported");
985 /// Returns a new ArgList containg arguments used for the device linking phase.
986 DerivedArgList
getLinkerArgs(ArrayRef
<OffloadFile
> Input
,
987 const InputArgList
&Args
) {
988 DerivedArgList DAL
= DerivedArgList(DerivedArgList(Args
));
992 // Set the subarchitecture and target triple for this compilation.
993 const OptTable
&Tbl
= getOptTable();
994 DAL
.AddJoinedArg(nullptr, Tbl
.getOption(OPT_arch_EQ
),
995 Args
.MakeArgString(Input
.front().getBinary()->getArch()));
996 DAL
.AddJoinedArg(nullptr, Tbl
.getOption(OPT_triple_EQ
),
997 Args
.MakeArgString(Input
.front().getBinary()->getTriple()));
999 // If every input file is bitcode we have whole program visibility as we do
1000 // only support static linking with bitcode.
1001 auto ContainsBitcode
= [](const OffloadFile
&F
) {
1002 return identify_magic(F
.getBinary()->getImage()) == file_magic::bitcode
;
1004 if (llvm::all_of(Input
, ContainsBitcode
))
1005 DAL
.AddFlagArg(nullptr, Tbl
.getOption(OPT_whole_program
));
1007 // Forward '-Xoffload-linker' options to the appropriate backend.
1008 for (StringRef Arg
: Args
.getAllArgValues(OPT_device_linker_args_EQ
)) {
1009 auto [Triple
, Value
] = Arg
.split('=');
1011 DAL
.AddJoinedArg(nullptr, Tbl
.getOption(OPT_linker_arg_EQ
),
1012 Args
.MakeArgString(Triple
));
1013 else if (Triple
== DAL
.getLastArgValue(OPT_triple_EQ
))
1014 DAL
.AddJoinedArg(nullptr, Tbl
.getOption(OPT_linker_arg_EQ
),
1015 Args
.MakeArgString(Value
));
1021 /// Transforms all the extracted offloading input files into an image that can
1022 /// be registered by the runtime.
1023 Expected
<SmallVector
<StringRef
>>
1024 linkAndWrapDeviceFiles(SmallVectorImpl
<OffloadFile
> &LinkerInputFiles
,
1025 const InputArgList
&Args
, char **Argv
, int Argc
) {
1026 llvm::TimeTraceScope
TimeScope("Handle all device input");
1028 DenseMap
<OffloadFile::TargetID
, SmallVector
<OffloadFile
>> InputMap
;
1029 for (auto &File
: LinkerInputFiles
)
1030 InputMap
[File
].emplace_back(std::move(File
));
1031 LinkerInputFiles
.clear();
1033 SmallVector
<SmallVector
<OffloadFile
>> InputsForTarget
;
1034 for (auto &[ID
, Input
] : InputMap
)
1035 InputsForTarget
.emplace_back(std::move(Input
));
1038 std::mutex ImageMtx
;
1039 DenseMap
<OffloadKind
, SmallVector
<OffloadingImage
>> Images
;
1040 auto Err
= parallelForEachError(InputsForTarget
, [&](auto &Input
) -> Error
{
1041 llvm::TimeTraceScope
TimeScope("Link device input");
1043 // Each thread needs its own copy of the base arguments to maintain
1044 // per-device argument storage of synthetic strings.
1045 const OptTable
&Tbl
= getOptTable();
1046 BumpPtrAllocator Alloc
;
1047 StringSaver
Saver(Alloc
);
1049 Tbl
.parseArgs(Argc
, Argv
, OPT_INVALID
, Saver
, [](StringRef Err
) {
1050 reportError(createStringError(inconvertibleErrorCode(), Err
));
1052 auto LinkerArgs
= getLinkerArgs(Input
, BaseArgs
);
1054 DenseSet
<OffloadKind
> ActiveOffloadKinds
;
1055 for (const auto &File
: Input
)
1056 if (File
.getBinary()->getOffloadKind() != OFK_None
)
1057 ActiveOffloadKinds
.insert(File
.getBinary()->getOffloadKind());
1059 // First link and remove all the input files containing bitcode.
1060 SmallVector
<StringRef
> InputFiles
;
1061 if (Error Err
= linkBitcodeFiles(Input
, InputFiles
, LinkerArgs
))
1064 // Write any remaining device inputs to an output file for the linker.
1065 for (const OffloadFile
&File
: Input
) {
1066 auto FileNameOrErr
= writeOffloadFile(File
);
1068 return FileNameOrErr
.takeError();
1069 InputFiles
.emplace_back(*FileNameOrErr
);
1072 // Link the remaining device files using the device linker.
1073 auto OutputOrErr
= !Args
.hasArg(OPT_embed_bitcode
)
1074 ? linkDevice(InputFiles
, LinkerArgs
)
1075 : InputFiles
.front();
1077 return OutputOrErr
.takeError();
1079 // Store the offloading image for each linked output file.
1080 for (OffloadKind Kind
: ActiveOffloadKinds
) {
1081 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> FileOrErr
=
1082 llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr
);
1083 if (std::error_code EC
= FileOrErr
.getError()) {
1085 FileOrErr
= MemoryBuffer::getMemBuffer("");
1087 return createFileError(*OutputOrErr
, EC
);
1090 std::scoped_lock
<decltype(ImageMtx
)> Guard(ImageMtx
);
1091 OffloadingImage TheImage
{};
1092 TheImage
.TheImageKind
=
1093 Args
.hasArg(OPT_embed_bitcode
) ? IMG_Bitcode
: IMG_Object
;
1094 TheImage
.TheOffloadKind
= Kind
;
1095 TheImage
.StringData
["triple"] =
1096 Args
.MakeArgString(LinkerArgs
.getLastArgValue(OPT_triple_EQ
));
1097 TheImage
.StringData
["arch"] =
1098 Args
.MakeArgString(LinkerArgs
.getLastArgValue(OPT_arch_EQ
));
1099 TheImage
.Image
= std::move(*FileOrErr
);
1101 Images
[Kind
].emplace_back(std::move(TheImage
));
1103 return Error::success();
1106 return std::move(Err
);
1108 // Create a binary image of each offloading image and embed it into a new
1110 SmallVector
<StringRef
> WrappedOutput
;
1111 for (auto &[Kind
, Input
] : Images
) {
1112 // We sort the entries before bundling so they appear in a deterministic
1113 // order in the final binary.
1114 llvm::sort(Input
, [](OffloadingImage
&A
, OffloadingImage
&B
) {
1115 return A
.StringData
["triple"] > B
.StringData
["triple"] ||
1116 A
.StringData
["arch"] > B
.StringData
["arch"] ||
1117 A
.TheOffloadKind
< B
.TheOffloadKind
;
1119 auto BundledImagesOrErr
= bundleLinkedOutput(Input
, Args
, Kind
);
1120 if (!BundledImagesOrErr
)
1121 return BundledImagesOrErr
.takeError();
1122 auto OutputOrErr
= wrapDeviceImages(*BundledImagesOrErr
, Args
, Kind
);
1124 return OutputOrErr
.takeError();
1125 WrappedOutput
.push_back(*OutputOrErr
);
1128 return WrappedOutput
;
1131 std::optional
<std::string
> findFile(StringRef Dir
, StringRef Root
,
1132 const Twine
&Name
) {
1133 SmallString
<128> Path
;
1134 if (Dir
.startswith("="))
1135 sys::path::append(Path
, Root
, Dir
.substr(1), Name
);
1137 sys::path::append(Path
, Dir
, Name
);
1139 if (sys::fs::exists(Path
))
1140 return static_cast<std::string
>(Path
);
1141 return std::nullopt
;
1144 std::optional
<std::string
>
1145 findFromSearchPaths(StringRef Name
, StringRef Root
,
1146 ArrayRef
<StringRef
> SearchPaths
) {
1147 for (StringRef Dir
: SearchPaths
)
1148 if (std::optional
<std::string
> File
= findFile(Dir
, Root
, Name
))
1150 return std::nullopt
;
1153 std::optional
<std::string
>
1154 searchLibraryBaseName(StringRef Name
, StringRef Root
,
1155 ArrayRef
<StringRef
> SearchPaths
) {
1156 for (StringRef Dir
: SearchPaths
) {
1157 if (std::optional
<std::string
> File
=
1158 findFile(Dir
, Root
, "lib" + Name
+ ".so"))
1160 if (std::optional
<std::string
> File
=
1161 findFile(Dir
, Root
, "lib" + Name
+ ".a"))
1164 return std::nullopt
;
1167 /// Search for static libraries in the linker's library path given input like
1168 /// `-lfoo` or `-l:libfoo.a`.
1169 std::optional
<std::string
> searchLibrary(StringRef Input
, StringRef Root
,
1170 ArrayRef
<StringRef
> SearchPaths
) {
1171 if (Input
.startswith(":"))
1172 return findFromSearchPaths(Input
.drop_front(), Root
, SearchPaths
);
1173 return searchLibraryBaseName(Input
, Root
, SearchPaths
);
/// Common redeclaration of needed symbol flags. The values are bit flags that
/// are combined and tested with bitwise operators.
enum Symbol : uint32_t {
  /// No flag is set.
  Sym_None = 0,
  /// The symbol has been referenced but not defined.
  Sym_Undefined = 1U << 1,
  /// The symbol is weak.
  Sym_Weak = 1U << 2,
};
1183 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
1184 /// extract any symbols from it.
1185 Expected
<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer
, OffloadKind Kind
,
1186 bool IsArchive
, StringSaver
&Saver
,
1187 DenseMap
<StringRef
, Symbol
> &Syms
) {
1188 Expected
<IRSymtabFile
> IRSymtabOrErr
= readIRSymtab(Buffer
);
1190 return IRSymtabOrErr
.takeError();
1192 bool ShouldExtract
= !IsArchive
;
1193 DenseMap
<StringRef
, Symbol
> TmpSyms
;
1194 for (unsigned I
= 0; I
!= IRSymtabOrErr
->Mods
.size(); ++I
) {
1195 for (const auto &Sym
: IRSymtabOrErr
->TheReader
.module_symbols(I
)) {
1196 if (Sym
.isFormatSpecific() || !Sym
.isGlobal())
1199 bool NewSymbol
= Syms
.count(Sym
.getName()) == 0;
1200 auto OldSym
= NewSymbol
? Sym_None
: Syms
[Sym
.getName()];
1202 // We will extract if it defines a currenlty undefined non-weak symbol.
1203 bool ResolvesStrongReference
=
1204 ((OldSym
& Sym_Undefined
&& !(OldSym
& Sym_Weak
)) &&
1205 !Sym
.isUndefined());
1206 // We will extract if it defines a new global symbol visible to the host.
1207 // This is only necessary for code targeting an offloading language.
1208 bool NewGlobalSymbol
=
1209 ((NewSymbol
|| (OldSym
& Sym_Undefined
)) && !Sym
.isUndefined() &&
1210 !Sym
.canBeOmittedFromSymbolTable() && Kind
!= object::OFK_None
&&
1211 (Sym
.getVisibility() != GlobalValue::HiddenVisibility
));
1212 ShouldExtract
|= ResolvesStrongReference
| NewGlobalSymbol
;
1214 // Update this symbol in the "table" with the new information.
1215 if (OldSym
& Sym_Undefined
&& !Sym
.isUndefined())
1216 TmpSyms
[Saver
.save(Sym
.getName())] =
1217 static_cast<Symbol
>(OldSym
& ~Sym_Undefined
);
1218 if (Sym
.isUndefined() && NewSymbol
)
1219 TmpSyms
[Saver
.save(Sym
.getName())] =
1220 static_cast<Symbol
>(OldSym
| Sym_Undefined
);
1222 TmpSyms
[Saver
.save(Sym
.getName())] =
1223 static_cast<Symbol
>(OldSym
| Sym_Weak
);
1227 // If the file gets extracted we update the table with the new symbols.
1229 Syms
.insert(std::begin(TmpSyms
), std::end(TmpSyms
));
1231 return ShouldExtract
;
1234 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
1235 /// any symbols from it.
1236 Expected
<bool> getSymbolsFromObject(const ObjectFile
&Obj
, OffloadKind Kind
,
1237 bool IsArchive
, StringSaver
&Saver
,
1238 DenseMap
<StringRef
, Symbol
> &Syms
) {
1239 bool ShouldExtract
= !IsArchive
;
1240 DenseMap
<StringRef
, Symbol
> TmpSyms
;
1241 for (SymbolRef Sym
: Obj
.symbols()) {
1242 auto FlagsOrErr
= Sym
.getFlags();
1244 return FlagsOrErr
.takeError();
1246 if (!(*FlagsOrErr
& SymbolRef::SF_Global
) ||
1247 (*FlagsOrErr
& SymbolRef::SF_FormatSpecific
))
1250 auto NameOrErr
= Sym
.getName();
1252 return NameOrErr
.takeError();
1254 bool NewSymbol
= Syms
.count(*NameOrErr
) == 0;
1255 auto OldSym
= NewSymbol
? Sym_None
: Syms
[*NameOrErr
];
1257 // We will extract if it defines a currenlty undefined non-weak symbol.
1258 bool ResolvesStrongReference
= (OldSym
& Sym_Undefined
) &&
1259 !(OldSym
& Sym_Weak
) &&
1260 !(*FlagsOrErr
& SymbolRef::SF_Undefined
);
1262 // We will extract if it defines a new global symbol visible to the host.
1263 // This is only necessary for code targeting an offloading language.
1264 bool NewGlobalSymbol
=
1265 ((NewSymbol
|| (OldSym
& Sym_Undefined
)) &&
1266 !(*FlagsOrErr
& SymbolRef::SF_Undefined
) && Kind
!= object::OFK_None
&&
1267 !(*FlagsOrErr
& SymbolRef::SF_Hidden
));
1268 ShouldExtract
|= ResolvesStrongReference
| NewGlobalSymbol
;
1270 // Update this symbol in the "table" with the new information.
1271 if (OldSym
& Sym_Undefined
&& !(*FlagsOrErr
& SymbolRef::SF_Undefined
))
1272 TmpSyms
[Saver
.save(*NameOrErr
)] =
1273 static_cast<Symbol
>(OldSym
& ~Sym_Undefined
);
1274 if (*FlagsOrErr
& SymbolRef::SF_Undefined
&& NewSymbol
)
1275 TmpSyms
[Saver
.save(*NameOrErr
)] =
1276 static_cast<Symbol
>(OldSym
| Sym_Undefined
);
1277 if (*FlagsOrErr
& SymbolRef::SF_Weak
)
1278 TmpSyms
[Saver
.save(*NameOrErr
)] = static_cast<Symbol
>(OldSym
| Sym_Weak
);
1281 // If the file gets extracted we update the table with the new symbols.
1283 Syms
.insert(std::begin(TmpSyms
), std::end(TmpSyms
));
1285 return ShouldExtract
;
1288 /// Attempt to 'resolve' symbols found in input files. We use this to
1289 /// determine if an archive member needs to be extracted. An archive member
1290 /// will be extracted if any of the following is true.
1291 /// 1) It defines an undefined symbol in a regular object filie.
1292 /// 2) It defines a global symbol without hidden visibility that has not
1293 /// yet been defined.
1294 Expected
<bool> getSymbols(StringRef Image
, OffloadKind Kind
, bool IsArchive
,
1296 DenseMap
<StringRef
, Symbol
> &Syms
) {
1297 MemoryBufferRef Buffer
= MemoryBufferRef(Image
, "");
1298 switch (identify_magic(Image
)) {
1299 case file_magic::bitcode
:
1300 return getSymbolsFromBitcode(Buffer
, Kind
, IsArchive
, Saver
, Syms
);
1301 case file_magic::elf_relocatable
: {
1302 Expected
<std::unique_ptr
<ObjectFile
>> ObjFile
=
1303 ObjectFile::createObjectFile(Buffer
);
1305 return ObjFile
.takeError();
1306 return getSymbolsFromObject(**ObjFile
, Kind
, IsArchive
, Saver
, Syms
);
1313 /// Search the input files and libraries for embedded device offloading code
1314 /// and add it to the list of files to be linked. Files coming from static
1315 /// libraries are only added to the input if they are used by an existing
1317 Expected
<SmallVector
<OffloadFile
>> getDeviceInput(const ArgList
&Args
) {
1318 llvm::TimeTraceScope
TimeScope("ExtractDeviceCode");
1320 StringRef Root
= Args
.getLastArgValue(OPT_sysroot_EQ
);
1321 SmallVector
<StringRef
> LibraryPaths
;
1322 for (const opt::Arg
*Arg
: Args
.filtered(OPT_library_path
))
1323 LibraryPaths
.push_back(Arg
->getValue());
1325 BumpPtrAllocator Alloc
;
1326 StringSaver
Saver(Alloc
);
1328 // Try to extract device code from the linker input files.
1329 SmallVector
<OffloadFile
> InputFiles
;
1330 DenseMap
<OffloadFile::TargetID
, DenseMap
<StringRef
, Symbol
>> Syms
;
1331 bool WholeArchive
= false;
1332 for (const opt::Arg
*Arg
: Args
.filtered(
1333 OPT_INPUT
, OPT_library
, OPT_whole_archive
, OPT_no_whole_archive
)) {
1334 if (Arg
->getOption().matches(OPT_whole_archive
) ||
1335 Arg
->getOption().matches(OPT_no_whole_archive
)) {
1336 WholeArchive
= Arg
->getOption().matches(OPT_whole_archive
);
1340 std::optional
<std::string
> Filename
=
1341 Arg
->getOption().matches(OPT_library
)
1342 ? searchLibrary(Arg
->getValue(), Root
, LibraryPaths
)
1343 : std::string(Arg
->getValue());
1345 if (!Filename
&& Arg
->getOption().matches(OPT_library
))
1346 reportError(createStringError(inconvertibleErrorCode(),
1347 "unable to find library -l%s",
1350 if (!Filename
|| !sys::fs::exists(*Filename
) ||
1351 sys::fs::is_directory(*Filename
))
1354 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BufferOrErr
=
1355 MemoryBuffer::getFileOrSTDIN(*Filename
);
1356 if (std::error_code EC
= BufferOrErr
.getError())
1357 return createFileError(*Filename
, EC
);
1359 MemoryBufferRef Buffer
= **BufferOrErr
;
1360 if (identify_magic(Buffer
.getBuffer()) == file_magic::elf_shared_object
)
1363 SmallVector
<OffloadFile
> Binaries
;
1364 if (Error Err
= extractOffloadBinaries(Buffer
, Binaries
))
1365 return std::move(Err
);
1367 // We only extract archive members that are needed.
1368 bool IsArchive
= identify_magic(Buffer
.getBuffer()) == file_magic::archive
;
1369 bool Extracted
= true;
1372 for (OffloadFile
&Binary
: Binaries
) {
1373 if (!Binary
.getBinary())
1376 // If we don't have an object file for this architecture do not
1378 if (IsArchive
&& !WholeArchive
&& !Syms
.count(Binary
))
1381 Expected
<bool> ExtractOrErr
=
1382 getSymbols(Binary
.getBinary()->getImage(),
1383 Binary
.getBinary()->getOffloadKind(), IsArchive
, Saver
,
1386 return ExtractOrErr
.takeError();
1388 Extracted
= !WholeArchive
&& *ExtractOrErr
;
1390 if (!IsArchive
|| WholeArchive
|| Extracted
)
1391 InputFiles
.emplace_back(std::move(Binary
));
1393 // If we extracted any files we need to check all the symbols again.
1400 for (StringRef Library
: Args
.getAllArgValues(OPT_bitcode_library_EQ
)) {
1401 auto FileOrErr
= getInputBitcodeLibrary(Library
);
1403 return FileOrErr
.takeError();
1404 InputFiles
.push_back(std::move(*FileOrErr
));
1407 return std::move(InputFiles
);
1412 int main(int Argc
, char **Argv
) {
1413 InitLLVM
X(Argc
, Argv
);
1414 InitializeAllTargetInfos();
1415 InitializeAllTargets();
1416 InitializeAllTargetMCs();
1417 InitializeAllAsmParsers();
1418 InitializeAllAsmPrinters();
1420 LinkerExecutable
= Argv
[0];
1421 sys::PrintStackTraceOnErrorSignal(Argv
[0]);
1423 const OptTable
&Tbl
= getOptTable();
1424 BumpPtrAllocator Alloc
;
1425 StringSaver
Saver(Alloc
);
1426 auto Args
= Tbl
.parseArgs(Argc
, Argv
, OPT_INVALID
, Saver
, [&](StringRef Err
) {
1427 reportError(createStringError(inconvertibleErrorCode(), Err
));
1430 if (Args
.hasArg(OPT_help
) || Args
.hasArg(OPT_help_hidden
)) {
1433 "clang-linker-wrapper [options] -- <options to passed to the linker>",
1434 "\nA wrapper utility over the host linker. It scans the input files\n"
1435 "for sections that require additional processing prior to linking.\n"
1436 "The will then transparently pass all arguments and input to the\n"
1437 "specified host linker to create the final binary.\n",
1438 Args
.hasArg(OPT_help_hidden
), Args
.hasArg(OPT_help_hidden
));
1439 return EXIT_SUCCESS
;
1441 if (Args
.hasArg(OPT_v
)) {
1442 printVersion(outs());
1443 return EXIT_SUCCESS
;
1446 // This forwards '-mllvm' arguments to LLVM if present.
1447 SmallVector
<const char *> NewArgv
= {Argv
[0]};
1448 for (const opt::Arg
*Arg
: Args
.filtered(OPT_mllvm
))
1449 NewArgv
.push_back(Arg
->getValue());
1450 for (const opt::Arg
*Arg
: Args
.filtered(OPT_offload_opt_eq_minus
))
1451 NewArgv
.push_back(Args
.MakeArgString(StringRef("-") + Arg
->getValue()));
1452 cl::ParseCommandLineOptions(NewArgv
.size(), &NewArgv
[0]);
1454 Verbose
= Args
.hasArg(OPT_verbose
);
1455 DryRun
= Args
.hasArg(OPT_dry_run
);
1456 SaveTemps
= Args
.hasArg(OPT_save_temps
);
1457 ExecutableName
= Args
.getLastArgValue(OPT_o
, "a.out");
1458 CudaBinaryPath
= Args
.getLastArgValue(OPT_cuda_path_EQ
).str();
1460 parallel::strategy
= hardware_concurrency(1);
1461 if (auto *Arg
= Args
.getLastArg(OPT_wrapper_jobs
)) {
1462 unsigned Threads
= 0;
1463 if (!llvm::to_integer(Arg
->getValue(), Threads
) || Threads
== 0)
1464 reportError(createStringError(
1465 inconvertibleErrorCode(), "%s: expected a positive integer, got '%s'",
1466 Arg
->getSpelling().data(), Arg
->getValue()));
1467 parallel::strategy
= hardware_concurrency(Threads
);
1470 if (Args
.hasArg(OPT_wrapper_time_trace_eq
)) {
1471 unsigned Granularity
;
1472 Args
.getLastArgValue(OPT_wrapper_time_trace_granularity
, "500")
1473 .getAsInteger(10, Granularity
);
1474 timeTraceProfilerInitialize(Granularity
, Argv
[0]);
1478 llvm::TimeTraceScope
TimeScope("Execute linker wrapper");
1480 // Extract the device input files stored in the host fat binary.
1481 auto DeviceInputFiles
= getDeviceInput(Args
);
1482 if (!DeviceInputFiles
)
1483 reportError(DeviceInputFiles
.takeError());
1485 // Link and wrap the device images extracted from the linker input.
1487 linkAndWrapDeviceFiles(*DeviceInputFiles
, Args
, Argv
, Argc
);
1489 reportError(FilesOrErr
.takeError());
1491 // Run the host linking job with the rendered arguments.
1492 if (Error Err
= runLinker(*FilesOrErr
, Args
))
1493 reportError(std::move(Err
));
1496 if (const opt::Arg
*Arg
= Args
.getLastArg(OPT_wrapper_time_trace_eq
)) {
1497 if (Error Err
= timeTraceProfilerWrite(Arg
->getValue(), ExecutableName
))
1498 reportError(std::move(Err
));
1499 timeTraceProfilerCleanup();
1502 // Remove the temporary files created.
1504 for (const auto &TempFile
: TempFiles
)
1505 if (std::error_code EC
= sys::fs::remove(TempFile
))
1506 reportError(createFileError(TempFile
, EC
));
1508 return EXIT_SUCCESS
;