Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / tools / clang-linker-wrapper / ClangLinkerWrapper.cpp
blobbafe8ace60d1cea72efee13de0adfde9cbd84bee
1 //===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This tool works as a wrapper over a linking job. This tool is used to create
10 // linked device images for offloading. It scans the linker's input for embedded
11 // device offloading data stored in sections `.llvm.offloading` and extracts it
12 // as a temporary file. The extracted device files will then be passed to a
13 // device linking job to create a final device image.
15 //===---------------------------------------------------------------------===//
17 #include "OffloadWrapper.h"
18 #include "clang/Basic/Version.h"
19 #include "llvm/BinaryFormat/Magic.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/CodeGen/CommandFlags.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DiagnosticPrinter.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/IRReader/IRReader.h"
26 #include "llvm/LTO/LTO.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Object/Archive.h"
29 #include "llvm/Object/ArchiveWriter.h"
30 #include "llvm/Object/Binary.h"
31 #include "llvm/Object/ELFObjectFile.h"
32 #include "llvm/Object/IRObjectFile.h"
33 #include "llvm/Object/ObjectFile.h"
34 #include "llvm/Object/OffloadBinary.h"
35 #include "llvm/Option/ArgList.h"
36 #include "llvm/Option/OptTable.h"
37 #include "llvm/Option/Option.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Errc.h"
40 #include "llvm/Support/FileOutputBuffer.h"
41 #include "llvm/Support/FileSystem.h"
42 #include "llvm/Support/InitLLVM.h"
43 #include "llvm/Support/MemoryBuffer.h"
44 #include "llvm/Support/Parallel.h"
45 #include "llvm/Support/Path.h"
46 #include "llvm/Support/Program.h"
47 #include "llvm/Support/Signals.h"
48 #include "llvm/Support/SourceMgr.h"
49 #include "llvm/Support/StringSaver.h"
50 #include "llvm/Support/TargetSelect.h"
51 #include "llvm/Support/WithColor.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Target/TargetMachine.h"
54 #include "llvm/TargetParser/Host.h"
55 #include <atomic>
56 #include <optional>
58 using namespace llvm;
59 using namespace llvm::opt;
60 using namespace llvm::object;
62 /// Path of the current binary.
63 static const char *LinkerExecutable;
65 /// Save intermediary results.
66 static bool SaveTemps = false;
68 /// Print arguments without executing.
69 static bool DryRun = false;
71 /// Print verbose output.
72 static bool Verbose = false;
74 /// Filename of the executable being created.
75 static StringRef ExecutableName;
77 /// Binary path for the CUDA installation.
78 static std::string CudaBinaryPath;
80 /// Mutex lock to protect writes to shared TempFiles in parallel.
81 static std::mutex TempFilesMutex;
83 /// Temporary files created by the linker wrapper.
84 static std::list<SmallString<128>> TempFiles;
86 /// Codegen flags for LTO backend.
87 static codegen::RegisterCodeGenFlags CodeGenFlags;
89 /// Global flag to indicate that the LTO pipeline threw an error.
90 static std::atomic<bool> LTOError;
92 using OffloadingImage = OffloadBinary::OffloadingImage;
94 namespace llvm {
95 // Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
96 template <> struct DenseMapInfo<OffloadKind> {
97 static inline OffloadKind getEmptyKey() { return OFK_LAST; }
98 static inline OffloadKind getTombstoneKey() {
99 return static_cast<OffloadKind>(OFK_LAST + 1);
101 static unsigned getHashValue(const OffloadKind &Val) { return Val; }
103 static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
104 return LHS == RHS;
107 } // namespace llvm
109 namespace {
110 using std::error_code;
/// Option flags specific to the linker wrapper.
/// Must not overlap with llvm::opt::DriverFlag.
enum WrapperFlags {
  // Options only used by the linker wrapper.
  WrapperOnlyOption = (1 << 4),
  // Options only used for device linking.
  DeviceOnlyOption = (1 << 5),
};
// Option identifiers. OPT_INVALID is 0; one enumerator is then generated per
// OPTION(...) entry of LinkerWrapperOpts.inc through LLVM_MAKE_OPT_ID, and
// LastOption follows the generated enumerators.
118 enum ID {
119 OPT_INVALID = 0, // This is not an option ID.
120 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
121 #include "LinkerWrapperOpts.inc"
122 LastOption
123 #undef OPTION
// For every PREFIX(NAME, VALUE) entry of LinkerWrapperOpts.inc, define a
// constexpr array NAME##_init holding VALUE and an ArrayRef called NAME that
// views all but the last element of that array.
126 #define PREFIX(NAME, VALUE) \
127 static constexpr StringLiteral NAME##_init[] = VALUE; \
128 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
129 std::size(NAME##_init) - 1);
130 #include "LinkerWrapperOpts.inc"
131 #undef PREFIX
// Table with one OptTable::Info entry per OPTION(...) entry of
// LinkerWrapperOpts.inc, built through LLVM_CONSTRUCT_OPT_INFO. It is handed
// to the option table constructed by WrapperOptTable.
133 static constexpr OptTable::Info InfoTable[] = {
134 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
135 #include "LinkerWrapperOpts.inc"
136 #undef OPTION
139 class WrapperOptTable : public opt::GenericOptTable {
140 public:
141 WrapperOptTable() : opt::GenericOptTable(InfoTable) {}
144 const OptTable &getOptTable() {
145 static const WrapperOptTable *Table = []() {
146 auto Result = std::make_unique<WrapperOptTable>();
147 return Result.release();
148 }();
149 return *Table;
152 void printCommands(ArrayRef<StringRef> CmdArgs) {
153 if (CmdArgs.empty())
154 return;
156 llvm::errs() << " \"" << CmdArgs.front() << "\" ";
157 for (auto IC = std::next(CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC)
158 llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n");
161 [[noreturn]] void reportError(Error E) {
162 outs().flush();
163 logAllUnhandledErrors(std::move(E),
164 WithColor::error(errs(), LinkerExecutable));
165 exit(EXIT_FAILURE);
168 /// Create an extra user-specified \p OffloadFile.
169 /// TODO: We should find a way to wrap these as libraries instead.
170 Expected<OffloadFile> getInputBitcodeLibrary(StringRef Input) {
171 auto [Device, Path] = StringRef(Input).split('=');
172 auto [String, Arch] = Device.rsplit('-');
173 auto [Kind, Triple] = String.split('-');
175 llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> ImageOrError =
176 llvm::MemoryBuffer::getFileOrSTDIN(Path);
177 if (std::error_code EC = ImageOrError.getError())
178 return createFileError(Path, EC);
180 OffloadingImage Image{};
181 Image.TheImageKind = IMG_Bitcode;
182 Image.TheOffloadKind = getOffloadKind(Kind);
183 Image.StringData["triple"] = Triple;
184 Image.StringData["arch"] = Arch;
185 Image.Image = std::move(*ImageOrError);
187 std::unique_ptr<MemoryBuffer> Binary =
188 MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image));
189 auto NewBinaryOrErr = OffloadBinary::create(*Binary);
190 if (!NewBinaryOrErr)
191 return NewBinaryOrErr.takeError();
192 return OffloadFile(std::move(*NewBinaryOrErr), std::move(Binary));
195 std::string getMainExecutable(const char *Name) {
196 void *Ptr = (void *)(intptr_t)&getMainExecutable;
197 auto COWPath = sys::fs::getMainExecutable(Name, Ptr);
198 return sys::path::parent_path(COWPath).str();
201 /// Get a temporary filename suitable for output.
202 Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) {
203 std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex);
204 SmallString<128> OutputFile;
205 if (SaveTemps) {
206 (Prefix + "." + Extension).toNullTerminatedStringRef(OutputFile);
207 } else {
208 if (std::error_code EC =
209 sys::fs::createTemporaryFile(Prefix, Extension, OutputFile))
210 return createFileError(OutputFile, EC);
213 TempFiles.emplace_back(std::move(OutputFile));
214 return TempFiles.back();
217 /// Execute the command \p ExecutablePath with the arguments \p Args.
218 Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
219 if (Verbose || DryRun)
220 printCommands(Args);
222 if (!DryRun)
223 if (sys::ExecuteAndWait(ExecutablePath, Args))
224 return createStringError(inconvertibleErrorCode(),
225 "'" + sys::path::filename(ExecutablePath) + "'" +
226 " failed");
227 return Error::success();
230 Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
232 ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths);
233 if (!Path)
234 Path = sys::findProgramByName(Name);
235 if (!Path && DryRun)
236 return Name.str();
237 if (!Path)
238 return createStringError(Path.getError(),
239 "Unable to find '" + Name + "' in path");
240 return *Path;
243 /// Runs the wrapped linker job with the newly created input.
244 Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
245 llvm::TimeTraceScope TimeScope("Execute host linker");
247 // Render the linker arguments and add the newly created image. We add it
248 // after the output file to ensure it is linked with the correct libraries.
249 StringRef LinkerPath = Args.getLastArgValue(OPT_linker_path_EQ);
250 ArgStringList NewLinkerArgs;
251 for (const opt::Arg *Arg : Args) {
252 // Do not forward arguments only intended for the linker wrapper.
253 if (Arg->getOption().hasFlag(WrapperOnlyOption))
254 continue;
256 Arg->render(Args, NewLinkerArgs);
257 if (Arg->getOption().matches(OPT_o))
258 llvm::transform(Files, std::back_inserter(NewLinkerArgs),
259 [&](StringRef Arg) { return Args.MakeArgString(Arg); });
262 SmallVector<StringRef> LinkerArgs({LinkerPath});
263 for (StringRef Arg : NewLinkerArgs)
264 LinkerArgs.push_back(Arg);
265 if (Error Err = executeCommands(LinkerPath, LinkerArgs))
266 return Err;
267 return Error::success();
270 void printVersion(raw_ostream &OS) {
271 OS << clang::getClangToolFullVersion("clang-linker-wrapper") << '\n';
274 namespace nvptx {
275 Expected<StringRef>
276 fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
277 const ArgList &Args) {
278 llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
279 // NVPTX uses the fatbinary program to bundle the linked images.
280 Expected<std::string> FatBinaryPath =
281 findProgram("fatbinary", {CudaBinaryPath + "/bin"});
282 if (!FatBinaryPath)
283 return FatBinaryPath.takeError();
285 llvm::Triple Triple(
286 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
288 // Create a new file to write the linked device image to.
289 auto TempFileOrErr =
290 createOutputFile(sys::path::filename(ExecutableName), "fatbin");
291 if (!TempFileOrErr)
292 return TempFileOrErr.takeError();
294 SmallVector<StringRef, 16> CmdArgs;
295 CmdArgs.push_back(*FatBinaryPath);
296 CmdArgs.push_back(Triple.isArch64Bit() ? "-64" : "-32");
297 CmdArgs.push_back("--create");
298 CmdArgs.push_back(*TempFileOrErr);
299 for (const auto &[File, Arch] : InputFiles)
300 CmdArgs.push_back(
301 Args.MakeArgString("--image=profile=" + Arch + ",file=" + File));
303 if (Error Err = executeCommands(*FatBinaryPath, CmdArgs))
304 return std::move(Err);
306 return *TempFileOrErr;
308 } // namespace nvptx
310 namespace amdgcn {
311 Expected<StringRef>
312 fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
313 const ArgList &Args) {
314 llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary");
316 // AMDGPU uses the clang-offload-bundler to bundle the linked images.
317 Expected<std::string> OffloadBundlerPath = findProgram(
318 "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
319 if (!OffloadBundlerPath)
320 return OffloadBundlerPath.takeError();
322 llvm::Triple Triple(
323 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
325 // Create a new file to write the linked device image to.
326 auto TempFileOrErr =
327 createOutputFile(sys::path::filename(ExecutableName), "hipfb");
328 if (!TempFileOrErr)
329 return TempFileOrErr.takeError();
331 BumpPtrAllocator Alloc;
332 StringSaver Saver(Alloc);
334 SmallVector<StringRef, 16> CmdArgs;
335 CmdArgs.push_back(*OffloadBundlerPath);
336 CmdArgs.push_back("-type=o");
337 CmdArgs.push_back("-bundle-align=4096");
339 SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux"};
340 for (const auto &[File, Arch] : InputFiles)
341 Targets.push_back(Saver.save("hipv4-amdgcn-amd-amdhsa--" + Arch));
342 CmdArgs.push_back(Saver.save(llvm::join(Targets, ",")));
344 CmdArgs.push_back("-input=/dev/null");
345 for (const auto &[File, Arch] : InputFiles)
346 CmdArgs.push_back(Saver.save("-input=" + File));
348 CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr));
350 if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs))
351 return std::move(Err);
353 return *TempFileOrErr;
355 } // namespace amdgcn
357 namespace generic {
358 Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
359 llvm::TimeTraceScope TimeScope("Clang");
360 // Use `clang` to invoke the appropriate device tools.
361 Expected<std::string> ClangPath =
362 findProgram("clang", {getMainExecutable("clang")});
363 if (!ClangPath)
364 return ClangPath.takeError();
366 const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
367 StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
368 if (Arch.empty())
369 Arch = "native";
370 // Create a new file to write the linked device image to. Assume that the
371 // input filename already has the device and architecture.
372 auto TempFileOrErr =
373 createOutputFile(sys::path::filename(ExecutableName) + "." +
374 Triple.getArchName() + "." + Arch,
375 "img");
376 if (!TempFileOrErr)
377 return TempFileOrErr.takeError();
379 StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
380 SmallVector<StringRef, 16> CmdArgs{
381 *ClangPath,
382 "-o",
383 *TempFileOrErr,
384 Args.MakeArgString("--target=" + Triple.getTriple()),
385 Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch)
386 : Args.MakeArgString("-march=" + Arch),
387 Args.MakeArgString("-" + OptLevel),
388 "-Wl,--no-undefined",
391 for (StringRef InputFile : InputFiles)
392 CmdArgs.push_back(InputFile);
394 // If this is CPU offloading we copy the input libraries.
395 if (!Triple.isAMDGPU() && !Triple.isNVPTX()) {
396 CmdArgs.push_back("-Wl,-Bsymbolic");
397 CmdArgs.push_back("-shared");
398 ArgStringList LinkerArgs;
399 for (const opt::Arg *Arg : Args.filtered(OPT_library, OPT_library_path))
400 Arg->render(Args, LinkerArgs);
401 for (const opt::Arg *Arg : Args.filtered(OPT_rpath))
402 LinkerArgs.push_back(
403 Args.MakeArgString("-Wl,-rpath," + StringRef(Arg->getValue())));
404 llvm::copy(LinkerArgs, std::back_inserter(CmdArgs));
407 // Pass on -mllvm options to the clang invocation.
408 for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) {
409 CmdArgs.push_back("-mllvm");
410 CmdArgs.push_back(Arg->getValue());
413 if (Args.hasArg(OPT_debug))
414 CmdArgs.push_back("-g");
416 if (SaveTemps)
417 CmdArgs.push_back("-save-temps");
419 if (Verbose)
420 CmdArgs.push_back("-v");
422 if (!CudaBinaryPath.empty())
423 CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath));
425 for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg))
426 llvm::copy(
427 SmallVector<StringRef>({"-Xcuda-ptxas", Args.MakeArgString(Arg)}),
428 std::back_inserter(CmdArgs));
430 for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
431 CmdArgs.push_back(Args.MakeArgString("-Wl," + Arg));
433 for (StringRef Arg : Args.getAllArgValues(OPT_builtin_bitcode_EQ)) {
434 if (llvm::Triple(Arg.split('=').first) == Triple)
435 CmdArgs.append({"-Xclang", "-mlink-builtin-bitcode", "-Xclang",
436 Args.MakeArgString(Arg.split('=').second)});
439 // The OpenMPOpt pass can introduce new calls and is expensive, we do not want
440 // this when running CodeGen through clang.
441 if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ))
442 CmdArgs.append({"-mllvm", "-openmp-opt-disable"});
444 if (Error Err = executeCommands(*ClangPath, CmdArgs))
445 return std::move(Err);
447 return *TempFileOrErr;
449 } // namespace generic
451 Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
452 const ArgList &Args) {
453 const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
454 switch (Triple.getArch()) {
455 case Triple::nvptx:
456 case Triple::nvptx64:
457 case Triple::amdgcn:
458 case Triple::x86:
459 case Triple::x86_64:
460 case Triple::aarch64:
461 case Triple::aarch64_be:
462 case Triple::ppc64:
463 case Triple::ppc64le:
464 return generic::clang(InputFiles, Args);
465 default:
466 return createStringError(inconvertibleErrorCode(),
467 Triple.getArchName() +
468 " linking is not supported");
472 void diagnosticHandler(const DiagnosticInfo &DI) {
473 std::string ErrStorage;
474 raw_string_ostream OS(ErrStorage);
475 DiagnosticPrinterRawOStream DP(OS);
476 DI.print(DP);
478 switch (DI.getSeverity()) {
479 case DS_Error:
480 WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n";
481 LTOError = true;
482 break;
483 case DS_Warning:
484 WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n";
485 break;
486 case DS_Note:
487 WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n";
488 break;
489 case DS_Remark:
490 WithColor::remark(errs()) << ErrStorage << "\n";
491 break;
495 // Get the list of target features from the input file and unify them such that
496 // if there are multiple +xxx or -xxx features we only keep the last one.
497 std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) {
498 SmallVector<StringRef> Features;
499 for (const OffloadFile &File : InputFiles) {
500 for (auto Arg : llvm::split(File.getBinary()->getString("feature"), ","))
501 Features.emplace_back(Arg);
504 // Only add a feature if it hasn't been seen before starting from the end.
505 std::vector<std::string> UnifiedFeatures;
506 DenseSet<StringRef> UsedFeatures;
507 for (StringRef Feature : llvm::reverse(Features)) {
508 if (UsedFeatures.insert(Feature.drop_front()).second)
509 UnifiedFeatures.push_back(Feature.str());
512 return UnifiedFeatures;
515 template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
516 std::unique_ptr<lto::LTO> createLTO(
517 const ArgList &Args, const std::vector<std::string> &Features,
518 ModuleHook Hook = [](size_t, const Module &) { return true; }) {
519 const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
520 StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
521 lto::Config Conf;
522 lto::ThinBackend Backend;
523 // TODO: Handle index-only thin-LTO
524 Backend =
525 lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
527 Conf.CPU = Arch.str();
528 Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple);
529 Conf.Freestanding = true;
531 StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
532 Conf.MAttrs = Features;
533 std::optional<CodeGenOptLevel> CGOptLevelOrNone =
534 CodeGenOpt::parseLevel(OptLevel[1]);
535 assert(CGOptLevelOrNone && "Invalid optimization level");
536 Conf.CGOptLevel = *CGOptLevelOrNone;
537 Conf.OptLevel = OptLevel[1] - '0';
538 Conf.DefaultTriple = Triple.getTriple();
540 LTOError = false;
541 Conf.DiagHandler = diagnosticHandler;
543 Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
544 Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
546 if (SaveTemps) {
547 std::string TempName = (sys::path::filename(ExecutableName) + "." +
548 Triple.getTriple() + "." + Arch)
549 .str();
550 Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) {
551 std::string File =
552 !Task ? TempName + ".postlink.bc"
553 : TempName + "." + std::to_string(Task) + ".postlink.bc";
554 error_code EC;
555 raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
556 if (EC)
557 reportError(errorCodeToError(EC));
558 WriteBitcodeToFile(M, LinkedBitcode);
559 return true;
561 Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) {
562 std::string File =
563 !Task ? TempName + ".postopt.bc"
564 : TempName + "." + std::to_string(Task) + ".postopt.bc";
565 error_code EC;
566 raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
567 if (EC)
568 reportError(errorCodeToError(EC));
569 WriteBitcodeToFile(M, LinkedBitcode);
570 return true;
573 Conf.PostOptModuleHook = Hook;
574 Conf.CGFileType = (Triple.isNVPTX() || SaveTemps)
575 ? CodeGenFileType::AssemblyFile
576 : CodeGenFileType::ObjectFile;
578 // TODO: Handle remark files
579 Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program);
581 return std::make_unique<lto::LTO>(std::move(Conf), Backend);
584 // Returns true if \p S is valid as a C language identifier and will be given
585 // `__start_` and `__stop_` symbols.
586 bool isValidCIdentifier(StringRef S) {
587 return !S.empty() && (isAlpha(S[0]) || S[0] == '_') &&
588 llvm::all_of(llvm::drop_begin(S),
589 [](char C) { return C == '_' || isAlnum(C); });
592 Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
593 SmallVectorImpl<StringRef> &OutputFiles,
594 const ArgList &Args) {
595 llvm::TimeTraceScope TimeScope("Link bitcode files");
596 const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
597 StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
599 SmallVector<OffloadFile, 4> BitcodeInputFiles;
600 DenseSet<StringRef> StrongResolutions;
601 DenseSet<StringRef> UsedInRegularObj;
602 DenseSet<StringRef> UsedInSharedLib;
603 BumpPtrAllocator Alloc;
604 StringSaver Saver(Alloc);
606 // Search for bitcode files in the input and create an LTO input file. If it
607 // is not a bitcode file, scan its symbol table for symbols we need to save.
608 for (OffloadFile &File : InputFiles) {
609 MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), "");
611 file_magic Type = identify_magic(Buffer.getBuffer());
612 switch (Type) {
613 case file_magic::bitcode: {
614 Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer);
615 if (!IRSymtabOrErr)
616 return IRSymtabOrErr.takeError();
618 // Check for any strong resolutions we need to preserve.
619 for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
620 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
621 if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() &&
622 !Sym.isUndefined())
623 StrongResolutions.insert(Saver.save(Sym.Name));
626 BitcodeInputFiles.emplace_back(std::move(File));
627 continue;
629 case file_magic::elf_relocatable:
630 case file_magic::elf_shared_object: {
631 Expected<std::unique_ptr<ObjectFile>> ObjFile =
632 ObjectFile::createObjectFile(Buffer);
633 if (!ObjFile)
634 continue;
636 for (SymbolRef Sym : (*ObjFile)->symbols()) {
637 Expected<StringRef> Name = Sym.getName();
638 if (!Name)
639 return Name.takeError();
641 // Record if we've seen these symbols in any object or shared libraries.
642 if ((*ObjFile)->isRelocatableObject())
643 UsedInRegularObj.insert(Saver.save(*Name));
644 else
645 UsedInSharedLib.insert(Saver.save(*Name));
647 continue;
649 default:
650 continue;
654 if (BitcodeInputFiles.empty())
655 return Error::success();
657 // Remove all the bitcode files that we moved from the original input.
658 llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); });
660 // LTO Module hook to output bitcode without running the backend.
661 SmallVector<StringRef> BitcodeOutput;
662 auto OutputBitcode = [&](size_t, const Module &M) {
663 auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
664 "-jit-" + Triple.getTriple(),
665 "bc");
666 if (!TempFileOrErr)
667 reportError(TempFileOrErr.takeError());
669 std::error_code EC;
670 raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None);
671 if (EC)
672 reportError(errorCodeToError(EC));
673 WriteBitcodeToFile(M, LinkedBitcode);
674 BitcodeOutput.push_back(*TempFileOrErr);
675 return false;
678 // We assume visibility of the whole program if every input file was bitcode.
679 auto Features = getTargetFeatures(BitcodeInputFiles);
680 auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
681 Args.hasArg(OPT_builtin_bitcode_EQ) ||
682 Args.hasArg(OPT_clang_backend)
683 ? createLTO(Args, Features, OutputBitcode)
684 : createLTO(Args, Features);
686 // We need to resolve the symbols so the LTO backend knows which symbols need
687 // to be kept or can be internalized. This is a simplified symbol resolution
688 // scheme to approximate the full resolution a linker would do.
689 uint64_t Idx = 0;
690 DenseSet<StringRef> PrevailingSymbols;
691 for (auto &BitcodeInput : BitcodeInputFiles) {
692 // Get a semi-unique buffer identifier for Thin-LTO.
693 StringRef Identifier = Saver.save(
694 std::to_string(Idx++) + "." +
695 BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier());
696 MemoryBufferRef Buffer =
697 MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier);
698 Expected<std::unique_ptr<lto::InputFile>> BitcodeFileOrErr =
699 llvm::lto::InputFile::create(Buffer);
700 if (!BitcodeFileOrErr)
701 return BitcodeFileOrErr.takeError();
703 // Save the input file and the buffer associated with its memory.
704 const auto Symbols = (*BitcodeFileOrErr)->symbols();
705 SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
706 size_t Idx = 0;
707 for (auto &Sym : Symbols) {
708 lto::SymbolResolution &Res = Resolutions[Idx++];
710 // We will use this as the prevailing symbol definition in LTO unless
711 // it is undefined or another definition has already been used.
712 Res.Prevailing =
713 !Sym.isUndefined() &&
714 !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) &&
715 PrevailingSymbols.insert(Saver.save(Sym.getName())).second;
717 // We need LTO to preseve the following global symbols:
718 // 1) Symbols used in regular objects.
719 // 2) Sections that will be given a __start/__stop symbol.
720 // 3) Prevailing symbols that are needed visible to external libraries.
721 Res.VisibleToRegularObj =
722 UsedInRegularObj.contains(Sym.getName()) ||
723 isValidCIdentifier(Sym.getSectionName()) ||
724 (Res.Prevailing &&
725 (Sym.getVisibility() != GlobalValue::HiddenVisibility &&
726 !Sym.canBeOmittedFromSymbolTable()));
728 // Identify symbols that must be exported dynamically and can be
729 // referenced by other files.
730 Res.ExportDynamic =
731 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
732 (UsedInSharedLib.contains(Sym.getName()) ||
733 !Sym.canBeOmittedFromSymbolTable());
735 // The final definition will reside in this linkage unit if the symbol is
736 // defined and local to the module. This only checks for bitcode files,
737 // full assertion will require complete symbol resolution.
738 Res.FinalDefinitionInLinkageUnit =
739 Sym.getVisibility() != GlobalValue::DefaultVisibility &&
740 (!Sym.isUndefined() && !Sym.isCommon());
742 // We do not support linker redefined symbols (e.g. --wrap) for device
743 // image linking, so the symbols will not be changed after LTO.
744 Res.LinkerRedefined = false;
747 // Add the bitcode file with its resolved symbols to the LTO job.
748 if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions))
749 return Err;
752 // Run the LTO job to compile the bitcode.
753 size_t MaxTasks = LTOBackend->getMaxTasks();
754 SmallVector<StringRef> Files(MaxTasks);
755 auto AddStream =
756 [&](size_t Task,
757 const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> {
758 int FD = -1;
759 auto &TempFile = Files[Task];
760 StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o";
761 std::string TaskStr = Task ? "." + std::to_string(Task) : "";
762 auto TempFileOrErr =
763 createOutputFile(sys::path::filename(ExecutableName) + "." +
764 Triple.getTriple() + "." + Arch + TaskStr,
765 Extension);
766 if (!TempFileOrErr)
767 reportError(TempFileOrErr.takeError());
768 TempFile = *TempFileOrErr;
769 if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD))
770 reportError(errorCodeToError(EC));
771 return std::make_unique<CachedFileStream>(
772 std::make_unique<llvm::raw_fd_ostream>(FD, true));
775 if (Error Err = LTOBackend->run(AddStream))
776 return Err;
778 if (LTOError)
779 return createStringError(inconvertibleErrorCode(),
780 "Errors encountered inside the LTO pipeline.");
782 // If we are embedding bitcode we only need the intermediate output.
783 bool SingleOutput = Files.size() == 1;
784 if (Args.hasArg(OPT_embed_bitcode)) {
785 if (BitcodeOutput.size() != 1 || !SingleOutput)
786 return createStringError(inconvertibleErrorCode(),
787 "Cannot embed bitcode with multiple files.");
788 OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front()));
789 return Error::success();
792 // Append the new inputs to the device linker input. If the user requested an
793 // internalizing link we need to pass the bitcode to clang.
794 for (StringRef File :
795 Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)
796 ? BitcodeOutput
797 : Files)
798 OutputFiles.push_back(File);
800 return Error::success();
803 Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
804 const OffloadBinary &Binary = *File.getBinary();
806 StringRef Prefix =
807 sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier());
808 StringRef Suffix = getImageKindName(Binary.getImageKind());
810 auto TempFileOrErr = createOutputFile(
811 Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), Suffix);
812 if (!TempFileOrErr)
813 return TempFileOrErr.takeError();
815 Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
816 FileOutputBuffer::create(*TempFileOrErr, Binary.getImage().size());
817 if (!OutputOrErr)
818 return OutputOrErr.takeError();
819 std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
820 llvm::copy(Binary.getImage(), Output->getBufferStart());
821 if (Error E = Output->commit())
822 return std::move(E);
824 return *TempFileOrErr;
827 // Compile the module to an object file using the appropriate target machine for
828 // the host triple.
829 Expected<StringRef> compileModule(Module &M, OffloadKind Kind) {
830 llvm::TimeTraceScope TimeScope("Compile module");
831 std::string Msg;
832 const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
833 if (!T)
834 return createStringError(inconvertibleErrorCode(), Msg);
836 auto Options =
837 codegen::InitTargetOptionsFromCodeGenFlags(Triple(M.getTargetTriple()));
838 StringRef CPU = "";
839 StringRef Features = "";
840 std::unique_ptr<TargetMachine> TM(
841 T->createTargetMachine(M.getTargetTriple(), CPU, Features, Options,
842 Reloc::PIC_, M.getCodeModel()));
844 if (M.getDataLayout().isDefault())
845 M.setDataLayout(TM->createDataLayout());
847 int FD = -1;
848 auto TempFileOrErr =
849 createOutputFile(sys::path::filename(ExecutableName) + "." +
850 getOffloadKindName(Kind) + ".image.wrapper",
851 "o");
852 if (!TempFileOrErr)
853 return TempFileOrErr.takeError();
854 if (std::error_code EC = sys::fs::openFileForWrite(*TempFileOrErr, FD))
855 return errorCodeToError(EC);
857 auto OS = std::make_unique<llvm::raw_fd_ostream>(FD, true);
859 legacy::PassManager CodeGenPasses;
860 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
861 CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII));
862 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr,
863 CodeGenFileType::ObjectFile))
864 return createStringError(inconvertibleErrorCode(),
865 "Failed to execute host backend");
866 CodeGenPasses.run(M);
868 return *TempFileOrErr;
871 /// Creates the object file containing the device image and runtime
872 /// registration code from the device images stored in \p Images.
873 Expected<StringRef>
874 wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
875 const ArgList &Args, OffloadKind Kind) {
876 llvm::TimeTraceScope TimeScope("Wrap bundled images");
878 SmallVector<ArrayRef<char>, 4> BuffersToWrap;
879 for (const auto &Buffer : Buffers)
880 BuffersToWrap.emplace_back(
881 ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
883 LLVMContext Context;
884 Module M("offload.wrapper.module", Context);
885 M.setTargetTriple(
886 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
888 switch (Kind) {
889 case OFK_OpenMP:
890 if (Error Err = wrapOpenMPBinaries(M, BuffersToWrap))
891 return std::move(Err);
892 break;
893 case OFK_Cuda:
894 if (Error Err = wrapCudaBinary(M, BuffersToWrap.front()))
895 return std::move(Err);
896 break;
897 case OFK_HIP:
898 if (Error Err = wrapHIPBinary(M, BuffersToWrap.front()))
899 return std::move(Err);
900 break;
901 default:
902 return createStringError(inconvertibleErrorCode(),
903 getOffloadKindName(Kind) +
904 " wrapping is not supported");
907 if (Args.hasArg(OPT_print_wrapped_module))
908 errs() << M;
909 if (Args.hasArg(OPT_save_temps)) {
910 int FD = -1;
911 auto TempFileOrErr =
912 createOutputFile(sys::path::filename(ExecutableName) + "." +
913 getOffloadKindName(Kind) + ".image.wrapper",
914 "bc");
915 if (!TempFileOrErr)
916 return TempFileOrErr.takeError();
917 if (std::error_code EC = sys::fs::openFileForWrite(*TempFileOrErr, FD))
918 return errorCodeToError(EC);
919 llvm::raw_fd_ostream OS(FD, true);
920 WriteBitcodeToFile(M, OS);
923 auto FileOrErr = compileModule(M, Kind);
924 if (!FileOrErr)
925 return FileOrErr.takeError();
926 return *FileOrErr;
929 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
930 bundleOpenMP(ArrayRef<OffloadingImage> Images) {
931 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
932 for (const OffloadingImage &Image : Images)
933 Buffers.emplace_back(
934 MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image)));
936 return std::move(Buffers);
939 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
940 bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
941 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
942 for (const OffloadingImage &Image : Images)
943 InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
944 Image.StringData.lookup("arch")));
946 Triple TheTriple = Triple(Images.front().StringData.lookup("triple"));
947 auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
948 if (!FileOrErr)
949 return FileOrErr.takeError();
951 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
952 llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
954 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
955 if (std::error_code EC = ImageOrError.getError())
956 return createFileError(*FileOrErr, EC);
957 Buffers.emplace_back(std::move(*ImageOrError));
959 return std::move(Buffers);
962 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
963 bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
964 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
965 for (const OffloadingImage &Image : Images)
966 InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
967 Image.StringData.lookup("arch")));
969 Triple TheTriple = Triple(Images.front().StringData.lookup("triple"));
970 auto FileOrErr = amdgcn::fatbinary(InputFiles, Args);
971 if (!FileOrErr)
972 return FileOrErr.takeError();
974 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
975 llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
977 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
978 if (std::error_code EC = ImageOrError.getError())
979 return createFileError(*FileOrErr, EC);
980 Buffers.emplace_back(std::move(*ImageOrError));
982 return std::move(Buffers);
985 /// Transforms the input \p Images into the binary format the runtime expects
986 /// for the given \p Kind.
987 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
988 bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
989 OffloadKind Kind) {
990 llvm::TimeTraceScope TimeScope("Bundle linked output");
991 switch (Kind) {
992 case OFK_OpenMP:
993 return bundleOpenMP(Images);
994 case OFK_Cuda:
995 return bundleCuda(Images, Args);
996 case OFK_HIP:
997 return bundleHIP(Images, Args);
998 default:
999 return createStringError(inconvertibleErrorCode(),
1000 getOffloadKindName(Kind) +
1001 " bundling is not supported");
1005 /// Returns a new ArgList containg arguments used for the device linking phase.
1006 DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
1007 const InputArgList &Args) {
1008 DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));
1009 for (Arg *A : Args)
1010 DAL.append(A);
1012 // Set the subarchitecture and target triple for this compilation.
1013 const OptTable &Tbl = getOptTable();
1014 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
1015 Args.MakeArgString(Input.front().getBinary()->getArch()));
1016 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
1017 Args.MakeArgString(Input.front().getBinary()->getTriple()));
1019 // If every input file is bitcode we have whole program visibility as we do
1020 // only support static linking with bitcode.
1021 auto ContainsBitcode = [](const OffloadFile &F) {
1022 return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode;
1024 if (llvm::all_of(Input, ContainsBitcode))
1025 DAL.AddFlagArg(nullptr, Tbl.getOption(OPT_whole_program));
1027 // Forward '-Xoffload-linker' options to the appropriate backend.
1028 for (StringRef Arg : Args.getAllArgValues(OPT_device_linker_args_EQ)) {
1029 auto [Triple, Value] = Arg.split('=');
1030 if (Value.empty())
1031 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ),
1032 Args.MakeArgString(Triple));
1033 else if (Triple == DAL.getLastArgValue(OPT_triple_EQ))
1034 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ),
1035 Args.MakeArgString(Value));
1038 return DAL;
1041 /// Transforms all the extracted offloading input files into an image that can
1042 /// be registered by the runtime.
1043 Expected<SmallVector<StringRef>>
1044 linkAndWrapDeviceFiles(SmallVectorImpl<OffloadFile> &LinkerInputFiles,
1045 const InputArgList &Args, char **Argv, int Argc) {
1046 llvm::TimeTraceScope TimeScope("Handle all device input");
1048 DenseMap<OffloadFile::TargetID, SmallVector<OffloadFile>> InputMap;
1049 for (auto &File : LinkerInputFiles)
1050 InputMap[File].emplace_back(std::move(File));
1051 LinkerInputFiles.clear();
1053 SmallVector<SmallVector<OffloadFile>> InputsForTarget;
1054 for (auto &[ID, Input] : InputMap)
1055 InputsForTarget.emplace_back(std::move(Input));
1056 InputMap.clear();
1058 std::mutex ImageMtx;
1059 DenseMap<OffloadKind, SmallVector<OffloadingImage>> Images;
1060 auto Err = parallelForEachError(InputsForTarget, [&](auto &Input) -> Error {
1061 llvm::TimeTraceScope TimeScope("Link device input");
1063 // Each thread needs its own copy of the base arguments to maintain
1064 // per-device argument storage of synthetic strings.
1065 const OptTable &Tbl = getOptTable();
1066 BumpPtrAllocator Alloc;
1067 StringSaver Saver(Alloc);
1068 auto BaseArgs =
1069 Tbl.parseArgs(Argc, Argv, OPT_INVALID, Saver, [](StringRef Err) {
1070 reportError(createStringError(inconvertibleErrorCode(), Err));
1072 auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
1074 DenseSet<OffloadKind> ActiveOffloadKinds;
1075 for (const auto &File : Input)
1076 if (File.getBinary()->getOffloadKind() != OFK_None)
1077 ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());
1079 // First link and remove all the input files containing bitcode.
1080 SmallVector<StringRef> InputFiles;
1081 if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
1082 return Err;
1084 // Write any remaining device inputs to an output file for the linker.
1085 for (const OffloadFile &File : Input) {
1086 auto FileNameOrErr = writeOffloadFile(File);
1087 if (!FileNameOrErr)
1088 return FileNameOrErr.takeError();
1089 InputFiles.emplace_back(*FileNameOrErr);
1092 // Link the remaining device files using the device linker.
1093 auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode)
1094 ? linkDevice(InputFiles, LinkerArgs)
1095 : InputFiles.front();
1096 if (!OutputOrErr)
1097 return OutputOrErr.takeError();
1099 // Store the offloading image for each linked output file.
1100 for (OffloadKind Kind : ActiveOffloadKinds) {
1101 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
1102 llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr);
1103 if (std::error_code EC = FileOrErr.getError()) {
1104 if (DryRun)
1105 FileOrErr = MemoryBuffer::getMemBuffer("");
1106 else
1107 return createFileError(*OutputOrErr, EC);
1110 std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
1111 OffloadingImage TheImage{};
1112 TheImage.TheImageKind =
1113 Args.hasArg(OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
1114 TheImage.TheOffloadKind = Kind;
1115 TheImage.StringData["triple"] =
1116 Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_triple_EQ));
1117 TheImage.StringData["arch"] =
1118 Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_arch_EQ));
1119 TheImage.Image = std::move(*FileOrErr);
1121 Images[Kind].emplace_back(std::move(TheImage));
1123 return Error::success();
1125 if (Err)
1126 return std::move(Err);
1128 // Create a binary image of each offloading image and embed it into a new
1129 // object file.
1130 SmallVector<StringRef> WrappedOutput;
1131 for (auto &[Kind, Input] : Images) {
1132 // We sort the entries before bundling so they appear in a deterministic
1133 // order in the final binary.
1134 llvm::sort(Input, [](OffloadingImage &A, OffloadingImage &B) {
1135 return A.StringData["triple"] > B.StringData["triple"] ||
1136 A.StringData["arch"] > B.StringData["arch"] ||
1137 A.TheOffloadKind < B.TheOffloadKind;
1139 auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind);
1140 if (!BundledImagesOrErr)
1141 return BundledImagesOrErr.takeError();
1142 auto OutputOrErr = wrapDeviceImages(*BundledImagesOrErr, Args, Kind);
1143 if (!OutputOrErr)
1144 return OutputOrErr.takeError();
1145 WrappedOutput.push_back(*OutputOrErr);
1148 return WrappedOutput;
1151 std::optional<std::string> findFile(StringRef Dir, StringRef Root,
1152 const Twine &Name) {
1153 SmallString<128> Path;
1154 if (Dir.startswith("="))
1155 sys::path::append(Path, Root, Dir.substr(1), Name);
1156 else
1157 sys::path::append(Path, Dir, Name);
1159 if (sys::fs::exists(Path))
1160 return static_cast<std::string>(Path);
1161 return std::nullopt;
1164 std::optional<std::string>
1165 findFromSearchPaths(StringRef Name, StringRef Root,
1166 ArrayRef<StringRef> SearchPaths) {
1167 for (StringRef Dir : SearchPaths)
1168 if (std::optional<std::string> File = findFile(Dir, Root, Name))
1169 return File;
1170 return std::nullopt;
1173 std::optional<std::string>
1174 searchLibraryBaseName(StringRef Name, StringRef Root,
1175 ArrayRef<StringRef> SearchPaths) {
1176 for (StringRef Dir : SearchPaths) {
1177 if (std::optional<std::string> File =
1178 findFile(Dir, Root, "lib" + Name + ".so"))
1179 return File;
1180 if (std::optional<std::string> File =
1181 findFile(Dir, Root, "lib" + Name + ".a"))
1182 return File;
1184 return std::nullopt;
1187 /// Search for static libraries in the linker's library path given input like
1188 /// `-lfoo` or `-l:libfoo.a`.
1189 std::optional<std::string> searchLibrary(StringRef Input, StringRef Root,
1190 ArrayRef<StringRef> SearchPaths) {
1191 if (Input.startswith(":"))
1192 return findFromSearchPaths(Input.drop_front(), Root, SearchPaths);
1193 return searchLibraryBaseName(Input, Root, SearchPaths);
/// Common redeclaration of the symbol flags needed for symbol resolution.
/// The values are bit flags and may be combined.
enum Symbol : uint32_t {
  /// No flags set.
  Sym_None = 0,
  /// The symbol is referenced but not defined.
  Sym_Undefined = 1U << 1,
  /// The symbol is weak.
  Sym_Weak = 1U << 2,
};
1203 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
1204 /// extract any symbols from it.
1205 Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
1206 bool IsArchive, StringSaver &Saver,
1207 DenseMap<StringRef, Symbol> &Syms) {
1208 Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer);
1209 if (!IRSymtabOrErr)
1210 return IRSymtabOrErr.takeError();
1212 bool ShouldExtract = !IsArchive;
1213 DenseMap<StringRef, Symbol> TmpSyms;
1214 for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
1215 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
1216 if (Sym.isFormatSpecific() || !Sym.isGlobal())
1217 continue;
1219 bool NewSymbol = Syms.count(Sym.getName()) == 0;
1220 auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()];
1222 // We will extract if it defines a currenlty undefined non-weak symbol.
1223 bool ResolvesStrongReference =
1224 ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
1225 !Sym.isUndefined());
1226 // We will extract if it defines a new global symbol visible to the host.
1227 // This is only necessary for code targeting an offloading language.
1228 bool NewGlobalSymbol =
1229 ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
1230 !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
1231 (Sym.getVisibility() != GlobalValue::HiddenVisibility));
1232 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1234 // Update this symbol in the "table" with the new information.
1235 if (OldSym & Sym_Undefined && !Sym.isUndefined())
1236 TmpSyms[Saver.save(Sym.getName())] =
1237 static_cast<Symbol>(OldSym & ~Sym_Undefined);
1238 if (Sym.isUndefined() && NewSymbol)
1239 TmpSyms[Saver.save(Sym.getName())] =
1240 static_cast<Symbol>(OldSym | Sym_Undefined);
1241 if (Sym.isWeak())
1242 TmpSyms[Saver.save(Sym.getName())] =
1243 static_cast<Symbol>(OldSym | Sym_Weak);
1247 // If the file gets extracted we update the table with the new symbols.
1248 if (ShouldExtract)
1249 Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
1251 return ShouldExtract;
1254 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
1255 /// any symbols from it.
1256 Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
1257 bool IsArchive, StringSaver &Saver,
1258 DenseMap<StringRef, Symbol> &Syms) {
1259 bool ShouldExtract = !IsArchive;
1260 DenseMap<StringRef, Symbol> TmpSyms;
1261 for (SymbolRef Sym : Obj.symbols()) {
1262 auto FlagsOrErr = Sym.getFlags();
1263 if (!FlagsOrErr)
1264 return FlagsOrErr.takeError();
1266 if (!(*FlagsOrErr & SymbolRef::SF_Global) ||
1267 (*FlagsOrErr & SymbolRef::SF_FormatSpecific))
1268 continue;
1270 auto NameOrErr = Sym.getName();
1271 if (!NameOrErr)
1272 return NameOrErr.takeError();
1274 bool NewSymbol = Syms.count(*NameOrErr) == 0;
1275 auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr];
1277 // We will extract if it defines a currenlty undefined non-weak symbol.
1278 bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
1279 !(OldSym & Sym_Weak) &&
1280 !(*FlagsOrErr & SymbolRef::SF_Undefined);
1282 // We will extract if it defines a new global symbol visible to the host.
1283 // This is only necessary for code targeting an offloading language.
1284 bool NewGlobalSymbol =
1285 ((NewSymbol || (OldSym & Sym_Undefined)) &&
1286 !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
1287 !(*FlagsOrErr & SymbolRef::SF_Hidden));
1288 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1290 // Update this symbol in the "table" with the new information.
1291 if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
1292 TmpSyms[Saver.save(*NameOrErr)] =
1293 static_cast<Symbol>(OldSym & ~Sym_Undefined);
1294 if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol)
1295 TmpSyms[Saver.save(*NameOrErr)] =
1296 static_cast<Symbol>(OldSym | Sym_Undefined);
1297 if (*FlagsOrErr & SymbolRef::SF_Weak)
1298 TmpSyms[Saver.save(*NameOrErr)] = static_cast<Symbol>(OldSym | Sym_Weak);
1301 // If the file gets extracted we update the table with the new symbols.
1302 if (ShouldExtract)
1303 Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
1305 return ShouldExtract;
1308 /// Attempt to 'resolve' symbols found in input files. We use this to
1309 /// determine if an archive member needs to be extracted. An archive member
1310 /// will be extracted if any of the following is true.
1311 /// 1) It defines an undefined symbol in a regular object filie.
1312 /// 2) It defines a global symbol without hidden visibility that has not
1313 /// yet been defined.
1314 Expected<bool> getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive,
1315 StringSaver &Saver,
1316 DenseMap<StringRef, Symbol> &Syms) {
1317 MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
1318 switch (identify_magic(Image)) {
1319 case file_magic::bitcode:
1320 return getSymbolsFromBitcode(Buffer, Kind, IsArchive, Saver, Syms);
1321 case file_magic::elf_relocatable: {
1322 Expected<std::unique_ptr<ObjectFile>> ObjFile =
1323 ObjectFile::createObjectFile(Buffer);
1324 if (!ObjFile)
1325 return ObjFile.takeError();
1326 return getSymbolsFromObject(**ObjFile, Kind, IsArchive, Saver, Syms);
1328 default:
1329 return false;
1333 /// Search the input files and libraries for embedded device offloading code
1334 /// and add it to the list of files to be linked. Files coming from static
1335 /// libraries are only added to the input if they are used by an existing
1336 /// input file.
1337 Expected<SmallVector<OffloadFile>> getDeviceInput(const ArgList &Args) {
1338 llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
1340 StringRef Root = Args.getLastArgValue(OPT_sysroot_EQ);
1341 SmallVector<StringRef> LibraryPaths;
1342 for (const opt::Arg *Arg : Args.filtered(OPT_library_path))
1343 LibraryPaths.push_back(Arg->getValue());
1345 BumpPtrAllocator Alloc;
1346 StringSaver Saver(Alloc);
1348 // Try to extract device code from the linker input files.
1349 SmallVector<OffloadFile> InputFiles;
1350 DenseMap<OffloadFile::TargetID, DenseMap<StringRef, Symbol>> Syms;
1351 bool WholeArchive = false;
1352 for (const opt::Arg *Arg : Args.filtered(
1353 OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) {
1354 if (Arg->getOption().matches(OPT_whole_archive) ||
1355 Arg->getOption().matches(OPT_no_whole_archive)) {
1356 WholeArchive = Arg->getOption().matches(OPT_whole_archive);
1357 continue;
1360 std::optional<std::string> Filename =
1361 Arg->getOption().matches(OPT_library)
1362 ? searchLibrary(Arg->getValue(), Root, LibraryPaths)
1363 : std::string(Arg->getValue());
1365 if (!Filename && Arg->getOption().matches(OPT_library))
1366 reportError(createStringError(inconvertibleErrorCode(),
1367 "unable to find library -l%s",
1368 Arg->getValue()));
1370 if (!Filename || !sys::fs::exists(*Filename) ||
1371 sys::fs::is_directory(*Filename))
1372 continue;
1374 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1375 MemoryBuffer::getFileOrSTDIN(*Filename);
1376 if (std::error_code EC = BufferOrErr.getError())
1377 return createFileError(*Filename, EC);
1379 MemoryBufferRef Buffer = **BufferOrErr;
1380 if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object)
1381 continue;
1383 SmallVector<OffloadFile> Binaries;
1384 if (Error Err = extractOffloadBinaries(Buffer, Binaries))
1385 return std::move(Err);
1387 // We only extract archive members that are needed.
1388 bool IsArchive = identify_magic(Buffer.getBuffer()) == file_magic::archive;
1389 bool Extracted = true;
1390 while (Extracted) {
1391 Extracted = false;
1392 for (OffloadFile &Binary : Binaries) {
1393 if (!Binary.getBinary())
1394 continue;
1396 // If we don't have an object file for this architecture do not
1397 // extract.
1398 if (IsArchive && !WholeArchive && !Syms.count(Binary))
1399 continue;
1401 Expected<bool> ExtractOrErr =
1402 getSymbols(Binary.getBinary()->getImage(),
1403 Binary.getBinary()->getOffloadKind(), IsArchive, Saver,
1404 Syms[Binary]);
1405 if (!ExtractOrErr)
1406 return ExtractOrErr.takeError();
1408 Extracted = !WholeArchive && *ExtractOrErr;
1410 if (!IsArchive || WholeArchive || Extracted)
1411 InputFiles.emplace_back(std::move(Binary));
1413 // If we extracted any files we need to check all the symbols again.
1414 if (Extracted)
1415 break;
1420 for (StringRef Library : Args.getAllArgValues(OPT_bitcode_library_EQ)) {
1421 auto FileOrErr = getInputBitcodeLibrary(Library);
1422 if (!FileOrErr)
1423 return FileOrErr.takeError();
1424 InputFiles.push_back(std::move(*FileOrErr));
1427 return std::move(InputFiles);
1430 } // namespace
1432 int main(int Argc, char **Argv) {
1433 InitLLVM X(Argc, Argv);
1434 InitializeAllTargetInfos();
1435 InitializeAllTargets();
1436 InitializeAllTargetMCs();
1437 InitializeAllAsmParsers();
1438 InitializeAllAsmPrinters();
1440 LinkerExecutable = Argv[0];
1441 sys::PrintStackTraceOnErrorSignal(Argv[0]);
1443 const OptTable &Tbl = getOptTable();
1444 BumpPtrAllocator Alloc;
1445 StringSaver Saver(Alloc);
1446 auto Args = Tbl.parseArgs(Argc, Argv, OPT_INVALID, Saver, [&](StringRef Err) {
1447 reportError(createStringError(inconvertibleErrorCode(), Err));
1450 if (Args.hasArg(OPT_help) || Args.hasArg(OPT_help_hidden)) {
1451 Tbl.printHelp(
1452 outs(),
1453 "clang-linker-wrapper [options] -- <options to passed to the linker>",
1454 "\nA wrapper utility over the host linker. It scans the input files\n"
1455 "for sections that require additional processing prior to linking.\n"
1456 "The will then transparently pass all arguments and input to the\n"
1457 "specified host linker to create the final binary.\n",
1458 Args.hasArg(OPT_help_hidden), Args.hasArg(OPT_help_hidden));
1459 return EXIT_SUCCESS;
1461 if (Args.hasArg(OPT_v)) {
1462 printVersion(outs());
1463 return EXIT_SUCCESS;
1466 // This forwards '-mllvm' arguments to LLVM if present.
1467 SmallVector<const char *> NewArgv = {Argv[0]};
1468 for (const opt::Arg *Arg : Args.filtered(OPT_mllvm))
1469 NewArgv.push_back(Arg->getValue());
1470 for (const opt::Arg *Arg : Args.filtered(OPT_offload_opt_eq_minus))
1471 NewArgv.push_back(Args.MakeArgString(StringRef("-") + Arg->getValue()));
1472 cl::ParseCommandLineOptions(NewArgv.size(), &NewArgv[0]);
1474 Verbose = Args.hasArg(OPT_verbose);
1475 DryRun = Args.hasArg(OPT_dry_run);
1476 SaveTemps = Args.hasArg(OPT_save_temps);
1477 ExecutableName = Args.getLastArgValue(OPT_o, "a.out");
1478 CudaBinaryPath = Args.getLastArgValue(OPT_cuda_path_EQ).str();
1480 parallel::strategy = hardware_concurrency(1);
1481 if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) {
1482 unsigned Threads = 0;
1483 if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0)
1484 reportError(createStringError(
1485 inconvertibleErrorCode(), "%s: expected a positive integer, got '%s'",
1486 Arg->getSpelling().data(), Arg->getValue()));
1487 parallel::strategy = hardware_concurrency(Threads);
1490 if (Args.hasArg(OPT_wrapper_time_trace_eq)) {
1491 unsigned Granularity;
1492 Args.getLastArgValue(OPT_wrapper_time_trace_granularity, "500")
1493 .getAsInteger(10, Granularity);
1494 timeTraceProfilerInitialize(Granularity, Argv[0]);
1498 llvm::TimeTraceScope TimeScope("Execute linker wrapper");
1500 // Extract the device input files stored in the host fat binary.
1501 auto DeviceInputFiles = getDeviceInput(Args);
1502 if (!DeviceInputFiles)
1503 reportError(DeviceInputFiles.takeError());
1505 // Link and wrap the device images extracted from the linker input.
1506 auto FilesOrErr =
1507 linkAndWrapDeviceFiles(*DeviceInputFiles, Args, Argv, Argc);
1508 if (!FilesOrErr)
1509 reportError(FilesOrErr.takeError());
1511 // Run the host linking job with the rendered arguments.
1512 if (Error Err = runLinker(*FilesOrErr, Args))
1513 reportError(std::move(Err));
1516 if (const opt::Arg *Arg = Args.getLastArg(OPT_wrapper_time_trace_eq)) {
1517 if (Error Err = timeTraceProfilerWrite(Arg->getValue(), ExecutableName))
1518 reportError(std::move(Err));
1519 timeTraceProfilerCleanup();
1522 // Remove the temporary files created.
1523 if (!SaveTemps)
1524 for (const auto &TempFile : TempFiles)
1525 if (std::error_code EC = sys::fs::remove(TempFile))
1526 reportError(createFileError(TempFile, EC));
1528 return EXIT_SUCCESS;