1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file implements the Thin Link Time Optimization library. This library is
// intended to be used by the linker to optimize code at link time.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/Bitcode/BitcodeWriter.h"
24 #include "llvm/Bitcode/BitcodeWriterPass.h"
25 #include "llvm/Config/llvm-config.h"
26 #include "llvm/IR/DebugInfo.h"
27 #include "llvm/IR/DiagnosticPrinter.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/LegacyPassManager.h"
30 #include "llvm/IR/Mangler.h"
31 #include "llvm/IR/PassTimingInfo.h"
32 #include "llvm/IR/RemarkStreamer.h"
33 #include "llvm/IR/Verifier.h"
34 #include "llvm/IRReader/IRReader.h"
35 #include "llvm/LTO/LTO.h"
36 #include "llvm/LTO/SummaryBasedOptimizations.h"
37 #include "llvm/MC/SubtargetFeature.h"
38 #include "llvm/Object/IRObjectFile.h"
39 #include "llvm/Support/CachePruning.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/Error.h"
42 #include "llvm/Support/FileUtilities.h"
43 #include "llvm/Support/Path.h"
44 #include "llvm/Support/SHA1.h"
45 #include "llvm/Support/SmallVectorMemoryBuffer.h"
46 #include "llvm/Support/TargetRegistry.h"
47 #include "llvm/Support/ThreadPool.h"
48 #include "llvm/Support/Threading.h"
49 #include "llvm/Support/ToolOutputFile.h"
50 #include "llvm/Support/VCSRevision.h"
51 #include "llvm/Target/TargetMachine.h"
52 #include "llvm/Transforms/IPO.h"
53 #include "llvm/Transforms/IPO/FunctionImport.h"
54 #include "llvm/Transforms/IPO/Internalize.h"
55 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
56 #include "llvm/Transforms/ObjCARC.h"
57 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
61 #if !defined(_MSC_VER) && !defined(__MINGW32__)
69 #define DEBUG_TYPE "thinlto"
72 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
73 extern cl::opt
<bool> LTODiscardValueNames
;
74 extern cl::opt
<std::string
> RemarksFilename
;
75 extern cl::opt
<std::string
> RemarksPasses
;
76 extern cl::opt
<bool> RemarksWithHotness
;
77 extern cl::opt
<std::string
> RemarksFormat
;
83 ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency()));
85 // Simple helper to save temporary files for debug.
86 static void saveTempBitcode(const Module
&TheModule
, StringRef TempDir
,
87 unsigned count
, StringRef Suffix
) {
  // The user asked to save temps, so dump the bitcode file after import.
91 std::string SaveTempPath
= (TempDir
+ llvm::Twine(count
) + Suffix
).str();
93 raw_fd_ostream
OS(SaveTempPath
, EC
, sys::fs::OF_None
);
95 report_fatal_error(Twine("Failed to open ") + SaveTempPath
+
96 " to save optimized bitcode\n");
97 WriteBitcodeToFile(TheModule
, OS
, /* ShouldPreserveUseListOrder */ true);
100 static const GlobalValueSummary
*
101 getFirstDefinitionForLinker(const GlobalValueSummaryList
&GVSummaryList
) {
102 // If there is any strong definition anywhere, get it.
103 auto StrongDefForLinker
= llvm::find_if(
104 GVSummaryList
, [](const std::unique_ptr
<GlobalValueSummary
> &Summary
) {
105 auto Linkage
= Summary
->linkage();
106 return !GlobalValue::isAvailableExternallyLinkage(Linkage
) &&
107 !GlobalValue::isWeakForLinker(Linkage
);
109 if (StrongDefForLinker
!= GVSummaryList
.end())
110 return StrongDefForLinker
->get();
111 // Get the first *linker visible* definition for this global in the summary
113 auto FirstDefForLinker
= llvm::find_if(
114 GVSummaryList
, [](const std::unique_ptr
<GlobalValueSummary
> &Summary
) {
115 auto Linkage
= Summary
->linkage();
116 return !GlobalValue::isAvailableExternallyLinkage(Linkage
);
118 // Extern templates can be emitted as available_externally.
119 if (FirstDefForLinker
== GVSummaryList
.end())
121 return FirstDefForLinker
->get();
124 // Populate map of GUID to the prevailing copy for any multiply defined
125 // symbols. Currently assume first copy is prevailing, or any strong
126 // definition. Can be refined with Linker information in the future.
127 static void computePrevailingCopies(
128 const ModuleSummaryIndex
&Index
,
129 DenseMap
<GlobalValue::GUID
, const GlobalValueSummary
*> &PrevailingCopy
) {
130 auto HasMultipleCopies
= [&](const GlobalValueSummaryList
&GVSummaryList
) {
131 return GVSummaryList
.size() > 1;
134 for (auto &I
: Index
) {
135 if (HasMultipleCopies(I
.second
.SummaryList
))
136 PrevailingCopy
[I
.first
] =
137 getFirstDefinitionForLinker(I
.second
.SummaryList
);
141 static StringMap
<lto::InputFile
*>
142 generateModuleMap(std::vector
<std::unique_ptr
<lto::InputFile
>> &Modules
) {
143 StringMap
<lto::InputFile
*> ModuleMap
;
144 for (auto &M
: Modules
) {
145 assert(ModuleMap
.find(M
->getName()) == ModuleMap
.end() &&
146 "Expect unique Buffer Identifier");
147 ModuleMap
[M
->getName()] = M
.get();
152 static void promoteModule(Module
&TheModule
, const ModuleSummaryIndex
&Index
) {
153 if (renameModuleForThinLTO(TheModule
, Index
))
154 report_fatal_error("renameModuleForThinLTO failed");
158 class ThinLTODiagnosticInfo
: public DiagnosticInfo
{
161 ThinLTODiagnosticInfo(const Twine
&DiagMsg
,
162 DiagnosticSeverity Severity
= DS_Error
)
163 : DiagnosticInfo(DK_Linker
, Severity
), Msg(DiagMsg
) {}
164 void print(DiagnosticPrinter
&DP
) const override
{ DP
<< Msg
; }
168 /// Verify the module and strip broken debug info.
169 static void verifyLoadedModule(Module
&TheModule
) {
170 bool BrokenDebugInfo
= false;
171 if (verifyModule(TheModule
, &dbgs(), &BrokenDebugInfo
))
172 report_fatal_error("Broken module found, compilation aborted!");
173 if (BrokenDebugInfo
) {
174 TheModule
.getContext().diagnose(ThinLTODiagnosticInfo(
175 "Invalid debug info found, debug info will be stripped", DS_Warning
));
176 StripDebugInfo(TheModule
);
180 static std::unique_ptr
<Module
> loadModuleFromInput(lto::InputFile
*Input
,
181 LLVMContext
&Context
,
184 auto &Mod
= Input
->getSingleBitcodeModule();
186 Expected
<std::unique_ptr
<Module
>> ModuleOrErr
=
187 Lazy
? Mod
.getLazyModule(Context
,
188 /* ShouldLazyLoadMetadata */ true, IsImporting
)
189 : Mod
.parseModule(Context
);
191 handleAllErrors(ModuleOrErr
.takeError(), [&](ErrorInfoBase
&EIB
) {
192 SMDiagnostic Err
= SMDiagnostic(Mod
.getModuleIdentifier(),
193 SourceMgr::DK_Error
, EIB
.message());
194 Err
.print("ThinLTO", errs());
196 report_fatal_error("Can't load module, abort.");
199 verifyLoadedModule(*ModuleOrErr
.get());
200 return std::move(*ModuleOrErr
);
204 crossImportIntoModule(Module
&TheModule
, const ModuleSummaryIndex
&Index
,
205 StringMap
<lto::InputFile
*> &ModuleMap
,
206 const FunctionImporter::ImportMapTy
&ImportList
) {
207 auto Loader
= [&](StringRef Identifier
) {
208 auto &Input
= ModuleMap
[Identifier
];
209 return loadModuleFromInput(Input
, TheModule
.getContext(),
210 /*Lazy=*/true, /*IsImporting*/ true);
213 FunctionImporter
Importer(Index
, Loader
);
214 Expected
<bool> Result
= Importer
.importFunctions(TheModule
, ImportList
);
216 handleAllErrors(Result
.takeError(), [&](ErrorInfoBase
&EIB
) {
217 SMDiagnostic Err
= SMDiagnostic(TheModule
.getModuleIdentifier(),
218 SourceMgr::DK_Error
, EIB
.message());
219 Err
.print("ThinLTO", errs());
221 report_fatal_error("importFunctions failed");
223 // Verify again after cross-importing.
224 verifyLoadedModule(TheModule
);
227 static void optimizeModule(Module
&TheModule
, TargetMachine
&TM
,
228 unsigned OptLevel
, bool Freestanding
) {
229 // Populate the PassManager
230 PassManagerBuilder PMB
;
231 PMB
.LibraryInfo
= new TargetLibraryInfoImpl(TM
.getTargetTriple());
233 PMB
.LibraryInfo
->disableAllFunctions();
234 PMB
.Inliner
= createFunctionInliningPass();
235 // FIXME: should get it from the bitcode?
236 PMB
.OptLevel
= OptLevel
;
237 PMB
.LoopVectorize
= true;
238 PMB
.SLPVectorize
= true;
239 // Already did this in verifyLoadedModule().
240 PMB
.VerifyInput
= false;
241 PMB
.VerifyOutput
= false;
243 legacy::PassManager PM
;
245 // Add the TTI (required to inform the vectorizer about register size for
247 PM
.add(createTargetTransformInfoWrapperPass(TM
.getTargetIRAnalysis()));
250 PMB
.populateThinLTOPassManager(PM
);
256 addUsedSymbolToPreservedGUID(const lto::InputFile
&File
,
257 DenseSet
<GlobalValue::GUID
> &PreservedGUID
) {
258 for (const auto &Sym
: File
.symbols()) {
260 PreservedGUID
.insert(GlobalValue::getGUID(Sym
.getIRName()));
264 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
265 static DenseSet
<GlobalValue::GUID
>
266 computeGUIDPreservedSymbols(const StringSet
<> &PreservedSymbols
,
267 const Triple
&TheTriple
) {
268 DenseSet
<GlobalValue::GUID
> GUIDPreservedSymbols(PreservedSymbols
.size());
269 for (auto &Entry
: PreservedSymbols
) {
270 StringRef Name
= Entry
.first();
271 if (TheTriple
.isOSBinFormatMachO() && Name
.size() > 0 && Name
[0] == '_')
272 Name
= Name
.drop_front();
273 GUIDPreservedSymbols
.insert(GlobalValue::getGUID(Name
));
275 return GUIDPreservedSymbols
;
278 std::unique_ptr
<MemoryBuffer
> codegenModule(Module
&TheModule
,
280 SmallVector
<char, 128> OutputBuffer
;
284 raw_svector_ostream
OS(OutputBuffer
);
285 legacy::PassManager PM
;
287 // If the bitcode files contain ARC code and were compiled with optimization,
288 // the ObjCARCContractPass must be run, so do it unconditionally here.
289 PM
.add(createObjCARCContractPass());
291 // Setup the codegen now.
292 if (TM
.addPassesToEmitFile(PM
, OS
, nullptr, TargetMachine::CGFT_ObjectFile
,
293 /* DisableVerify */ true))
294 report_fatal_error("Failed to setup codegen");
296 // Run codegen now. resulting binary is in OutputBuffer.
299 return std::make_unique
<SmallVectorMemoryBuffer
>(std::move(OutputBuffer
));
302 /// Manage caching for a single Module.
303 class ModuleCacheEntry
{
304 SmallString
<128> EntryPath
;
  // Create a cache entry. This computes a unique hash for the Module
  // considering the current list of exports/imports, and offers an interface
  // to query and access the content in the cache.
311 StringRef CachePath
, const ModuleSummaryIndex
&Index
, StringRef ModuleID
,
312 const FunctionImporter::ImportMapTy
&ImportList
,
313 const FunctionImporter::ExportSetTy
&ExportList
,
314 const std::map
<GlobalValue::GUID
, GlobalValue::LinkageTypes
> &ResolvedODR
,
315 const GVSummaryMapTy
&DefinedGVSummaries
, unsigned OptLevel
,
316 bool Freestanding
, const TargetMachineBuilder
&TMBuilder
) {
317 if (CachePath
.empty())
320 if (!Index
.modulePaths().count(ModuleID
))
321 // The module does not have an entry, it can't have a hash at all
324 if (all_of(Index
.getModuleHash(ModuleID
),
325 [](uint32_t V
) { return V
== 0; }))
326 // No hash entry, no caching!
329 llvm::lto::Config Conf
;
330 Conf
.OptLevel
= OptLevel
;
331 Conf
.Options
= TMBuilder
.Options
;
332 Conf
.CPU
= TMBuilder
.MCpu
;
333 Conf
.MAttrs
.push_back(TMBuilder
.MAttr
);
334 Conf
.RelocModel
= TMBuilder
.RelocModel
;
335 Conf
.CGOptLevel
= TMBuilder
.CGOptLevel
;
336 Conf
.Freestanding
= Freestanding
;
338 computeLTOCacheKey(Key
, Conf
, Index
, ModuleID
, ImportList
, ExportList
,
339 ResolvedODR
, DefinedGVSummaries
);
341 // This choice of file name allows the cache to be pruned (see pruneCache()
342 // in include/llvm/Support/CachePruning.h).
343 sys::path::append(EntryPath
, CachePath
, "llvmcache-" + Key
);
346 // Access the path to this entry in the cache.
347 StringRef
getEntryPath() { return EntryPath
; }
349 // Try loading the buffer for this cache entry.
350 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> tryLoadingBuffer() {
351 if (EntryPath
.empty())
352 return std::error_code();
353 SmallString
<64> ResultPath
;
354 Expected
<sys::fs::file_t
> FDOrErr
= sys::fs::openNativeFileForRead(
355 Twine(EntryPath
), sys::fs::OF_UpdateAtime
, &ResultPath
);
357 return errorToErrorCode(FDOrErr
.takeError());
358 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MBOrErr
= MemoryBuffer::getOpenFile(
359 *FDOrErr
, EntryPath
, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
360 sys::fs::closeFile(*FDOrErr
);
364 // Cache the Produced object file
365 void write(const MemoryBuffer
&OutputBuffer
) {
366 if (EntryPath
.empty())
369 // Write to a temporary to avoid race condition
370 SmallString
<128> TempFilename
;
371 SmallString
<128> CachePath(EntryPath
);
372 llvm::sys::path::remove_filename(CachePath
);
373 sys::path::append(TempFilename
, CachePath
, "Thin-%%%%%%.tmp.o");
375 if (auto Err
= handleErrors(
376 llvm::writeFileAtomically(TempFilename
, EntryPath
,
377 OutputBuffer
.getBuffer()),
378 [](const llvm::AtomicFileWriteError
&E
) {
379 std::string ErrorMsgBuffer
;
380 llvm::raw_string_ostream
S(ErrorMsgBuffer
);
384 llvm::atomic_write_error::failed_to_create_uniq_file
) {
385 errs() << "Error: " << ErrorMsgBuffer
<< "\n";
386 report_fatal_error("ThinLTO: Can't get a temporary file");
390 consumeError(std::move(Err
));
395 static std::unique_ptr
<MemoryBuffer
>
396 ProcessThinLTOModule(Module
&TheModule
, ModuleSummaryIndex
&Index
,
397 StringMap
<lto::InputFile
*> &ModuleMap
, TargetMachine
&TM
,
398 const FunctionImporter::ImportMapTy
&ImportList
,
399 const FunctionImporter::ExportSetTy
&ExportList
,
400 const DenseSet
<GlobalValue::GUID
> &GUIDPreservedSymbols
,
401 const GVSummaryMapTy
&DefinedGlobals
,
402 const ThinLTOCodeGenerator::CachingOptions
&CacheOptions
,
403 bool DisableCodeGen
, StringRef SaveTempsDir
,
404 bool Freestanding
, unsigned OptLevel
, unsigned count
) {
406 // "Benchmark"-like optimization: single-source case
407 bool SingleModule
= (ModuleMap
.size() == 1);
410 promoteModule(TheModule
, Index
);
412 // Apply summary-based prevailing-symbol resolution decisions.
413 thinLTOResolvePrevailingInModule(TheModule
, DefinedGlobals
);
415 // Save temps: after promotion.
416 saveTempBitcode(TheModule
, SaveTempsDir
, count
, ".1.promoted.bc");
419 // Be friendly and don't nuke totally the module when the client didn't
420 // supply anything to preserve.
421 if (!ExportList
.empty() || !GUIDPreservedSymbols
.empty()) {
422 // Apply summary-based internalization decisions.
423 thinLTOInternalizeModule(TheModule
, DefinedGlobals
);
426 // Save internalized bitcode
427 saveTempBitcode(TheModule
, SaveTempsDir
, count
, ".2.internalized.bc");
430 crossImportIntoModule(TheModule
, Index
, ModuleMap
, ImportList
);
432 // Save temps: after cross-module import.
433 saveTempBitcode(TheModule
, SaveTempsDir
, count
, ".3.imported.bc");
436 optimizeModule(TheModule
, TM
, OptLevel
, Freestanding
);
438 saveTempBitcode(TheModule
, SaveTempsDir
, count
, ".4.opt.bc");
440 if (DisableCodeGen
) {
441 // Configured to stop before CodeGen, serialize the bitcode and return.
442 SmallVector
<char, 128> OutputBuffer
;
444 raw_svector_ostream
OS(OutputBuffer
);
445 ProfileSummaryInfo
PSI(TheModule
);
446 auto Index
= buildModuleSummaryIndex(TheModule
, nullptr, &PSI
);
447 WriteBitcodeToFile(TheModule
, OS
, true, &Index
);
449 return std::make_unique
<SmallVectorMemoryBuffer
>(std::move(OutputBuffer
));
452 return codegenModule(TheModule
, TM
);
455 /// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map
456 /// for caching, and in the \p Index for application during the ThinLTO
457 /// backends. This is needed for correctness for exported symbols (ensure
458 /// at least one copy kept) and a compile-time optimization (to drop duplicate
459 /// copies when possible).
460 static void resolvePrevailingInIndex(
461 ModuleSummaryIndex
&Index
,
462 StringMap
<std::map
<GlobalValue::GUID
, GlobalValue::LinkageTypes
>>
464 const DenseSet
<GlobalValue::GUID
> &GUIDPreservedSymbols
,
465 const DenseMap
<GlobalValue::GUID
, const GlobalValueSummary
*>
468 auto isPrevailing
= [&](GlobalValue::GUID GUID
, const GlobalValueSummary
*S
) {
469 const auto &Prevailing
= PrevailingCopy
.find(GUID
);
470 // Not in map means that there was only one copy, which must be prevailing.
471 if (Prevailing
== PrevailingCopy
.end())
473 return Prevailing
->second
== S
;
476 auto recordNewLinkage
= [&](StringRef ModuleIdentifier
,
477 GlobalValue::GUID GUID
,
478 GlobalValue::LinkageTypes NewLinkage
) {
479 ResolvedODR
[ModuleIdentifier
][GUID
] = NewLinkage
;
482 thinLTOResolvePrevailingInIndex(Index
, isPrevailing
, recordNewLinkage
,
483 GUIDPreservedSymbols
);
486 // Initialize the TargetMachine builder for a given Triple
487 static void initTMBuilder(TargetMachineBuilder
&TMBuilder
,
488 const Triple
&TheTriple
) {
489 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
490 // FIXME this looks pretty terrible...
491 if (TMBuilder
.MCpu
.empty() && TheTriple
.isOSDarwin()) {
492 if (TheTriple
.getArch() == llvm::Triple::x86_64
)
493 TMBuilder
.MCpu
= "core2";
494 else if (TheTriple
.getArch() == llvm::Triple::x86
)
495 TMBuilder
.MCpu
= "yonah";
496 else if (TheTriple
.getArch() == llvm::Triple::aarch64
||
497 TheTriple
.getArch() == llvm::Triple::aarch64_32
)
498 TMBuilder
.MCpu
= "cyclone";
500 TMBuilder
.TheTriple
= std::move(TheTriple
);
503 } // end anonymous namespace
505 void ThinLTOCodeGenerator::addModule(StringRef Identifier
, StringRef Data
) {
506 MemoryBufferRef
Buffer(Data
, Identifier
);
508 auto InputOrError
= lto::InputFile::create(Buffer
);
510 report_fatal_error("ThinLTO cannot create input file: " +
511 toString(InputOrError
.takeError()));
513 auto TripleStr
= (*InputOrError
)->getTargetTriple();
514 Triple
TheTriple(TripleStr
);
517 initTMBuilder(TMBuilder
, Triple(TheTriple
));
518 else if (TMBuilder
.TheTriple
!= TheTriple
) {
519 if (!TMBuilder
.TheTriple
.isCompatibleWith(TheTriple
))
520 report_fatal_error("ThinLTO modules with incompatible triples not "
522 initTMBuilder(TMBuilder
, Triple(TMBuilder
.TheTriple
.merge(TheTriple
)));
525 Modules
.emplace_back(std::move(*InputOrError
));
528 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name
) {
529 PreservedSymbols
.insert(Name
);
532 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name
) {
533 // FIXME: At the moment, we don't take advantage of this extra information,
534 // we're conservatively considering cross-references as preserved.
535 // CrossReferencedSymbols.insert(Name);
536 PreservedSymbols
.insert(Name
);
539 // TargetMachine factory
540 std::unique_ptr
<TargetMachine
> TargetMachineBuilder::create() const {
542 const Target
*TheTarget
=
543 TargetRegistry::lookupTarget(TheTriple
.str(), ErrMsg
);
545 report_fatal_error("Can't load target for this Triple: " + ErrMsg
);
548 // Use MAttr as the default set of features.
549 SubtargetFeatures
Features(MAttr
);
550 Features
.getDefaultSubtargetFeatures(TheTriple
);
551 std::string FeatureStr
= Features
.getString();
553 return std::unique_ptr
<TargetMachine
>(
554 TheTarget
->createTargetMachine(TheTriple
.str(), MCpu
, FeatureStr
, Options
,
555 RelocModel
, None
, CGOptLevel
));
559 * Produce the combined summary index from all the bitcode files:
562 std::unique_ptr
<ModuleSummaryIndex
> ThinLTOCodeGenerator::linkCombinedIndex() {
563 std::unique_ptr
<ModuleSummaryIndex
> CombinedIndex
=
564 std::make_unique
<ModuleSummaryIndex
>(/*HaveGVs=*/false);
565 uint64_t NextModuleId
= 0;
566 for (auto &Mod
: Modules
) {
567 auto &M
= Mod
->getSingleBitcodeModule();
569 M
.readSummary(*CombinedIndex
, Mod
->getName(), NextModuleId
++)) {
571 logAllUnhandledErrors(
572 std::move(Err
), errs(),
573 "error: can't create module summary index for buffer: ");
577 return CombinedIndex
;
580 static void internalizeAndPromoteInIndex(
581 const StringMap
<FunctionImporter::ExportSetTy
> &ExportLists
,
582 const DenseSet
<GlobalValue::GUID
> &GUIDPreservedSymbols
,
583 const DenseMap
<GlobalValue::GUID
, const GlobalValueSummary
*>
585 ModuleSummaryIndex
&Index
) {
586 auto isExported
= [&](StringRef ModuleIdentifier
, GlobalValue::GUID GUID
) {
587 const auto &ExportList
= ExportLists
.find(ModuleIdentifier
);
588 return (ExportList
!= ExportLists
.end() &&
589 ExportList
->second
.count(GUID
)) ||
590 GUIDPreservedSymbols
.count(GUID
);
593 auto isPrevailing
= [&](GlobalValue::GUID GUID
, const GlobalValueSummary
*S
) {
594 const auto &Prevailing
= PrevailingCopy
.find(GUID
);
595 // Not in map means that there was only one copy, which must be prevailing.
596 if (Prevailing
== PrevailingCopy
.end())
598 return Prevailing
->second
== S
;
601 thinLTOInternalizeAndPromoteInIndex(Index
, isExported
, isPrevailing
);
604 static void computeDeadSymbolsInIndex(
605 ModuleSummaryIndex
&Index
,
606 const DenseSet
<GlobalValue::GUID
> &GUIDPreservedSymbols
) {
607 // We have no symbols resolution available. And can't do any better now in the
608 // case where the prevailing symbol is in a native object. It can be refined
609 // with linker information in the future.
610 auto isPrevailing
= [&](GlobalValue::GUID G
) {
611 return PrevailingType::Unknown
;
613 computeDeadSymbolsWithConstProp(Index
, GUIDPreservedSymbols
, isPrevailing
,
614 /* ImportEnabled = */ true);
618 * Perform promotion and renaming of exported internal functions.
619 * Index is updated to reflect linkage changes from weak resolution.
621 void ThinLTOCodeGenerator::promote(Module
&TheModule
, ModuleSummaryIndex
&Index
,
622 const lto::InputFile
&File
) {
623 auto ModuleCount
= Index
.modulePaths().size();
624 auto ModuleIdentifier
= TheModule
.getModuleIdentifier();
626 // Collect for each module the list of function it defines (GUID -> Summary).
627 StringMap
<GVSummaryMapTy
> ModuleToDefinedGVSummaries
;
628 Index
.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries
);
630 // Convert the preserved symbols set from string to GUID
631 auto GUIDPreservedSymbols
= computeGUIDPreservedSymbols(
632 PreservedSymbols
, Triple(TheModule
.getTargetTriple()));
634 // Add used symbol to the preserved symbols.
635 addUsedSymbolToPreservedGUID(File
, GUIDPreservedSymbols
);
637 // Compute "dead" symbols, we don't want to import/export these!
638 computeDeadSymbolsInIndex(Index
, GUIDPreservedSymbols
);
640 // Generate import/export list
641 StringMap
<FunctionImporter::ImportMapTy
> ImportLists(ModuleCount
);
642 StringMap
<FunctionImporter::ExportSetTy
> ExportLists(ModuleCount
);
643 ComputeCrossModuleImport(Index
, ModuleToDefinedGVSummaries
, ImportLists
,
646 DenseMap
<GlobalValue::GUID
, const GlobalValueSummary
*> PrevailingCopy
;
647 computePrevailingCopies(Index
, PrevailingCopy
);
649 // Resolve prevailing symbols
650 StringMap
<std::map
<GlobalValue::GUID
, GlobalValue::LinkageTypes
>> ResolvedODR
;
651 resolvePrevailingInIndex(Index
, ResolvedODR
, GUIDPreservedSymbols
,
654 thinLTOResolvePrevailingInModule(
655 TheModule
, ModuleToDefinedGVSummaries
[ModuleIdentifier
]);
657 // Promote the exported values in the index, so that they are promoted
659 internalizeAndPromoteInIndex(ExportLists
, GUIDPreservedSymbols
,
660 PrevailingCopy
, Index
);
662 promoteModule(TheModule
, Index
);
666 * Perform cross-module importing for the module identified by ModuleIdentifier.
668 void ThinLTOCodeGenerator::crossModuleImport(Module
&TheModule
,
669 ModuleSummaryIndex
&Index
,
670 const lto::InputFile
&File
) {
671 auto ModuleMap
= generateModuleMap(Modules
);
672 auto ModuleCount
= Index
.modulePaths().size();
674 // Collect for each module the list of function it defines (GUID -> Summary).
675 StringMap
<GVSummaryMapTy
> ModuleToDefinedGVSummaries(ModuleCount
);
676 Index
.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries
);
678 // Convert the preserved symbols set from string to GUID
679 auto GUIDPreservedSymbols
= computeGUIDPreservedSymbols(
680 PreservedSymbols
, Triple(TheModule
.getTargetTriple()));
682 addUsedSymbolToPreservedGUID(File
, GUIDPreservedSymbols
);
684 // Compute "dead" symbols, we don't want to import/export these!
685 computeDeadSymbolsInIndex(Index
, GUIDPreservedSymbols
);
687 // Generate import/export list
688 StringMap
<FunctionImporter::ImportMapTy
> ImportLists(ModuleCount
);
689 StringMap
<FunctionImporter::ExportSetTy
> ExportLists(ModuleCount
);
690 ComputeCrossModuleImport(Index
, ModuleToDefinedGVSummaries
, ImportLists
,
692 auto &ImportList
= ImportLists
[TheModule
.getModuleIdentifier()];
694 crossImportIntoModule(TheModule
, Index
, ModuleMap
, ImportList
);
698 * Compute the list of summaries needed for importing into module.
700 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
701 Module
&TheModule
, ModuleSummaryIndex
&Index
,
702 std::map
<std::string
, GVSummaryMapTy
> &ModuleToSummariesForIndex
,
703 const lto::InputFile
&File
) {
704 auto ModuleCount
= Index
.modulePaths().size();
705 auto ModuleIdentifier
= TheModule
.getModuleIdentifier();
707 // Collect for each module the list of function it defines (GUID -> Summary).
708 StringMap
<GVSummaryMapTy
> ModuleToDefinedGVSummaries(ModuleCount
);
709 Index
.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries
);
711 // Convert the preserved symbols set from string to GUID
712 auto GUIDPreservedSymbols
= computeGUIDPreservedSymbols(
713 PreservedSymbols
, Triple(TheModule
.getTargetTriple()));
715 addUsedSymbolToPreservedGUID(File
, GUIDPreservedSymbols
);
717 // Compute "dead" symbols, we don't want to import/export these!
718 computeDeadSymbolsInIndex(Index
, GUIDPreservedSymbols
);
720 // Generate import/export list
721 StringMap
<FunctionImporter::ImportMapTy
> ImportLists(ModuleCount
);
722 StringMap
<FunctionImporter::ExportSetTy
> ExportLists(ModuleCount
);
723 ComputeCrossModuleImport(Index
, ModuleToDefinedGVSummaries
, ImportLists
,
726 llvm::gatherImportedSummariesForModule(
727 ModuleIdentifier
, ModuleToDefinedGVSummaries
,
728 ImportLists
[ModuleIdentifier
], ModuleToSummariesForIndex
);
732 * Emit the list of files needed for importing into module.
734 void ThinLTOCodeGenerator::emitImports(Module
&TheModule
, StringRef OutputName
,
735 ModuleSummaryIndex
&Index
,
736 const lto::InputFile
&File
) {
737 auto ModuleCount
= Index
.modulePaths().size();
738 auto ModuleIdentifier
= TheModule
.getModuleIdentifier();
740 // Collect for each module the list of function it defines (GUID -> Summary).
741 StringMap
<GVSummaryMapTy
> ModuleToDefinedGVSummaries(ModuleCount
);
742 Index
.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries
);
744 // Convert the preserved symbols set from string to GUID
745 auto GUIDPreservedSymbols
= computeGUIDPreservedSymbols(
746 PreservedSymbols
, Triple(TheModule
.getTargetTriple()));
748 addUsedSymbolToPreservedGUID(File
, GUIDPreservedSymbols
);
750 // Compute "dead" symbols, we don't want to import/export these!
751 computeDeadSymbolsInIndex(Index
, GUIDPreservedSymbols
);
753 // Generate import/export list
754 StringMap
<FunctionImporter::ImportMapTy
> ImportLists(ModuleCount
);
755 StringMap
<FunctionImporter::ExportSetTy
> ExportLists(ModuleCount
);
756 ComputeCrossModuleImport(Index
, ModuleToDefinedGVSummaries
, ImportLists
,
759 std::map
<std::string
, GVSummaryMapTy
> ModuleToSummariesForIndex
;
760 llvm::gatherImportedSummariesForModule(
761 ModuleIdentifier
, ModuleToDefinedGVSummaries
,
762 ImportLists
[ModuleIdentifier
], ModuleToSummariesForIndex
);
765 if ((EC
= EmitImportsFiles(ModuleIdentifier
, OutputName
,
766 ModuleToSummariesForIndex
)))
767 report_fatal_error(Twine("Failed to open ") + OutputName
+
768 " to save imports lists\n");
772 * Perform internalization. Runs promote and internalization together.
773 * Index is updated to reflect linkage changes.
775 void ThinLTOCodeGenerator::internalize(Module
&TheModule
,
776 ModuleSummaryIndex
&Index
,
777 const lto::InputFile
&File
) {
778 initTMBuilder(TMBuilder
, Triple(TheModule
.getTargetTriple()));
779 auto ModuleCount
= Index
.modulePaths().size();
780 auto ModuleIdentifier
= TheModule
.getModuleIdentifier();
782 // Convert the preserved symbols set from string to GUID
783 auto GUIDPreservedSymbols
=
784 computeGUIDPreservedSymbols(PreservedSymbols
, TMBuilder
.TheTriple
);
786 addUsedSymbolToPreservedGUID(File
, GUIDPreservedSymbols
);
788 // Collect for each module the list of function it defines (GUID -> Summary).
789 StringMap
<GVSummaryMapTy
> ModuleToDefinedGVSummaries(ModuleCount
);
790 Index
.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries
);
792 // Compute "dead" symbols, we don't want to import/export these!
793 computeDeadSymbolsInIndex(Index
, GUIDPreservedSymbols
);
795 // Generate import/export list
796 StringMap
<FunctionImporter::ImportMapTy
> ImportLists(ModuleCount
);
797 StringMap
<FunctionImporter::ExportSetTy
> ExportLists(ModuleCount
);
798 ComputeCrossModuleImport(Index
, ModuleToDefinedGVSummaries
, ImportLists
,
800 auto &ExportList
= ExportLists
[ModuleIdentifier
];
802 // Be friendly and don't nuke totally the module when the client didn't
803 // supply anything to preserve.
804 if (ExportList
.empty() && GUIDPreservedSymbols
.empty())
807 DenseMap
<GlobalValue::GUID
, const GlobalValueSummary
*> PrevailingCopy
;
808 computePrevailingCopies(Index
, PrevailingCopy
);
810 // Resolve prevailing symbols
811 StringMap
<std::map
<GlobalValue::GUID
, GlobalValue::LinkageTypes
>> ResolvedODR
;
812 resolvePrevailingInIndex(Index
, ResolvedODR
, GUIDPreservedSymbols
,
815 // Promote the exported values in the index, so that they are promoted
817 internalizeAndPromoteInIndex(ExportLists
, GUIDPreservedSymbols
,
818 PrevailingCopy
, Index
);
820 promoteModule(TheModule
, Index
);
823 thinLTOResolvePrevailingInModule(
824 TheModule
, ModuleToDefinedGVSummaries
[ModuleIdentifier
]);
826 thinLTOInternalizeModule(TheModule
,
827 ModuleToDefinedGVSummaries
[ModuleIdentifier
]);
831 * Perform post-importing ThinLTO optimizations.
833 void ThinLTOCodeGenerator::optimize(Module
&TheModule
) {
834 initTMBuilder(TMBuilder
, Triple(TheModule
.getTargetTriple()));
837 optimizeModule(TheModule
, *TMBuilder
.create(), OptLevel
, Freestanding
);
840 /// Write out the generated object file, either from CacheEntryPath or from
841 /// OutputBuffer, preferring hard-link when possible.
842 /// Returns the path to the generated file in SavedObjectsDirectoryPath.
844 ThinLTOCodeGenerator::writeGeneratedObject(int count
, StringRef CacheEntryPath
,
845 const MemoryBuffer
&OutputBuffer
) {
846 auto ArchName
= TMBuilder
.TheTriple
.getArchName();
847 SmallString
<128> OutputPath(SavedObjectsDirectoryPath
);
848 llvm::sys::path::append(OutputPath
,
849 Twine(count
) + "." + ArchName
+ ".thinlto.o");
850 OutputPath
.c_str(); // Ensure the string is null terminated.
851 if (sys::fs::exists(OutputPath
))
852 sys::fs::remove(OutputPath
);
854 // We don't return a memory buffer to the linker, just a list of files.
855 if (!CacheEntryPath
.empty()) {
856 // Cache is enabled, hard-link the entry (or copy if hard-link fails).
857 auto Err
= sys::fs::create_hard_link(CacheEntryPath
, OutputPath
);
859 return OutputPath
.str();
860 // Hard linking failed, try to copy.
861 Err
= sys::fs::copy_file(CacheEntryPath
, OutputPath
);
863 return OutputPath
.str();
864 // Copy failed (could be because the CacheEntry was removed from the cache
865 // in the meantime by another process), fall back and try to write down the
866 // buffer to the output.
867 errs() << "error: can't link or copy from cached entry '" << CacheEntryPath
868 << "' to '" << OutputPath
<< "'\n";
870 // No cache entry, just write out the buffer.
872 raw_fd_ostream
OS(OutputPath
, Err
, sys::fs::OF_None
);
874 report_fatal_error("Can't open output '" + OutputPath
+ "'\n");
875 OS
<< OutputBuffer
.getBuffer();
876 return OutputPath
.str();
879 // Main entry point for the ThinLTO processing
880 void ThinLTOCodeGenerator::run() {
881 // Prepare the resulting object vector
882 assert(ProducedBinaries
.empty() && "The generator should not be reused");
883 if (SavedObjectsDirectoryPath
.empty())
884 ProducedBinaries
.resize(Modules
.size());
886 sys::fs::create_directories(SavedObjectsDirectoryPath
);
888 sys::fs::is_directory(SavedObjectsDirectoryPath
, IsDir
);
890 report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath
+ "'");
891 ProducedBinaryFiles
.resize(Modules
.size());
895 // Perform only parallel codegen and return.
898 for (auto &Mod
: Modules
) {
899 Pool
.async([&](int count
) {
901 Context
.setDiscardValueNames(LTODiscardValueNames
);
904 auto TheModule
= loadModuleFromInput(Mod
.get(), Context
, false,
905 /*IsImporting*/ false);
908 auto OutputBuffer
= codegenModule(*TheModule
, *TMBuilder
.create());
909 if (SavedObjectsDirectoryPath
.empty())
910 ProducedBinaries
[count
] = std::move(OutputBuffer
);
912 ProducedBinaryFiles
[count
] =
913 writeGeneratedObject(count
, "", *OutputBuffer
);
920 // Sequential linking phase
921 auto Index
= linkCombinedIndex();
923 // Save temps: index.
924 if (!SaveTempsDir
.empty()) {
925 auto SaveTempPath
= SaveTempsDir
+ "index.bc";
927 raw_fd_ostream
OS(SaveTempPath
, EC
, sys::fs::OF_None
);
929 report_fatal_error(Twine("Failed to open ") + SaveTempPath
+
930 " to save optimized bitcode\n");
931 WriteIndexToFile(*Index
, OS
);
935 // Prepare the module map.
936 auto ModuleMap
= generateModuleMap(Modules
);
937 auto ModuleCount
= Modules
.size();
939 // Collect for each module the list of function it defines (GUID -> Summary).
940 StringMap
<GVSummaryMapTy
> ModuleToDefinedGVSummaries(ModuleCount
);
941 Index
->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries
);
943 // Convert the preserved symbols set from string to GUID, this is needed for
944 // computing the caching hash and the internalization.
945 auto GUIDPreservedSymbols
=
946 computeGUIDPreservedSymbols(PreservedSymbols
, TMBuilder
.TheTriple
);
948 // Add used symbol from inputs to the preserved symbols.
949 for (const auto &M
: Modules
)
950 addUsedSymbolToPreservedGUID(*M
, GUIDPreservedSymbols
);
952 // Compute "dead" symbols, we don't want to import/export these!
953 computeDeadSymbolsInIndex(*Index
, GUIDPreservedSymbols
);
955 // Synthesize entry counts for functions in the combined index.
956 computeSyntheticCounts(*Index
);
958 // Collect the import/export lists for all modules from the call-graph in the
960 StringMap
<FunctionImporter::ImportMapTy
> ImportLists(ModuleCount
);
961 StringMap
<FunctionImporter::ExportSetTy
> ExportLists(ModuleCount
);
962 ComputeCrossModuleImport(*Index
, ModuleToDefinedGVSummaries
, ImportLists
,
965 // We use a std::map here to be able to have a defined ordering when
966 // producing a hash for the cache entry.
967 // FIXME: we should be able to compute the caching hash for the entry based
968 // on the index, and nuke this map.
969 StringMap
<std::map
<GlobalValue::GUID
, GlobalValue::LinkageTypes
>> ResolvedODR
;
971 DenseMap
<GlobalValue::GUID
, const GlobalValueSummary
*> PrevailingCopy
;
972 computePrevailingCopies(*Index
, PrevailingCopy
);
974 // Resolve prevailing symbols, this has to be computed early because it
975 // impacts the caching.
976 resolvePrevailingInIndex(*Index
, ResolvedODR
, GUIDPreservedSymbols
,
979 // Use global summary-based analysis to identify symbols that can be
980 // internalized (because they aren't exported or preserved as per callback).
981 // Changes are made in the index, consumed in the ThinLTO backends.
982 internalizeAndPromoteInIndex(ExportLists
, GUIDPreservedSymbols
,
983 PrevailingCopy
, *Index
);
985 // Make sure that every module has an entry in the ExportLists, ImportList,
986 // GVSummary and ResolvedODR maps to enable threaded access to these maps
988 for (auto &Module
: Modules
) {
989 auto ModuleIdentifier
= Module
->getName();
990 ExportLists
[ModuleIdentifier
];
991 ImportLists
[ModuleIdentifier
];
992 ResolvedODR
[ModuleIdentifier
];
993 ModuleToDefinedGVSummaries
[ModuleIdentifier
];
996 // Compute the ordering we will process the inputs: the rough heuristic here
997 // is to sort them per size so that the largest module get schedule as soon as
998 // possible. This is purely a compile-time optimization.
999 std::vector
<int> ModulesOrdering
;
1000 ModulesOrdering
.resize(Modules
.size());
1001 std::iota(ModulesOrdering
.begin(), ModulesOrdering
.end(), 0);
1002 llvm::sort(ModulesOrdering
, [&](int LeftIndex
, int RightIndex
) {
1004 Modules
[LeftIndex
]->getSingleBitcodeModule().getBuffer().size();
1006 Modules
[RightIndex
]->getSingleBitcodeModule().getBuffer().size();
1007 return LSize
> RSize
;
1010 // Parallel optimizer + codegen
1012 ThreadPool
Pool(ThreadCount
);
1013 for (auto IndexCount
: ModulesOrdering
) {
1014 auto &Mod
= Modules
[IndexCount
];
1015 Pool
.async([&](int count
) {
1016 auto ModuleIdentifier
= Mod
->getName();
1017 auto &ExportList
= ExportLists
[ModuleIdentifier
];
1019 auto &DefinedGVSummaries
= ModuleToDefinedGVSummaries
[ModuleIdentifier
];
1021 // The module may be cached, this helps handling it.
1022 ModuleCacheEntry
CacheEntry(CacheOptions
.Path
, *Index
, ModuleIdentifier
,
1023 ImportLists
[ModuleIdentifier
], ExportList
,
1024 ResolvedODR
[ModuleIdentifier
],
1025 DefinedGVSummaries
, OptLevel
, Freestanding
,
1027 auto CacheEntryPath
= CacheEntry
.getEntryPath();
1030 auto ErrOrBuffer
= CacheEntry
.tryLoadingBuffer();
1031 LLVM_DEBUG(dbgs() << "Cache " << (ErrOrBuffer
? "hit" : "miss")
1032 << " '" << CacheEntryPath
<< "' for buffer "
1033 << count
<< " " << ModuleIdentifier
<< "\n");
1037 if (SavedObjectsDirectoryPath
.empty())
1038 ProducedBinaries
[count
] = std::move(ErrOrBuffer
.get());
1040 ProducedBinaryFiles
[count
] = writeGeneratedObject(
1041 count
, CacheEntryPath
, *ErrOrBuffer
.get());
1046 LLVMContext Context
;
1047 Context
.setDiscardValueNames(LTODiscardValueNames
);
1048 Context
.enableDebugTypeODRUniquing();
1049 auto DiagFileOrErr
= lto::setupOptimizationRemarks(
1050 Context
, RemarksFilename
, RemarksPasses
, RemarksFormat
,
1051 RemarksWithHotness
, count
);
1052 if (!DiagFileOrErr
) {
1053 errs() << "Error: " << toString(DiagFileOrErr
.takeError()) << "\n";
1054 report_fatal_error("ThinLTO: Can't get an output file for the "
1059 auto TheModule
= loadModuleFromInput(Mod
.get(), Context
, false,
1060 /*IsImporting*/ false);
1062 // Save temps: original file.
1063 saveTempBitcode(*TheModule
, SaveTempsDir
, count
, ".0.original.bc");
1065 auto &ImportList
= ImportLists
[ModuleIdentifier
];
1066 // Run the main process now, and generates a binary
1067 auto OutputBuffer
= ProcessThinLTOModule(
1068 *TheModule
, *Index
, ModuleMap
, *TMBuilder
.create(), ImportList
,
1069 ExportList
, GUIDPreservedSymbols
,
1070 ModuleToDefinedGVSummaries
[ModuleIdentifier
], CacheOptions
,
1071 DisableCodeGen
, SaveTempsDir
, Freestanding
, OptLevel
, count
);
1073 // Commit to the cache (if enabled)
1074 CacheEntry
.write(*OutputBuffer
);
1076 if (SavedObjectsDirectoryPath
.empty()) {
1077 // We need to generated a memory buffer for the linker.
1078 if (!CacheEntryPath
.empty()) {
1079 // When cache is enabled, reload from the cache if possible.
1080 // Releasing the buffer from the heap and reloading it from the
1081 // cache file with mmap helps us to lower memory pressure.
1082 // The freed memory can be used for the next input file.
1083 // The final binary link will read from the VFS cache (hopefully!)
1084 // or from disk (if the memory pressure was too high).
1085 auto ReloadedBufferOrErr
= CacheEntry
.tryLoadingBuffer();
1086 if (auto EC
= ReloadedBufferOrErr
.getError()) {
1087 // On error, keep the preexisting buffer and print a diagnostic.
1088 errs() << "error: can't reload cached file '" << CacheEntryPath
1089 << "': " << EC
.message() << "\n";
1091 OutputBuffer
= std::move(*ReloadedBufferOrErr
);
1094 ProducedBinaries
[count
] = std::move(OutputBuffer
);
1097 ProducedBinaryFiles
[count
] = writeGeneratedObject(
1098 count
, CacheEntryPath
, *OutputBuffer
);
1103 pruneCache(CacheOptions
.Path
, CacheOptions
.Policy
);
1105 // If statistics were requested, print them out now.
1106 if (llvm::AreStatisticsEnabled())
1107 llvm::PrintStatistics();
1108 reportAndResetTimings();