//===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the "backend" phase of LTO, i.e. it performs
// optimization and code generation on a loaded module. It is generally used
// internally by the LTO class but can also be used independently, for example
// to implement a standalone ThinLTO backend.
//
//===----------------------------------------------------------------------===//

#include "llvm/LTO/LTOBackend.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTO.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
#include <cstdlib>
#include <system_error>

using namespace llvm;
using namespace lto;

50 LLVM_ATTRIBUTE_NORETURN
static void reportOpenError(StringRef Path
, Twine Msg
) {
51 errs() << "failed to open " << Path
<< ": " << Msg
<< '\n';
56 Error
Config::addSaveTemps(std::string OutputFileName
,
57 bool UseInputModulePath
) {
58 ShouldDiscardValueNames
= false;
61 ResolutionFile
= std::make_unique
<raw_fd_ostream
>(
62 OutputFileName
+ "resolution.txt", EC
, sys::fs::OpenFlags::OF_Text
);
64 return errorCodeToError(EC
);
66 auto setHook
= [&](std::string PathSuffix
, ModuleHookFn
&Hook
) {
67 // Keep track of the hook provided by the linker, which also needs to run.
68 ModuleHookFn LinkerHook
= Hook
;
69 Hook
= [=](unsigned Task
, const Module
&M
) {
70 // If the linker's hook returned false, we need to pass that result
72 if (LinkerHook
&& !LinkerHook(Task
, M
))
75 std::string PathPrefix
;
76 // If this is the combined module (not a ThinLTO backend compile) or the
77 // user hasn't requested using the input module's path, emit to a file
78 // named from the provided OutputFileName with the Task ID appended.
79 if (M
.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath
) {
80 PathPrefix
= OutputFileName
;
81 if (Task
!= (unsigned)-1)
82 PathPrefix
+= utostr(Task
) + ".";
84 PathPrefix
= M
.getModuleIdentifier() + ".";
85 std::string Path
= PathPrefix
+ PathSuffix
+ ".bc";
87 raw_fd_ostream
OS(Path
, EC
, sys::fs::OpenFlags::OF_None
);
88 // Because -save-temps is a debugging feature, we report the error
91 reportOpenError(Path
, EC
.message());
92 WriteBitcodeToFile(M
, OS
, /*ShouldPreserveUseListOrder=*/false);
97 setHook("0.preopt", PreOptModuleHook
);
98 setHook("1.promote", PostPromoteModuleHook
);
99 setHook("2.internalize", PostInternalizeModuleHook
);
100 setHook("3.import", PostImportModuleHook
);
101 setHook("4.opt", PostOptModuleHook
);
102 setHook("5.precodegen", PreCodeGenModuleHook
);
104 CombinedIndexHook
= [=](const ModuleSummaryIndex
&Index
) {
105 std::string Path
= OutputFileName
+ "index.bc";
107 raw_fd_ostream
OS(Path
, EC
, sys::fs::OpenFlags::OF_None
);
108 // Because -save-temps is a debugging feature, we report the error
109 // directly and exit.
111 reportOpenError(Path
, EC
.message());
112 WriteIndexToFile(Index
, OS
);
114 Path
= OutputFileName
+ "index.dot";
115 raw_fd_ostream
OSDot(Path
, EC
, sys::fs::OpenFlags::OF_None
);
117 reportOpenError(Path
, EC
.message());
118 Index
.exportToDot(OSDot
);
122 return Error::success();
127 std::unique_ptr
<TargetMachine
>
128 createTargetMachine(Config
&Conf
, const Target
*TheTarget
, Module
&M
) {
129 StringRef TheTriple
= M
.getTargetTriple();
130 SubtargetFeatures Features
;
131 Features
.getDefaultSubtargetFeatures(Triple(TheTriple
));
132 for (const std::string
&A
: Conf
.MAttrs
)
133 Features
.AddFeature(A
);
135 Reloc::Model RelocModel
;
137 RelocModel
= *Conf
.RelocModel
;
140 M
.getPICLevel() == PICLevel::NotPIC
? Reloc::Static
: Reloc::PIC_
;
142 Optional
<CodeModel::Model
> CodeModel
;
144 CodeModel
= *Conf
.CodeModel
;
146 CodeModel
= M
.getCodeModel();
148 return std::unique_ptr
<TargetMachine
>(TheTarget
->createTargetMachine(
149 TheTriple
, Conf
.CPU
, Features
.getString(), Conf
.Options
, RelocModel
,
150 CodeModel
, Conf
.CGOptLevel
));
153 static void runNewPMPasses(Config
&Conf
, Module
&Mod
, TargetMachine
*TM
,
154 unsigned OptLevel
, bool IsThinLTO
,
155 ModuleSummaryIndex
*ExportSummary
,
156 const ModuleSummaryIndex
*ImportSummary
) {
157 Optional
<PGOOptions
> PGOOpt
;
158 if (!Conf
.SampleProfile
.empty())
159 PGOOpt
= PGOOptions(Conf
.SampleProfile
, "", Conf
.ProfileRemapping
,
160 PGOOptions::SampleUse
, PGOOptions::NoCSAction
, true);
161 else if (Conf
.RunCSIRInstr
) {
162 PGOOpt
= PGOOptions("", Conf
.CSIRProfile
, Conf
.ProfileRemapping
,
163 PGOOptions::IRUse
, PGOOptions::CSIRInstr
);
164 } else if (!Conf
.CSIRProfile
.empty()) {
165 PGOOpt
= PGOOptions(Conf
.CSIRProfile
, "", Conf
.ProfileRemapping
,
166 PGOOptions::IRUse
, PGOOptions::CSIRUse
);
169 PassInstrumentationCallbacks PIC
;
170 StandardInstrumentations SI
;
171 SI
.registerCallbacks(PIC
);
172 PassBuilder
PB(TM
, PipelineTuningOptions(),PGOOpt
, &PIC
);
175 // Parse a custom AA pipeline if asked to.
176 if (auto Err
= PB
.parseAAPipeline(AA
, "default"))
177 report_fatal_error("Error parsing default AA pipeline");
179 LoopAnalysisManager
LAM(Conf
.DebugPassManager
);
180 FunctionAnalysisManager
FAM(Conf
.DebugPassManager
);
181 CGSCCAnalysisManager
CGAM(Conf
.DebugPassManager
);
182 ModuleAnalysisManager
MAM(Conf
.DebugPassManager
);
184 // Register the AA manager first so that our version is the one used.
185 FAM
.registerPass([&] { return std::move(AA
); });
187 // Register all the basic analyses with the managers.
188 PB
.registerModuleAnalyses(MAM
);
189 PB
.registerCGSCCAnalyses(CGAM
);
190 PB
.registerFunctionAnalyses(FAM
);
191 PB
.registerLoopAnalyses(LAM
);
192 PB
.crossRegisterProxies(LAM
, FAM
, CGAM
, MAM
);
194 ModulePassManager
MPM(Conf
.DebugPassManager
);
195 // FIXME (davide): verify the input.
197 PassBuilder::OptimizationLevel OL
;
201 llvm_unreachable("Invalid optimization level");
203 OL
= PassBuilder::O0
;
206 OL
= PassBuilder::O1
;
209 OL
= PassBuilder::O2
;
212 OL
= PassBuilder::O3
;
217 MPM
= PB
.buildThinLTODefaultPipeline(OL
, Conf
.DebugPassManager
,
220 MPM
= PB
.buildLTODefaultPipeline(OL
, Conf
.DebugPassManager
, ExportSummary
);
223 // FIXME (davide): verify the output.
226 static void runNewPMCustomPasses(Module
&Mod
, TargetMachine
*TM
,
227 std::string PipelineDesc
,
228 std::string AAPipelineDesc
,
229 bool DisableVerify
) {
233 // Parse a custom AA pipeline if asked to.
234 if (!AAPipelineDesc
.empty())
235 if (auto Err
= PB
.parseAAPipeline(AA
, AAPipelineDesc
))
236 report_fatal_error("unable to parse AA pipeline description '" +
237 AAPipelineDesc
+ "': " + toString(std::move(Err
)));
239 LoopAnalysisManager LAM
;
240 FunctionAnalysisManager FAM
;
241 CGSCCAnalysisManager CGAM
;
242 ModuleAnalysisManager MAM
;
244 // Register the AA manager first so that our version is the one used.
245 FAM
.registerPass([&] { return std::move(AA
); });
247 // Register all the basic analyses with the managers.
248 PB
.registerModuleAnalyses(MAM
);
249 PB
.registerCGSCCAnalyses(CGAM
);
250 PB
.registerFunctionAnalyses(FAM
);
251 PB
.registerLoopAnalyses(LAM
);
252 PB
.crossRegisterProxies(LAM
, FAM
, CGAM
, MAM
);
254 ModulePassManager MPM
;
256 // Always verify the input.
257 MPM
.addPass(VerifierPass());
259 // Now, add all the passes we've been requested to.
260 if (auto Err
= PB
.parsePassPipeline(MPM
, PipelineDesc
))
261 report_fatal_error("unable to parse pass pipeline description '" +
262 PipelineDesc
+ "': " + toString(std::move(Err
)));
265 MPM
.addPass(VerifierPass());
269 static void runOldPMPasses(Config
&Conf
, Module
&Mod
, TargetMachine
*TM
,
270 bool IsThinLTO
, ModuleSummaryIndex
*ExportSummary
,
271 const ModuleSummaryIndex
*ImportSummary
) {
272 legacy::PassManager passes
;
273 passes
.add(createTargetTransformInfoWrapperPass(TM
->getTargetIRAnalysis()));
275 PassManagerBuilder PMB
;
276 PMB
.LibraryInfo
= new TargetLibraryInfoImpl(Triple(TM
->getTargetTriple()));
277 PMB
.Inliner
= createFunctionInliningPass();
278 PMB
.ExportSummary
= ExportSummary
;
279 PMB
.ImportSummary
= ImportSummary
;
280 // Unconditionally verify input since it is not verified before this
281 // point and has unknown origin.
282 PMB
.VerifyInput
= true;
283 PMB
.VerifyOutput
= !Conf
.DisableVerify
;
284 PMB
.LoopVectorize
= true;
285 PMB
.SLPVectorize
= true;
286 PMB
.OptLevel
= Conf
.OptLevel
;
287 PMB
.PGOSampleUse
= Conf
.SampleProfile
;
288 PMB
.EnablePGOCSInstrGen
= Conf
.RunCSIRInstr
;
289 if (!Conf
.RunCSIRInstr
&& !Conf
.CSIRProfile
.empty()) {
290 PMB
.EnablePGOCSInstrUse
= true;
291 PMB
.PGOInstrUse
= Conf
.CSIRProfile
;
294 PMB
.populateThinLTOPassManager(passes
);
296 PMB
.populateLTOPassManager(passes
);
300 bool opt(Config
&Conf
, TargetMachine
*TM
, unsigned Task
, Module
&Mod
,
301 bool IsThinLTO
, ModuleSummaryIndex
*ExportSummary
,
302 const ModuleSummaryIndex
*ImportSummary
) {
303 // FIXME: Plumb the combined index into the new pass manager.
304 if (!Conf
.OptPipeline
.empty())
305 runNewPMCustomPasses(Mod
, TM
, Conf
.OptPipeline
, Conf
.AAPipeline
,
307 else if (Conf
.UseNewPM
)
308 runNewPMPasses(Conf
, Mod
, TM
, Conf
.OptLevel
, IsThinLTO
, ExportSummary
,
311 runOldPMPasses(Conf
, Mod
, TM
, IsThinLTO
, ExportSummary
, ImportSummary
);
312 return !Conf
.PostOptModuleHook
|| Conf
.PostOptModuleHook(Task
, Mod
);
315 void codegen(Config
&Conf
, TargetMachine
*TM
, AddStreamFn AddStream
,
316 unsigned Task
, Module
&Mod
) {
317 if (Conf
.PreCodeGenModuleHook
&& !Conf
.PreCodeGenModuleHook(Task
, Mod
))
320 std::unique_ptr
<ToolOutputFile
> DwoOut
;
321 SmallString
<1024> DwoFile(Conf
.SplitDwarfOutput
);
322 if (!Conf
.DwoDir
.empty()) {
324 if (auto EC
= llvm::sys::fs::create_directories(Conf
.DwoDir
))
325 report_fatal_error("Failed to create directory " + Conf
.DwoDir
+ ": " +
328 DwoFile
= Conf
.DwoDir
;
329 sys::path::append(DwoFile
, std::to_string(Task
) + ".dwo");
330 TM
->Options
.MCOptions
.SplitDwarfFile
= DwoFile
.str().str();
332 TM
->Options
.MCOptions
.SplitDwarfFile
= Conf
.SplitDwarfFile
;
334 if (!DwoFile
.empty()) {
336 DwoOut
= std::make_unique
<ToolOutputFile
>(DwoFile
, EC
, sys::fs::OF_None
);
338 report_fatal_error("Failed to open " + DwoFile
+ ": " + EC
.message());
341 auto Stream
= AddStream(Task
);
342 legacy::PassManager CodeGenPasses
;
343 if (TM
->addPassesToEmitFile(CodeGenPasses
, *Stream
->OS
,
344 DwoOut
? &DwoOut
->os() : nullptr,
346 report_fatal_error("Failed to setup codegen");
347 CodeGenPasses
.run(Mod
);
353 void splitCodeGen(Config
&C
, TargetMachine
*TM
, AddStreamFn AddStream
,
354 unsigned ParallelCodeGenParallelismLevel
,
355 std::unique_ptr
<Module
> Mod
) {
356 ThreadPool
CodegenThreadPool(ParallelCodeGenParallelismLevel
);
357 unsigned ThreadCount
= 0;
358 const Target
*T
= &TM
->getTarget();
361 std::move(Mod
), ParallelCodeGenParallelismLevel
,
362 [&](std::unique_ptr
<Module
> MPart
) {
363 // We want to clone the module in a new context to multi-thread the
364 // codegen. We do it by serializing partition modules to bitcode
365 // (while still on the main thread, in order to avoid data races) and
366 // spinning up new threads which deserialize the partitions into
367 // separate contexts.
368 // FIXME: Provide a more direct way to do this in LLVM.
370 raw_svector_ostream
BCOS(BC
);
371 WriteBitcodeToFile(*MPart
, BCOS
);
374 CodegenThreadPool
.async(
375 [&](const SmallString
<0> &BC
, unsigned ThreadId
) {
376 LTOLLVMContext
Ctx(C
);
377 Expected
<std::unique_ptr
<Module
>> MOrErr
= parseBitcodeFile(
378 MemoryBufferRef(StringRef(BC
.data(), BC
.size()), "ld-temp.o"),
381 report_fatal_error("Failed to read bitcode");
382 std::unique_ptr
<Module
> MPartInCtx
= std::move(MOrErr
.get());
384 std::unique_ptr
<TargetMachine
> TM
=
385 createTargetMachine(C
, T
, *MPartInCtx
);
387 codegen(C
, TM
.get(), AddStream
, ThreadId
, *MPartInCtx
);
389 // Pass BC using std::move to ensure that it get moved rather than
390 // copied into the thread's context.
391 std::move(BC
), ThreadCount
++);
395 // Because the inner lambda (which runs in a worker thread) captures our local
396 // variables, we need to wait for the worker threads to terminate before we
397 // can leave the function scope.
398 CodegenThreadPool
.wait();
401 Expected
<const Target
*> initAndLookupTarget(Config
&C
, Module
&Mod
) {
402 if (!C
.OverrideTriple
.empty())
403 Mod
.setTargetTriple(C
.OverrideTriple
);
404 else if (Mod
.getTargetTriple().empty())
405 Mod
.setTargetTriple(C
.DefaultTriple
);
408 const Target
*T
= TargetRegistry::lookupTarget(Mod
.getTargetTriple(), Msg
);
410 return make_error
<StringError
>(Msg
, inconvertibleErrorCode());
417 finalizeOptimizationRemarks(std::unique_ptr
<ToolOutputFile
> DiagOutputFile
) {
418 // Make sure we flush the diagnostic remarks file in case the linker doesn't
419 // call the global destructors before exiting.
421 return Error::success();
422 DiagOutputFile
->keep();
423 DiagOutputFile
->os().flush();
424 return Error::success();
427 Error
lto::backend(Config
&C
, AddStreamFn AddStream
,
428 unsigned ParallelCodeGenParallelismLevel
,
429 std::unique_ptr
<Module
> Mod
,
430 ModuleSummaryIndex
&CombinedIndex
) {
431 Expected
<const Target
*> TOrErr
= initAndLookupTarget(C
, *Mod
);
433 return TOrErr
.takeError();
435 std::unique_ptr
<TargetMachine
> TM
= createTargetMachine(C
, *TOrErr
, *Mod
);
437 // Setup optimization remarks.
438 auto DiagFileOrErr
= lto::setupOptimizationRemarks(
439 Mod
->getContext(), C
.RemarksFilename
, C
.RemarksPasses
, C
.RemarksFormat
,
440 C
.RemarksWithHotness
);
442 return DiagFileOrErr
.takeError();
443 auto DiagnosticOutputFile
= std::move(*DiagFileOrErr
);
445 if (!C
.CodeGenOnly
) {
446 if (!opt(C
, TM
.get(), 0, *Mod
, /*IsThinLTO=*/false,
447 /*ExportSummary=*/&CombinedIndex
, /*ImportSummary=*/nullptr))
448 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
451 if (ParallelCodeGenParallelismLevel
== 1) {
452 codegen(C
, TM
.get(), AddStream
, 0, *Mod
);
454 splitCodeGen(C
, TM
.get(), AddStream
, ParallelCodeGenParallelismLevel
,
457 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
460 static void dropDeadSymbols(Module
&Mod
, const GVSummaryMapTy
&DefinedGlobals
,
461 const ModuleSummaryIndex
&Index
) {
462 std::vector
<GlobalValue
*> DeadGVs
;
463 for (auto &GV
: Mod
.global_values())
464 if (GlobalValueSummary
*GVS
= DefinedGlobals
.lookup(GV
.getGUID()))
465 if (!Index
.isGlobalValueLive(GVS
)) {
466 DeadGVs
.push_back(&GV
);
467 convertToDeclaration(GV
);
470 // Now that all dead bodies have been dropped, delete the actual objects
471 // themselves when possible.
472 for (GlobalValue
*GV
: DeadGVs
) {
473 GV
->removeDeadConstantUsers();
474 // Might reference something defined in native object (i.e. dropped a
475 // non-prevailing IR def, but we need to keep the declaration).
477 GV
->eraseFromParent();
481 Error
lto::thinBackend(Config
&Conf
, unsigned Task
, AddStreamFn AddStream
,
482 Module
&Mod
, const ModuleSummaryIndex
&CombinedIndex
,
483 const FunctionImporter::ImportMapTy
&ImportList
,
484 const GVSummaryMapTy
&DefinedGlobals
,
485 MapVector
<StringRef
, BitcodeModule
> &ModuleMap
) {
486 Expected
<const Target
*> TOrErr
= initAndLookupTarget(Conf
, Mod
);
488 return TOrErr
.takeError();
490 std::unique_ptr
<TargetMachine
> TM
= createTargetMachine(Conf
, *TOrErr
, Mod
);
492 // Setup optimization remarks.
493 auto DiagFileOrErr
= lto::setupOptimizationRemarks(
494 Mod
.getContext(), Conf
.RemarksFilename
, Conf
.RemarksPasses
,
495 Conf
.RemarksFormat
, Conf
.RemarksWithHotness
, Task
);
497 return DiagFileOrErr
.takeError();
498 auto DiagnosticOutputFile
= std::move(*DiagFileOrErr
);
500 if (Conf
.CodeGenOnly
) {
501 codegen(Conf
, TM
.get(), AddStream
, Task
, Mod
);
502 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
505 if (Conf
.PreOptModuleHook
&& !Conf
.PreOptModuleHook(Task
, Mod
))
506 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
508 renameModuleForThinLTO(Mod
, CombinedIndex
);
510 dropDeadSymbols(Mod
, DefinedGlobals
, CombinedIndex
);
512 thinLTOResolvePrevailingInModule(Mod
, DefinedGlobals
);
514 if (Conf
.PostPromoteModuleHook
&& !Conf
.PostPromoteModuleHook(Task
, Mod
))
515 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
517 if (!DefinedGlobals
.empty())
518 thinLTOInternalizeModule(Mod
, DefinedGlobals
);
520 if (Conf
.PostInternalizeModuleHook
&&
521 !Conf
.PostInternalizeModuleHook(Task
, Mod
))
522 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
524 auto ModuleLoader
= [&](StringRef Identifier
) {
525 assert(Mod
.getContext().isODRUniquingDebugTypes() &&
526 "ODR Type uniquing should be enabled on the context");
527 auto I
= ModuleMap
.find(Identifier
);
528 assert(I
!= ModuleMap
.end());
529 return I
->second
.getLazyModule(Mod
.getContext(),
530 /*ShouldLazyLoadMetadata=*/true,
531 /*IsImporting*/ true);
534 FunctionImporter
Importer(CombinedIndex
, ModuleLoader
);
535 if (Error Err
= Importer
.importFunctions(Mod
, ImportList
).takeError())
538 if (Conf
.PostImportModuleHook
&& !Conf
.PostImportModuleHook(Task
, Mod
))
539 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
541 if (!opt(Conf
, TM
.get(), Task
, Mod
, /*IsThinLTO=*/true,
542 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex
))
543 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));
545 codegen(Conf
, TM
.get(), AddStream
, Task
, Mod
);
546 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile
));