[llvm-exegesis] Fix missing std::move.
[llvm-complete.git] / lib / LTO / LTOBackend.cpp
blob20fc40de4b958187b5976b8677f2db0e917cf753
1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the "backend" phase of LTO, i.e. it performs
11 // optimization and code generation on a loaded module. It is generally used
12 // internally by the LTO class but can also be used independently, for example
13 // to implement a standalone ThinLTO backend.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/LTO/LTOBackend.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/Bitcode/BitcodeWriter.h"
24 #include "llvm/IR/LegacyPassManager.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/LTO/LTO.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Object/ModuleSymbolTable.h"
30 #include "llvm/Passes/PassBuilder.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/Program.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/ThreadPool.h"
39 #include "llvm/Target/TargetMachine.h"
40 #include "llvm/Transforms/IPO.h"
41 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
42 #include "llvm/Transforms/Scalar/LoopPassManager.h"
43 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
44 #include "llvm/Transforms/Utils/SplitModule.h"
46 using namespace llvm;
47 using namespace lto;
49 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
50 errs() << "failed to open " << Path << ": " << Msg << '\n';
51 errs().flush();
52 exit(1);
55 Error Config::addSaveTemps(std::string OutputFileName,
56 bool UseInputModulePath) {
57 ShouldDiscardValueNames = false;
59 std::error_code EC;
60 ResolutionFile = llvm::make_unique<raw_fd_ostream>(
61 OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
62 if (EC)
63 return errorCodeToError(EC);
65 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
66 // Keep track of the hook provided by the linker, which also needs to run.
67 ModuleHookFn LinkerHook = Hook;
68 Hook = [=](unsigned Task, const Module &M) {
69 // If the linker's hook returned false, we need to pass that result
70 // through.
71 if (LinkerHook && !LinkerHook(Task, M))
72 return false;
74 std::string PathPrefix;
75 // If this is the combined module (not a ThinLTO backend compile) or the
76 // user hasn't requested using the input module's path, emit to a file
77 // named from the provided OutputFileName with the Task ID appended.
78 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
79 PathPrefix = OutputFileName;
80 if (Task != (unsigned)-1)
81 PathPrefix += utostr(Task) + ".";
82 } else
83 PathPrefix = M.getModuleIdentifier() + ".";
84 std::string Path = PathPrefix + PathSuffix + ".bc";
85 std::error_code EC;
86 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
87 // Because -save-temps is a debugging feature, we report the error
88 // directly and exit.
89 if (EC)
90 reportOpenError(Path, EC.message());
91 WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false);
92 return true;
96 setHook("0.preopt", PreOptModuleHook);
97 setHook("1.promote", PostPromoteModuleHook);
98 setHook("2.internalize", PostInternalizeModuleHook);
99 setHook("3.import", PostImportModuleHook);
100 setHook("4.opt", PostOptModuleHook);
101 setHook("5.precodegen", PreCodeGenModuleHook);
103 CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
104 std::string Path = OutputFileName + "index.bc";
105 std::error_code EC;
106 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
107 // Because -save-temps is a debugging feature, we report the error
108 // directly and exit.
109 if (EC)
110 reportOpenError(Path, EC.message());
111 WriteIndexToFile(Index, OS);
113 Path = OutputFileName + "index.dot";
114 raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None);
115 if (EC)
116 reportOpenError(Path, EC.message());
117 Index.exportToDot(OSDot);
118 return true;
121 return Error::success();
124 namespace {
126 std::unique_ptr<TargetMachine>
127 createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
128 StringRef TheTriple = M.getTargetTriple();
129 SubtargetFeatures Features;
130 Features.getDefaultSubtargetFeatures(Triple(TheTriple));
131 for (const std::string &A : Conf.MAttrs)
132 Features.AddFeature(A);
134 Reloc::Model RelocModel;
135 if (Conf.RelocModel)
136 RelocModel = *Conf.RelocModel;
137 else
138 RelocModel =
139 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
141 Optional<CodeModel::Model> CodeModel;
142 if (Conf.CodeModel)
143 CodeModel = *Conf.CodeModel;
144 else
145 CodeModel = M.getCodeModel();
147 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
148 TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
149 CodeModel, Conf.CGOptLevel));
152 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
153 unsigned OptLevel, bool IsThinLTO,
154 ModuleSummaryIndex *ExportSummary,
155 const ModuleSummaryIndex *ImportSummary) {
156 Optional<PGOOptions> PGOOpt;
157 if (!Conf.SampleProfile.empty())
158 PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping,
159 false, true);
161 PassBuilder PB(TM, PGOOpt);
162 AAManager AA;
164 // Parse a custom AA pipeline if asked to.
165 if (!PB.parseAAPipeline(AA, "default"))
166 report_fatal_error("Error parsing default AA pipeline");
168 LoopAnalysisManager LAM(Conf.DebugPassManager);
169 FunctionAnalysisManager FAM(Conf.DebugPassManager);
170 CGSCCAnalysisManager CGAM(Conf.DebugPassManager);
171 ModuleAnalysisManager MAM(Conf.DebugPassManager);
173 // Register the AA manager first so that our version is the one used.
174 FAM.registerPass([&] { return std::move(AA); });
176 // Register all the basic analyses with the managers.
177 PB.registerModuleAnalyses(MAM);
178 PB.registerCGSCCAnalyses(CGAM);
179 PB.registerFunctionAnalyses(FAM);
180 PB.registerLoopAnalyses(LAM);
181 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
183 ModulePassManager MPM(Conf.DebugPassManager);
184 // FIXME (davide): verify the input.
186 PassBuilder::OptimizationLevel OL;
188 switch (OptLevel) {
189 default:
190 llvm_unreachable("Invalid optimization level");
191 case 0:
192 OL = PassBuilder::O0;
193 break;
194 case 1:
195 OL = PassBuilder::O1;
196 break;
197 case 2:
198 OL = PassBuilder::O2;
199 break;
200 case 3:
201 OL = PassBuilder::O3;
202 break;
205 if (IsThinLTO)
206 MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
207 ImportSummary);
208 else
209 MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary);
210 MPM.run(Mod, MAM);
212 // FIXME (davide): verify the output.
215 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
216 std::string PipelineDesc,
217 std::string AAPipelineDesc,
218 bool DisableVerify) {
219 PassBuilder PB(TM);
220 AAManager AA;
222 // Parse a custom AA pipeline if asked to.
223 if (!AAPipelineDesc.empty())
224 if (!PB.parseAAPipeline(AA, AAPipelineDesc))
225 report_fatal_error("unable to parse AA pipeline description: " +
226 AAPipelineDesc);
228 LoopAnalysisManager LAM;
229 FunctionAnalysisManager FAM;
230 CGSCCAnalysisManager CGAM;
231 ModuleAnalysisManager MAM;
233 // Register the AA manager first so that our version is the one used.
234 FAM.registerPass([&] { return std::move(AA); });
236 // Register all the basic analyses with the managers.
237 PB.registerModuleAnalyses(MAM);
238 PB.registerCGSCCAnalyses(CGAM);
239 PB.registerFunctionAnalyses(FAM);
240 PB.registerLoopAnalyses(LAM);
241 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
243 ModulePassManager MPM;
245 // Always verify the input.
246 MPM.addPass(VerifierPass());
248 // Now, add all the passes we've been requested to.
249 if (!PB.parsePassPipeline(MPM, PipelineDesc))
250 report_fatal_error("unable to parse pass pipeline description: " +
251 PipelineDesc);
253 if (!DisableVerify)
254 MPM.addPass(VerifierPass());
255 MPM.run(Mod, MAM);
258 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
259 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
260 const ModuleSummaryIndex *ImportSummary) {
261 legacy::PassManager passes;
262 passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
264 PassManagerBuilder PMB;
265 PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
266 PMB.Inliner = createFunctionInliningPass();
267 PMB.ExportSummary = ExportSummary;
268 PMB.ImportSummary = ImportSummary;
269 // Unconditionally verify input since it is not verified before this
270 // point and has unknown origin.
271 PMB.VerifyInput = true;
272 PMB.VerifyOutput = !Conf.DisableVerify;
273 PMB.LoopVectorize = true;
274 PMB.SLPVectorize = true;
275 PMB.OptLevel = Conf.OptLevel;
276 PMB.PGOSampleUse = Conf.SampleProfile;
277 if (IsThinLTO)
278 PMB.populateThinLTOPassManager(passes);
279 else
280 PMB.populateLTOPassManager(passes);
281 passes.run(Mod);
284 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
285 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
286 const ModuleSummaryIndex *ImportSummary) {
287 // FIXME: Plumb the combined index into the new pass manager.
288 if (!Conf.OptPipeline.empty())
289 runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
290 Conf.DisableVerify);
291 else if (Conf.UseNewPM)
292 runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
293 ImportSummary);
294 else
295 runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
296 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
299 void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
300 unsigned Task, Module &Mod) {
301 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
302 return;
304 std::unique_ptr<ToolOutputFile> DwoOut;
305 SmallString<1024> DwoFile(Conf.DwoPath);
306 if (!Conf.DwoDir.empty()) {
307 std::error_code EC;
308 if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
309 report_fatal_error("Failed to create directory " + Conf.DwoDir + ": " +
310 EC.message());
312 DwoFile = Conf.DwoDir;
313 sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
316 if (!DwoFile.empty()) {
317 std::error_code EC;
318 TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
319 DwoOut = llvm::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::F_None);
320 if (EC)
321 report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
324 auto Stream = AddStream(Task);
325 legacy::PassManager CodeGenPasses;
326 if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
327 DwoOut ? &DwoOut->os() : nullptr,
328 Conf.CGFileType))
329 report_fatal_error("Failed to setup codegen");
330 CodeGenPasses.run(Mod);
332 if (DwoOut)
333 DwoOut->keep();
336 void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
337 unsigned ParallelCodeGenParallelismLevel,
338 std::unique_ptr<Module> Mod) {
339 ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
340 unsigned ThreadCount = 0;
341 const Target *T = &TM->getTarget();
343 SplitModule(
344 std::move(Mod), ParallelCodeGenParallelismLevel,
345 [&](std::unique_ptr<Module> MPart) {
346 // We want to clone the module in a new context to multi-thread the
347 // codegen. We do it by serializing partition modules to bitcode
348 // (while still on the main thread, in order to avoid data races) and
349 // spinning up new threads which deserialize the partitions into
350 // separate contexts.
351 // FIXME: Provide a more direct way to do this in LLVM.
352 SmallString<0> BC;
353 raw_svector_ostream BCOS(BC);
354 WriteBitcodeToFile(*MPart, BCOS);
356 // Enqueue the task
357 CodegenThreadPool.async(
358 [&](const SmallString<0> &BC, unsigned ThreadId) {
359 LTOLLVMContext Ctx(C);
360 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
361 MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
362 Ctx);
363 if (!MOrErr)
364 report_fatal_error("Failed to read bitcode");
365 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
367 std::unique_ptr<TargetMachine> TM =
368 createTargetMachine(C, T, *MPartInCtx);
370 codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
372 // Pass BC using std::move to ensure that it get moved rather than
373 // copied into the thread's context.
374 std::move(BC), ThreadCount++);
376 false);
378 // Because the inner lambda (which runs in a worker thread) captures our local
379 // variables, we need to wait for the worker threads to terminate before we
380 // can leave the function scope.
381 CodegenThreadPool.wait();
384 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
385 if (!C.OverrideTriple.empty())
386 Mod.setTargetTriple(C.OverrideTriple);
387 else if (Mod.getTargetTriple().empty())
388 Mod.setTargetTriple(C.DefaultTriple);
390 std::string Msg;
391 const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
392 if (!T)
393 return make_error<StringError>(Msg, inconvertibleErrorCode());
394 return T;
399 static Error
400 finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) {
401 // Make sure we flush the diagnostic remarks file in case the linker doesn't
402 // call the global destructors before exiting.
403 if (!DiagOutputFile)
404 return Error::success();
405 DiagOutputFile->keep();
406 DiagOutputFile->os().flush();
407 return Error::success();
410 Error lto::backend(Config &C, AddStreamFn AddStream,
411 unsigned ParallelCodeGenParallelismLevel,
412 std::unique_ptr<Module> Mod,
413 ModuleSummaryIndex &CombinedIndex) {
414 Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
415 if (!TOrErr)
416 return TOrErr.takeError();
418 std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod);
420 // Setup optimization remarks.
421 auto DiagFileOrErr = lto::setupOptimizationRemarks(
422 Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
423 if (!DiagFileOrErr)
424 return DiagFileOrErr.takeError();
425 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
427 if (!C.CodeGenOnly) {
428 if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
429 /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
430 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
433 if (ParallelCodeGenParallelismLevel == 1) {
434 codegen(C, TM.get(), AddStream, 0, *Mod);
435 } else {
436 splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
437 std::move(Mod));
439 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
442 static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
443 const ModuleSummaryIndex &Index) {
444 std::vector<GlobalValue*> DeadGVs;
445 for (auto &GV : Mod.global_values())
446 if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID()))
447 if (!Index.isGlobalValueLive(GVS)) {
448 DeadGVs.push_back(&GV);
449 convertToDeclaration(GV);
452 // Now that all dead bodies have been dropped, delete the actual objects
453 // themselves when possible.
454 for (GlobalValue *GV : DeadGVs) {
455 GV->removeDeadConstantUsers();
456 // Might reference something defined in native object (i.e. dropped a
457 // non-prevailing IR def, but we need to keep the declaration).
458 if (GV->use_empty())
459 GV->eraseFromParent();
463 Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
464 Module &Mod, const ModuleSummaryIndex &CombinedIndex,
465 const FunctionImporter::ImportMapTy &ImportList,
466 const GVSummaryMapTy &DefinedGlobals,
467 MapVector<StringRef, BitcodeModule> &ModuleMap) {
468 Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
469 if (!TOrErr)
470 return TOrErr.takeError();
472 std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod);
474 // Setup optimization remarks.
475 auto DiagFileOrErr = lto::setupOptimizationRemarks(
476 Mod.getContext(), Conf.RemarksFilename, Conf.RemarksWithHotness, Task);
477 if (!DiagFileOrErr)
478 return DiagFileOrErr.takeError();
479 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
481 if (Conf.CodeGenOnly) {
482 codegen(Conf, TM.get(), AddStream, Task, Mod);
483 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
486 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
487 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
489 renameModuleForThinLTO(Mod, CombinedIndex);
491 dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
493 thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
495 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
496 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
498 if (!DefinedGlobals.empty())
499 thinLTOInternalizeModule(Mod, DefinedGlobals);
501 if (Conf.PostInternalizeModuleHook &&
502 !Conf.PostInternalizeModuleHook(Task, Mod))
503 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
505 auto ModuleLoader = [&](StringRef Identifier) {
506 assert(Mod.getContext().isODRUniquingDebugTypes() &&
507 "ODR Type uniquing should be enabled on the context");
508 auto I = ModuleMap.find(Identifier);
509 assert(I != ModuleMap.end());
510 return I->second.getLazyModule(Mod.getContext(),
511 /*ShouldLazyLoadMetadata=*/true,
512 /*IsImporting*/ true);
515 FunctionImporter Importer(CombinedIndex, ModuleLoader);
516 if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
517 return Err;
519 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
520 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
522 if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
523 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
524 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
526 codegen(Conf, TM.get(), AddStream, Task, Mod);
527 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));