1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
10 #include "llvm/Analysis/BasicAliasAnalysis.h"
11 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
12 #include "llvm/Analysis/ProfileSummaryInfo.h"
13 #include "llvm/Analysis/TypeMetadataUtils.h"
14 #include "llvm/Bitcode/BitcodeWriter.h"
15 #include "llvm/IR/Constants.h"
16 #include "llvm/IR/DebugInfo.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/PassManager.h"
20 #include "llvm/Object/ModuleSymbolTable.h"
21 #include "llvm/Pass.h"
22 #include "llvm/Support/ScopedPrinter.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/Transforms/IPO.h"
25 #include "llvm/Transforms/IPO/FunctionAttrs.h"
26 #include "llvm/Transforms/IPO/FunctionImport.h"
27 #include "llvm/Transforms/Utils/Cloning.h"
28 #include "llvm/Transforms/Utils/ModuleUtils.h"
33 // Promote each local-linkage entity defined by ExportM and used by ImportM by
34 // changing visibility and appending the given ModuleId.
// Arguments, as used below: ExportM is the module whose local-linkage values
// are renamed and made external; ImportM is searched by name for the matching
// value; ModuleId is the suffix appended to each promoted name; values found
// in PromoteExtra are promoted without being looked up in ImportM.
// NOTE(review): the embedded line numbers in this listing skip values (for
// example 40-41, 46-47 and 51-54), so some statements appear to be missing
// here; confirm against the upstream file before relying on the control flow.
35 void promoteInternals(Module
&ExportM
, Module
&ImportM
, StringRef ModuleId
,
36 SetVector
<GlobalValue
*> &PromoteExtra
) {
// Comdats that share their name with a promoted global, mapped to the
// replacement comdat created under the new name.
37 DenseMap
<const Comdat
*, Comdat
*> RenamedComdats
;
38 for (auto &ExportGV
: ExportM
.global_values()) {
39 if (!ExportGV
.hasLocalLinkage())
42 auto Name
= ExportGV
.getName();
43 GlobalValue
*ImportGV
= nullptr;
// ImportM is only consulted for values that are not in PromoteExtra.
44 if (!PromoteExtra
.count(&ExportGV
)) {
45 ImportGV
= ImportM
.getNamedValue(Name
);
// Dead constant users are removed before testing whether any uses remain;
// an import-side value without uses is erased.
48 ImportGV
->removeDeadConstantUsers();
49 if (ImportGV
->use_empty()) {
50 ImportGV
->eraseFromParent();
// The promoted name is the original name followed by ModuleId.
55 std::string NewName
= (Name
+ ModuleId
).str();
// A comdat carrying the global's original name gets a counterpart under the
// new name; the pair is recorded here and applied in the loop at the end.
57 if (const auto *C
= ExportGV
.getComdat())
58 if (C
->getName() == Name
)
59 RenamedComdats
.try_emplace(C
, ExportM
.getOrInsertComdat(NewName
));
// Rename the export-side value and give it external linkage with hidden
// visibility.
61 ExportGV
.setName(NewName
);
62 ExportGV
.setLinkage(GlobalValue::ExternalLinkage
);
63 ExportGV
.setVisibility(GlobalValue::HiddenVisibility
);
// Apply the same name and hidden visibility to the import-side value.
// NOTE(review): ImportGV is nullptr for PromoteExtra members; any guard for
// that case is not visible in this listing (embedded line 65 is absent).
66 ImportGV
->setName(NewName
);
67 ImportGV
->setVisibility(GlobalValue::HiddenVisibility
);
// Move every global object in ExportM that uses a recorded comdat over to
// its replacement.
71 if (!RenamedComdats
.empty())
72 for (auto &GO
: ExportM
.global_objects())
73 if (auto *C
= GO
.getComdat()) {
74 auto Replacement
= RenamedComdats
.find(C
);
75 if (Replacement
!= RenamedComdats
.end())
76 GO
.setComdat(Replacement
->second
);
80 // Promote all internal (i.e. distinct) type ids used by the module by replacing
81 // them with external type ids formed using the module id.
83 // Note that this needs to be done before we clone the module because each clone
84 // will receive its own set of distinct metadata nodes.
// M is modified in place; ModuleId is appended to every generated type id
// name.
// NOTE(review): the embedded line numbers in this listing skip values (for
// example 88, 93 and 128-130), so some statements appear to be missing here;
// confirm against the upstream file.
85 void promoteTypeIds(Module
&M
, StringRef ModuleId
) {
// Maps each distinct type id node seen so far to the string type id that
// replaces it.
86 DenseMap
<Metadata
*, Metadata
*> LocalToGlobal
;
// Rewrites operand ArgNo of call CI when that operand wraps a distinct
// metadata node.
87 auto ExternalizeTypeId
= [&](CallInst
*CI
, unsigned ArgNo
) {
// NOTE(review): MD is used below, but its declaration is not visible in this
// listing; the expression on the next lines is presumably its initializer.
89 cast
<MetadataAsValue
>(CI
->getArgOperand(ArgNo
))->getMetadata();
91 if (isa
<MDNode
>(MD
) && cast
<MDNode
>(MD
)->isDistinct()) {
// operator[] creates the map slot for MD if it does not exist yet.
92 Metadata
*&GlobalMD
= LocalToGlobal
[MD
];
// The generated name is the current size of LocalToGlobal followed by
// ModuleId.
94 std::string NewName
= (Twine(LocalToGlobal
.size()) + ModuleId
).str();
95 GlobalMD
= MDString::get(M
.getContext(), NewName
);
// Point the call operand at the replacement type id.
98 CI
->setArgOperand(ArgNo
,
99 MetadataAsValue::get(M
.getContext(), GlobalMD
));
// For every use of the type_test intrinsic, operand 1 of the calling
// instruction is externalized.
103 if (Function
*TypeTestFunc
=
104 M
.getFunction(Intrinsic::getName(Intrinsic::type_test
))) {
105 for (const Use
&U
: TypeTestFunc
->uses()) {
106 auto CI
= cast
<CallInst
>(U
.getUser());
107 ExternalizeTypeId(CI
, 1);
// For every use of the type_checked_load intrinsic, operand 2 of the calling
// instruction is externalized.
111 if (Function
*TypeCheckedLoadFunc
=
112 M
.getFunction(Intrinsic::getName(Intrinsic::type_checked_load
))) {
113 for (const Use
&U
: TypeCheckedLoadFunc
->uses()) {
114 auto CI
= cast
<CallInst
>(U
.getUser());
115 ExternalizeTypeId(CI
, 2);
// Rebuild the MD_type attachments of every global object: the attachments
// are collected, erased, and then re-added one by one.
119 for (GlobalObject
&GO
: M
.global_objects()) {
120 SmallVector
<MDNode
*, 1> MDs
;
121 GO
.getMetadata(LLVMContext::MD_type
, MDs
);
123 GO
.eraseMetadata(LLVMContext::MD_type
);
124 for (auto MD
: MDs
) {
// Operand 1 of the attachment is looked up in LocalToGlobal. A node whose
// operand has no entry is re-added as is; otherwise a new node is built from
// operand 0 and the mapped replacement.
125 auto I
= LocalToGlobal
.find(MD
->getOperand(1));
126 if (I
== LocalToGlobal
.end()) {
127 GO
.addMetadata(LLVMContext::MD_type
, *MD
);
131 LLVMContext::MD_type
,
132 *MDNode::get(M
.getContext(), {MD
->getOperand(0), I
->second
}));
137 // Drop unused globals, and drop type information from function declarations.
138 // FIXME: If we made functions typeless then there would be no need to do this.
// M is modified in place.
// NOTE(review): the embedded line numbers in this listing skip values (for
// example 144, 146-149, 153-155, 159 and 161), so some statements appear to
// be missing here; confirm against the upstream file.
139 void simplifyExternals(Module
&M
) {
// The function type used for rewritten declarations: void return type, no
// parameter list passed, not variadic.
140 FunctionType
*EmptyFT
=
141 FunctionType::get(Type::getVoidTy(M
.getContext()), false);
// Explicit iterator loop over the functions of M; the increment clause is
// empty, so the iterator is advanced inside the body.
143 for (auto I
= M
.begin(), E
= M
.end(); I
!= E
;) {
// NOTE(review): F is not declared in the visible lines, and the body of this
// branch (declaration without uses) is absent from this listing.
145 if (F
.isDeclaration() && F
.use_empty()) {
// Tests for functions that are definitions, that already have type EmptyFT,
// or whose name starts with "llvm.".
150 if (!F
.isDeclaration() || F
.getFunctionType() == EmptyFT
||
151 // Changing the type of an intrinsic may invalidate the IR.
152 F
.getName().startswith("llvm."))
// Creates an unnamed external function of type EmptyFT in F's address space
// inside M, and copies F's visibility to it.
// NOTE(review): the declaration of NewF is not visible in this listing.
156 Function::Create(EmptyFT
, GlobalValue::ExternalLinkage
,
157 F
.getAddressSpace(), "", &M
);
158 NewF
->setVisibility(F
.getVisibility());
// Every use of F is redirected to NewF, bitcast to F's original type.
160 F
.replaceAllUsesWith(ConstantExpr::getBitCast(NewF
, F
.getType()));
// Erase global variable declarations that have no uses. The iterator is
// advanced before GV may be erased.
164 for (auto I
= M
.global_begin(), E
= M
.global_end(); I
!= E
;) {
165 GlobalVariable
&GV
= *I
++;
166 if (GV
.isDeclaration() && GV
.use_empty()) {
167 GV
.eraseFromParent();
// Calls ShouldKeepDefinition on every global value of M. Each value in V is
// then passed to convertToDeclaration, and values for which that call
// returns false are erased from the module.
// NOTE(review): the return type and the statement that fills V are not
// visible in this listing (embedded lines 173 and 179 are absent);
// presumably V collects the values that ShouldKeepDefinition rejects.
174 filterModule(Module
*M
,
175 function_ref
<bool(const GlobalValue
*)> ShouldKeepDefinition
) {
176 std::vector
<GlobalValue
*> V
;
177 for (GlobalValue
&GV
: M
->global_values())
178 if (!ShouldKeepDefinition(&GV
))
// The conversion runs in a second loop over V, after the iteration over the
// module's global values has finished.
181 for (GlobalValue
*GV
: V
)
182 if (!convertToDeclaration(*GV
))
183 GV
->eraseFromParent();
// Walks constant C recursively through its operands. Fn is a callback taking
// a Function pointer.
// NOTE(review): the statements controlled by the two `if` tests below are
// not visible in this listing (embedded lines 188 and 190 are absent);
// presumably a Function is handed to Fn and any other GlobalValue ends the
// walk. Confirm against the upstream file.
186 void forEachVirtualFunction(Constant
*C
, function_ref
<void(Function
*)> Fn
) {
187 if (auto *F
= dyn_cast
<Function
>(C
))
189 if (isa
<GlobalValue
>(C
))
// Recurse into each operand; every operand is cast to Constant.
191 for (Value
*Op
: C
->operands())
192 forEachVirtualFunction(cast
<Constant
>(Op
), Fn
);
195 // If it's possible to split M into regular and thin LTO parts, do so and write
196 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
197 // regular LTO bitcode file to OS.
// Arguments, as used below: OS is the stream for the main output; ThinLinkOS
// is a pointer to the stream for the thin-link output; AARGetter supplies
// the AAResults for a given function; M is the module being written and is
// modified in place.
// NOTE(review): the embedded line numbers in this listing skip values in
// many places (for example 209-210, 214-218, 252, 256-257, 269, 274-275,
// 289, 298, 301-303, 334, 400-402 and 407-415), so returns, loop headers and
// closing braces appear to be missing; confirm against the upstream file
// before relying on the control flow shown here.
198 void splitAndWriteThinLTOBitcode(
199 raw_ostream
&OS
, raw_ostream
*ThinLinkOS
,
200 function_ref
<AAResults
&(Function
&)> AARGetter
, Module
&M
) {
201 std::string ModuleId
= getUniqueModuleId(&M
);
202 if (ModuleId
.empty()) {
203 // We couldn't generate a module ID for this module, write it out as a
204 // regular LTO module with an index for summary-based dead stripping.
205 ProfileSummaryInfo
PSI(M
);
// Adds the module flag "ThinLTO" with value 0 and Error merge behavior.
206 M
.addModuleFlag(Module::Error
, "ThinLTO", uint32_t(0));
207 ModuleSummaryIndex Index
= buildModuleSummaryIndex(M
, nullptr, &PSI
);
208 WriteBitcodeToFile(M
, OS
, /*ShouldPreserveUseListOrder=*/false, &Index
);
211 // We don't have a ThinLTO part, but still write the module to the
212 // ThinLinkOS if requested so that the expected output file is produced.
213 WriteBitcodeToFile(M
, *ThinLinkOS
, /*ShouldPreserveUseListOrder=*/false,
// Distinct type ids are replaced with ModuleId-based names here, ahead of
// the CloneModule call further down.
219 promoteTypeIds(M
, ModuleId
);
221 // Returns whether a global has attached type metadata. Such globals may
222 // participate in CFI or whole-program devirtualization, so they need to
223 // appear in the merged module instead of the thin LTO module.
224 auto HasTypeMetadata
= [](const GlobalObject
*GO
) {
225 return GO
->hasMetadata(LLVMContext::MD_type
);
228 // Collect the set of virtual functions that are eligible for virtual constant
229 // propagation. Each eligible function must not access memory, must return
230 // an integer of width <=64 bits, must take at least one argument, must not
231 // use its first argument (assumed to be "this") and all arguments other than
232 // the first one must be of <=64 bit integer type.
234 // Note that we test whether this copy of the function is readnone, rather
235 // than testing function attributes, which must hold for any copy of the
236 // function, even a less optimized version substituted at link time. This is
237 // sound because the virtual constant propagation optimizations effectively
238 // inline all implementations of the virtual function into each call site,
239 // rather than using function attributes to perform local optimization.
240 DenseSet
<const Function
*> EligibleVirtualFns
;
241 // If any member of a comdat lives in MergedM, put all members of that
242 // comdat in MergedM to keep the comdat together.
243 DenseSet
<const Comdat
*> MergedMComdats
;
// For each global variable with type metadata: record its comdat, then
// inspect every function reachable from its initializer.
244 for (GlobalVariable
&GV
: M
.globals())
245 if (HasTypeMetadata(&GV
)) {
246 if (const auto *C
= GV
.getComdat())
247 MergedMComdats
.insert(C
);
248 forEachVirtualFunction(GV
.getInitializer(), [&](Function
*F
) {
// Tests the return type (integer, at most 64 bits), the presence of at least
// one argument, and that the first argument has no uses.
249 auto *RT
= dyn_cast
<IntegerType
>(F
->getReturnType());
250 if (!RT
|| RT
->getBitWidth() > 64 || F
->arg_empty() ||
251 !F
->arg_begin()->use_empty())
// Tests every argument after the first: integer type of at most 64 bits.
253 for (auto &Arg
: make_range(std::next(F
->arg_begin()), F
->arg_end())) {
254 auto *ArgT
= dyn_cast
<IntegerType
>(Arg
.getType());
255 if (!ArgT
|| ArgT
->getBitWidth() > 64)
// Only definitions whose body is computed to be MAK_ReadNone are recorded.
258 if (!F
->isDeclaration() &&
259 computeFunctionBodyMemoryAccess(*F
, AARGetter(*F
)) == MAK_ReadNone
)
260 EligibleVirtualFns
.insert(F
);
// Clone M into MergedM. The callback consults, in order: membership of the
// value's comdat in MergedMComdats, membership in EligibleVirtualFns for
// functions, and type metadata on the base object when it is a global
// variable.
// NOTE(review): the results returned for the comdat case and the fall-through
// case are not visible in this listing.
264 ValueToValueMapTy VMap
;
265 std::unique_ptr
<Module
> MergedM(
266 CloneModule(M
, VMap
, [&](const GlobalValue
*GV
) -> bool {
267 if (const auto *C
= GV
->getComdat())
268 if (MergedMComdats
.count(C
))
270 if (auto *F
= dyn_cast
<Function
>(GV
))
271 return EligibleVirtualFns
.count(F
);
272 if (auto *GVar
= dyn_cast_or_null
<GlobalVariable
>(GV
->getBaseObject()))
273 return HasTypeMetadata(GVar
);
// Debug info and module-level inline asm are removed from the merged module.
276 StripDebugInfo(*MergedM
);
277 MergedM
->setModuleInlineAsm("");
279 for (Function
&F
: *MergedM
)
280 if (!F
.isDeclaration()) {
281 // Reset the linkage of all functions eligible for virtual constant
282 // propagation. The canonical definitions live in the thin LTO module so
283 // that they can be imported.
284 F
.setLinkage(GlobalValue::AvailableExternallyLinkage
);
285 F
.setComdat(nullptr);
// Functions that carry type metadata and are either non-local or
// address-taken are collected; the set is used for promotion and for the
// "cfi.functions" metadata below.
// NOTE(review): the loop header that declares F is not visible in this
// listing (embedded line 289 is absent).
288 SetVector
<GlobalValue
*> CfiFunctions
;
290 if ((!F
.hasLocalLinkage() || F
.hasAddressTaken()) && HasTypeMetadata(&F
))
291 CfiFunctions
.insert(&F
);
293 // Remove all globals with type metadata, globals with comdats that live in
294 // MergedM, and aliases pointing to such globals from the thin LTO module.
295 filterModule(&M
, [&](const GlobalValue
*GV
) {
296 if (auto *GVar
= dyn_cast_or_null
<GlobalVariable
>(GV
->getBaseObject()))
297 if (HasTypeMetadata(GVar
))
299 if (const auto *C
= GV
->getComdat())
300 if (MergedMComdats
.count(C
))
// Promotion runs in both directions; CfiFunctions is passed as the
// PromoteExtra set each time.
305 promoteInternals(*MergedM
, M
, ModuleId
, CfiFunctions
);
306 promoteInternals(M
, *MergedM
, ModuleId
, CfiFunctions
);
// One tuple is built per CFI function: its name, a linkage code stored as an
// i8 constant, then its MD_type nodes.
308 auto &Ctx
= MergedM
->getContext();
309 SmallVector
<MDNode
*, 8> CfiFunctionMDs
;
310 for (auto V
: CfiFunctions
) {
311 Function
&F
= *cast
<Function
>(V
);
312 SmallVector
<MDNode
*, 2> Types
;
313 F
.getMetadata(LLVMContext::MD_type
, Types
);
315 SmallVector
<Metadata
*, 4> Elts
;
316 Elts
.push_back(MDString::get(Ctx
, F
.getName()));
// Linkage code: CFL_Definition when F is not a declaration for the linker,
// CFL_WeakDeclaration when it is weak for the linker, and CFL_Declaration in
// the remaining assignment.
317 CfiFunctionLinkage Linkage
;
318 if (!F
.isDeclarationForLinker())
319 Linkage
= CFL_Definition
;
320 else if (F
.isWeakForLinker())
321 Linkage
= CFL_WeakDeclaration
;
323 Linkage
= CFL_Declaration
;
324 Elts
.push_back(ConstantAsMetadata::get(
325 llvm::ConstantInt::get(Type::getInt8Ty(Ctx
), Linkage
)));
326 for (auto Type
: Types
)
327 Elts
.push_back(Type
);
328 CfiFunctionMDs
.push_back(MDTuple::get(Ctx
, Elts
));
// Named metadata "cfi.functions" is created in MergedM when at least one
// tuple exists.
// NOTE(review): the statement inside the loop is not visible in this listing;
// presumably each tuple is added as an operand of NMD.
331 if(!CfiFunctionMDs
.empty()) {
332 NamedMDNode
*NMD
= MergedM
->getOrInsertNamedMetadata("cfi.functions");
333 for (auto MD
: CfiFunctionMDs
)
// Aliases in M whose aliasee is a function are described by the alias name,
// the aliasee name, and the visibility and weak-for-linker flag as i8
// constants.
// NOTE(review): the declaration of Elts for this loop is not visible in this
// listing.
337 SmallVector
<MDNode
*, 8> FunctionAliases
;
338 for (auto &A
: M
.aliases()) {
339 if (!isa
<Function
>(A
.getAliasee()))
342 auto *F
= cast
<Function
>(A
.getAliasee());
345 MDString::get(Ctx
, A
.getName()),
346 MDString::get(Ctx
, F
->getName()),
347 ConstantAsMetadata::get(
348 ConstantInt::get(Type::getInt8Ty(Ctx
), A
.getVisibility())),
349 ConstantAsMetadata::get(
350 ConstantInt::get(Type::getInt8Ty(Ctx
), A
.isWeakForLinker())),
353 FunctionAliases
.push_back(MDTuple::get(Ctx
, Elts
));
// Named metadata "aliases" is created in MergedM when at least one alias
// tuple exists (the statement inside the loop is not visible in this
// listing).
356 if (!FunctionAliases
.empty()) {
357 NamedMDNode
*NMD
= MergedM
->getOrInsertNamedMetadata("aliases");
358 for (auto MD
: FunctionAliases
)
// For each (Name, Alias) pair reported by CollectAsmSymvers, the function
// called Name is looked up in M and tested for being absent or unused; a
// tuple of the two strings is appended to Symvers.
362 SmallVector
<MDNode
*, 8> Symvers
;
363 ModuleSymbolTable::CollectAsmSymvers(M
, [&](StringRef Name
, StringRef Alias
) {
364 Function
*F
= M
.getFunction(Name
);
365 if (!F
|| F
->use_empty())
368 Symvers
.push_back(MDTuple::get(
369 Ctx
, {MDString::get(Ctx
, Name
), MDString::get(Ctx
, Alias
)}));
// Named metadata "symvers" is created in MergedM when at least one pair
// exists (the statement inside the loop is not visible in this listing).
372 if (!Symvers
.empty()) {
373 NamedMDNode
*NMD
= MergedM
->getOrInsertNamedMetadata("symvers");
374 for (auto MD
: Symvers
)
378 simplifyExternals(*MergedM
);
380 // FIXME: Try to re-use BSI and PFI from the original module here.
381 ProfileSummaryInfo
PSI(M
);
382 ModuleSummaryIndex Index
= buildModuleSummaryIndex(M
, nullptr, &PSI
);
384 // Mark the merged module as requiring full LTO. We still want an index for
385 // it though, so that it can participate in summary-based dead stripping.
386 MergedM
->addModuleFlag(Module::Error
, "ThinLTO", uint32_t(0));
// The merged module's index is built with the PSI object constructed from M
// above.
387 ModuleSummaryIndex MergedMIndex
=
388 buildModuleSummaryIndex(*MergedM
, nullptr, &PSI
);
// Both modules are written into one in-memory buffer through a single
// BitcodeWriter; the hash is requested only for the write of M.
390 SmallVector
<char, 0> Buffer
;
392 BitcodeWriter
W(Buffer
);
393 // Save the module hash produced for the full bitcode, which will
394 // be used in the backends, and use that in the minimized bitcode
395 // produced for the full link.
396 ModuleHash ModHash
= {{0}};
397 W
.writeModule(M
, /*ShouldPreserveUseListOrder=*/false, &Index
,
398 /*GenerateHash=*/true, &ModHash
);
399 W
.writeModule(*MergedM
, /*ShouldPreserveUseListOrder=*/false, &MergedMIndex
);
404 // If a minimized bitcode module was requested for the thin link, only
405 // the information that is needed by thin link will be written in the
406 // given OS (the merged module will be written as usual).
// A second writer over Buffer produces the thin-link output: the thin-link
// bitcode of M using ModHash, followed by the merged module. The buffer is
// then streamed to *ThinLinkOS.
// NOTE(review): the condition guarding this section and any reset of Buffer
// are not visible in this listing.
409 BitcodeWriter
W2(Buffer
);
411 W2
.writeThinLinkBitcode(M
, Index
, ModHash
);
412 W2
.writeModule(*MergedM
, /*ShouldPreserveUseListOrder=*/false,
416 *ThinLinkOS
<< Buffer
;
420 // Returns whether this module needs to be split because splitting is
421 // enabled and it uses type metadata.
// Reads the "EnableSplitLTOUnit" module flag of M and scans M's global
// objects for MD_type metadata.
// NOTE(review): the statements controlled by the last two `if` tests are not
// visible in this listing (embedded lines 429-430 and 434-438 are absent);
// presumably they are the return statements. Confirm against the upstream
// file.
422 bool requiresSplit(Module
&M
) {
423 // First check if the LTO Unit splitting has been enabled.
424 bool EnableSplitLTOUnit
= false;
// The flag is read as a ConstantInt; a missing flag leaves the default of
// false, otherwise the zero-extended value is converted to bool.
425 if (auto *MD
= mdconst::extract_or_null
<ConstantInt
>(
426 M
.getModuleFlag("EnableSplitLTOUnit")))
427 EnableSplitLTOUnit
= MD
->getZExtValue();
428 if (!EnableSplitLTOUnit
)
431 // Module only needs to be split if it contains type metadata.
432 for (auto &GO
: M
.global_objects()) {
433 if (GO
.hasMetadata(LLVMContext::MD_type
))
// Writes M to OS, delegating to splitAndWriteThinLTOBitcode when
// requiresSplit(M) holds. OS, ThinLinkOS, AARGetter and M are forwarded to
// that function; Index is used only on the non-split path, where it is
// passed to the bitcode writers. ThinLinkOS and Index are both checked for
// null before the thin-link output is written.
440 void writeThinLTOBitcode(raw_ostream
&OS
, raw_ostream
*ThinLinkOS
,
441 function_ref
<AAResults
&(Function
&)> AARGetter
,
442 Module
&M
, const ModuleSummaryIndex
*Index
) {
443 // Split module if splitting is enabled and it contains any type metadata.
444 if (requiresSplit(M
))
445 return splitAndWriteThinLTOBitcode(OS
, ThinLinkOS
, AARGetter
, M
);
447 // Otherwise we can just write it out as a regular module.
449 // Save the module hash produced for the full bitcode, which will
450 // be used in the backends, and use that in the minimized bitcode
451 // produced for the full link.
452 ModuleHash ModHash
= {{0}};
453 WriteBitcodeToFile(M
, OS
, /*ShouldPreserveUseListOrder=*/false, Index
,
454 /*GenerateHash=*/true, &ModHash
);
455 // If a minimized bitcode module was requested for the thin link, only
456 // the information that is needed by thin link will be written in the
// given OS.
458 if (ThinLinkOS
&& Index
)
459 WriteThinLinkBitcodeToFile(M
, *ThinLinkOS
, *Index
, ModHash
);
// Legacy-pass-manager ModulePass that calls writeThinLTOBitcode on the
// module it is run on.
// NOTE(review): the embedded line numbers in this listing skip values (for
// example 467-468, 472-473, 477-478, 485-486 and 492-493), so access
// specifiers, return statements and closing braces appear to be missing;
// confirm against the upstream file.
462 class WriteThinLTOBitcode
: public ModulePass
{
463 raw_ostream
&OS
; // raw_ostream to print on
464 // The output stream on which to emit a minimized module for use
465 // just in the thin link, if requested.
466 raw_ostream
*ThinLinkOS
;
469 static char ID
; // Pass identification, replacement for typeid
// Default constructor: output goes to dbgs() and no thin-link stream is set.
470 WriteThinLTOBitcode() : ModulePass(ID
), OS(dbgs()), ThinLinkOS(nullptr) {
471 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
// Constructor taking the output stream `o` and the thin-link stream pointer.
474 explicit WriteThinLTOBitcode(raw_ostream
&o
, raw_ostream
*ThinLinkOS
)
475 : ModulePass(ID
), OS(o
), ThinLinkOS(ThinLinkOS
) {
476 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
479 StringRef
getPassName() const override
{ return "ThinLTO Bitcode Writer"; }
// Fetches the index from ModuleSummaryIndexWrapperPass and forwards the
// streams, a LegacyAARGetter built from this pass, the module and the index
// to writeThinLTOBitcode.
481 bool runOnModule(Module
&M
) override
{
482 const ModuleSummaryIndex
*Index
=
483 &(getAnalysis
<ModuleSummaryIndexWrapperPass
>().getIndex());
484 writeThinLTOBitcode(OS
, ThinLinkOS
, LegacyAARGetter(*this), M
, Index
);
// Declares that all analyses are preserved and lists the three passes this
// pass requires.
487 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
488 AU
.setPreservesAll();
489 AU
.addRequired
<AssumptionCacheTracker
>();
490 AU
.addRequired
<ModuleSummaryIndexWrapperPass
>();
491 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
494 } // anonymous namespace
// Definition of the static pass ID, followed by the registration macros for
// WriteThinLTOBitcode under the argument string "write-thinlto-bitcode" and
// the description "Write ThinLTO Bitcode". The three dependencies listed
// here are the same passes the class adds as required in getAnalysisUsage.
496 char WriteThinLTOBitcode::ID
= 0;
497 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode
, "write-thinlto-bitcode",
498 "Write ThinLTO Bitcode", false, true)
499 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
500 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass
)
501 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
502 INITIALIZE_PASS_END(WriteThinLTOBitcode
, "write-thinlto-bitcode",
503 "Write ThinLTO Bitcode", false, true)
// Factory for the legacy pass: returns a WriteThinLTOBitcode allocated with
// `new`, constructed from the output stream Str and the thin-link stream
// pointer ThinLinkOS.
505 ModulePass
*llvm::createWriteThinLTOBitcodePass(raw_ostream
&Str
,
506 raw_ostream
*ThinLinkOS
) {
507 return new WriteThinLTOBitcode(Str
, ThinLinkOS
);
// Pass entry point taking a ModuleAnalysisManager: obtains the
// FunctionAnalysisManager through the module proxy, uses it to supply the
// AAManager result for each function, and passes the
// ModuleSummaryIndexAnalysis result for M as the index. All analyses are
// reported as preserved.
// NOTE(review): the return type line and the closing brace are not visible
// in this listing, and OS / ThinLinkOS are not declared here; presumably
// they are members of ThinLTOBitcodeWriterPass. Confirm against the header.
511 llvm::ThinLTOBitcodeWriterPass::run(Module
&M
, ModuleAnalysisManager
&AM
) {
512 FunctionAnalysisManager
&FAM
=
513 AM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
514 writeThinLTOBitcode(OS
, ThinLinkOS
,
515 [&FAM
](Function
&F
) -> AAResults
& {
516 return FAM
.getResult
<AAManager
>(F
);
518 M
, &AM
.getResult
<ModuleSummaryIndexAnalysis
>(M
));
519 return PreservedAnalyses::all();