//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions performs manipulations on Modules.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/xxhash.h"

using namespace llvm;

#define DEBUG_TYPE "moduleutils"

28 static void appendToGlobalArray(StringRef ArrayName
, Module
&M
, Function
*F
,
29 int Priority
, Constant
*Data
) {
30 IRBuilder
<> IRB(M
.getContext());
31 FunctionType
*FnTy
= FunctionType::get(IRB
.getVoidTy(), false);
33 // Get the current set of static global constructors and add the new ctor
35 SmallVector
<Constant
*, 16> CurrentCtors
;
37 if (GlobalVariable
*GVCtor
= M
.getNamedGlobal(ArrayName
)) {
38 EltTy
= cast
<StructType
>(GVCtor
->getValueType()->getArrayElementType());
39 if (Constant
*Init
= GVCtor
->getInitializer()) {
40 unsigned n
= Init
->getNumOperands();
41 CurrentCtors
.reserve(n
+ 1);
42 for (unsigned i
= 0; i
!= n
; ++i
)
43 CurrentCtors
.push_back(cast
<Constant
>(Init
->getOperand(i
)));
45 GVCtor
->eraseFromParent();
47 EltTy
= StructType::get(IRB
.getInt32Ty(),
48 PointerType::get(FnTy
, F
->getAddressSpace()),
52 // Build a 3 field global_ctor entry. We don't take a comdat key.
54 CSVals
[0] = IRB
.getInt32(Priority
);
56 CSVals
[2] = Data
? ConstantExpr::getPointerCast(Data
, IRB
.getPtrTy())
57 : Constant::getNullValue(IRB
.getPtrTy());
58 Constant
*RuntimeCtorInit
=
59 ConstantStruct::get(EltTy
, ArrayRef(CSVals
, EltTy
->getNumElements()));
61 CurrentCtors
.push_back(RuntimeCtorInit
);
63 // Create a new initializer.
64 ArrayType
*AT
= ArrayType::get(EltTy
, CurrentCtors
.size());
65 Constant
*NewInit
= ConstantArray::get(AT
, CurrentCtors
);
67 // Create the new global variable and replace all uses of
68 // the old global variable with the new one.
69 (void)new GlobalVariable(M
, NewInit
->getType(), false,
70 GlobalValue::AppendingLinkage
, NewInit
, ArrayName
);
73 void llvm::appendToGlobalCtors(Module
&M
, Function
*F
, int Priority
, Constant
*Data
) {
74 appendToGlobalArray("llvm.global_ctors", M
, F
, Priority
, Data
);
77 void llvm::appendToGlobalDtors(Module
&M
, Function
*F
, int Priority
, Constant
*Data
) {
78 appendToGlobalArray("llvm.global_dtors", M
, F
, Priority
, Data
);
81 static void collectUsedGlobals(GlobalVariable
*GV
,
82 SmallSetVector
<Constant
*, 16> &Init
) {
83 if (!GV
|| !GV
->hasInitializer())
86 auto *CA
= cast
<ConstantArray
>(GV
->getInitializer());
87 for (Use
&Op
: CA
->operands())
88 Init
.insert(cast
<Constant
>(Op
));
91 static void appendToUsedList(Module
&M
, StringRef Name
, ArrayRef
<GlobalValue
*> Values
) {
92 GlobalVariable
*GV
= M
.getGlobalVariable(Name
);
94 SmallSetVector
<Constant
*, 16> Init
;
95 collectUsedGlobals(GV
, Init
);
97 GV
->eraseFromParent();
99 Type
*ArrayEltTy
= llvm::PointerType::getUnqual(M
.getContext());
100 for (auto *V
: Values
)
101 Init
.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V
, ArrayEltTy
));
106 ArrayType
*ATy
= ArrayType::get(ArrayEltTy
, Init
.size());
107 GV
= new llvm::GlobalVariable(M
, ATy
, false, GlobalValue::AppendingLinkage
,
108 ConstantArray::get(ATy
, Init
.getArrayRef()),
110 GV
->setSection("llvm.metadata");
113 void llvm::appendToUsed(Module
&M
, ArrayRef
<GlobalValue
*> Values
) {
114 appendToUsedList(M
, "llvm.used", Values
);
117 void llvm::appendToCompilerUsed(Module
&M
, ArrayRef
<GlobalValue
*> Values
) {
118 appendToUsedList(M
, "llvm.compiler.used", Values
);
121 static void removeFromUsedList(Module
&M
, StringRef Name
,
122 function_ref
<bool(Constant
*)> ShouldRemove
) {
123 GlobalVariable
*GV
= M
.getNamedGlobal(Name
);
127 SmallSetVector
<Constant
*, 16> Init
;
128 collectUsedGlobals(GV
, Init
);
130 Type
*ArrayEltTy
= cast
<ArrayType
>(GV
->getValueType())->getElementType();
132 SmallVector
<Constant
*, 16> NewInit
;
133 for (Constant
*MaybeRemoved
: Init
) {
134 if (!ShouldRemove(MaybeRemoved
->stripPointerCasts()))
135 NewInit
.push_back(MaybeRemoved
);
138 if (!NewInit
.empty()) {
139 ArrayType
*ATy
= ArrayType::get(ArrayEltTy
, NewInit
.size());
140 GlobalVariable
*NewGV
=
141 new GlobalVariable(M
, ATy
, false, GlobalValue::AppendingLinkage
,
142 ConstantArray::get(ATy
, NewInit
), "", GV
,
143 GV
->getThreadLocalMode(), GV
->getAddressSpace());
144 NewGV
->setSection(GV
->getSection());
148 GV
->eraseFromParent();
151 void llvm::removeFromUsedLists(Module
&M
,
152 function_ref
<bool(Constant
*)> ShouldRemove
) {
153 removeFromUsedList(M
, "llvm.used", ShouldRemove
);
154 removeFromUsedList(M
, "llvm.compiler.used", ShouldRemove
);
157 void llvm::setKCFIType(Module
&M
, Function
&F
, StringRef MangledType
) {
158 if (!M
.getModuleFlag("kcfi"))
160 // Matches CodeGenModule::CreateKCFITypeId in Clang.
161 LLVMContext
&Ctx
= M
.getContext();
164 LLVMContext::MD_kcfi_type
,
165 MDNode::get(Ctx
, MDB
.createConstant(ConstantInt::get(
166 Type::getInt32Ty(Ctx
),
167 static_cast<uint32_t>(xxHash64(MangledType
))))));
168 // If the module was compiled with -fpatchable-function-entry, ensure
169 // we use the same patchable-function-prefix.
170 if (auto *MD
= mdconst::extract_or_null
<ConstantInt
>(
171 M
.getModuleFlag("kcfi-offset"))) {
172 if (unsigned Offset
= MD
->getZExtValue())
173 F
.addFnAttr("patchable-function-prefix", std::to_string(Offset
));
177 FunctionCallee
llvm::declareSanitizerInitFunction(Module
&M
, StringRef InitName
,
178 ArrayRef
<Type
*> InitArgTypes
,
180 assert(!InitName
.empty() && "Expected init function name");
181 auto *VoidTy
= Type::getVoidTy(M
.getContext());
182 auto *FnTy
= FunctionType::get(VoidTy
, InitArgTypes
, false);
183 auto FnCallee
= M
.getOrInsertFunction(InitName
, FnTy
);
184 auto *Fn
= cast
<Function
>(FnCallee
.getCallee());
185 if (Weak
&& Fn
->isDeclaration())
186 Fn
->setLinkage(Function::ExternalWeakLinkage
);
190 Function
*llvm::createSanitizerCtor(Module
&M
, StringRef CtorName
) {
191 Function
*Ctor
= Function::createWithDefaultAttr(
192 FunctionType::get(Type::getVoidTy(M
.getContext()), false),
193 GlobalValue::InternalLinkage
, M
.getDataLayout().getProgramAddressSpace(),
195 Ctor
->addFnAttr(Attribute::NoUnwind
);
196 setKCFIType(M
, *Ctor
, "_ZTSFvvE"); // void (*)(void)
197 BasicBlock
*CtorBB
= BasicBlock::Create(M
.getContext(), "", Ctor
);
198 ReturnInst::Create(M
.getContext(), CtorBB
);
199 // Ensure Ctor cannot be discarded, even if in a comdat.
200 appendToUsed(M
, {Ctor
});
204 std::pair
<Function
*, FunctionCallee
> llvm::createSanitizerCtorAndInitFunctions(
205 Module
&M
, StringRef CtorName
, StringRef InitName
,
206 ArrayRef
<Type
*> InitArgTypes
, ArrayRef
<Value
*> InitArgs
,
207 StringRef VersionCheckName
, bool Weak
) {
208 assert(!InitName
.empty() && "Expected init function name");
209 assert(InitArgs
.size() == InitArgTypes
.size() &&
210 "Sanitizer's init function expects different number of arguments");
211 FunctionCallee InitFunction
=
212 declareSanitizerInitFunction(M
, InitName
, InitArgTypes
, Weak
);
213 Function
*Ctor
= createSanitizerCtor(M
, CtorName
);
214 IRBuilder
<> IRB(M
.getContext());
216 BasicBlock
*RetBB
= &Ctor
->getEntryBlock();
218 RetBB
->setName("ret");
219 auto *EntryBB
= BasicBlock::Create(M
.getContext(), "entry", Ctor
, RetBB
);
221 BasicBlock::Create(M
.getContext(), "callfunc", Ctor
, RetBB
);
222 auto *InitFn
= cast
<Function
>(InitFunction
.getCallee());
224 PointerType::get(InitFn
->getType(), InitFn
->getAddressSpace());
225 IRB
.SetInsertPoint(EntryBB
);
227 IRB
.CreateICmpNE(InitFn
, ConstantPointerNull::get(InitFnPtr
));
228 IRB
.CreateCondBr(InitNotNull
, CallInitBB
, RetBB
);
229 IRB
.SetInsertPoint(CallInitBB
);
231 IRB
.SetInsertPoint(RetBB
->getTerminator());
234 IRB
.CreateCall(InitFunction
, InitArgs
);
235 if (!VersionCheckName
.empty()) {
236 FunctionCallee VersionCheckFunction
= M
.getOrInsertFunction(
237 VersionCheckName
, FunctionType::get(IRB
.getVoidTy(), {}, false),
239 IRB
.CreateCall(VersionCheckFunction
, {});
245 return std::make_pair(Ctor
, InitFunction
);
248 std::pair
<Function
*, FunctionCallee
>
249 llvm::getOrCreateSanitizerCtorAndInitFunctions(
250 Module
&M
, StringRef CtorName
, StringRef InitName
,
251 ArrayRef
<Type
*> InitArgTypes
, ArrayRef
<Value
*> InitArgs
,
252 function_ref
<void(Function
*, FunctionCallee
)> FunctionsCreatedCallback
,
253 StringRef VersionCheckName
, bool Weak
) {
254 assert(!CtorName
.empty() && "Expected ctor function name");
256 if (Function
*Ctor
= M
.getFunction(CtorName
))
257 // FIXME: Sink this logic into the module, similar to the handling of
258 // globals. This will make moving to a concurrent model much easier.
259 if (Ctor
->arg_empty() ||
260 Ctor
->getReturnType() == Type::getVoidTy(M
.getContext()))
262 declareSanitizerInitFunction(M
, InitName
, InitArgTypes
, Weak
)};
265 FunctionCallee InitFunction
;
266 std::tie(Ctor
, InitFunction
) = llvm::createSanitizerCtorAndInitFunctions(
267 M
, CtorName
, InitName
, InitArgTypes
, InitArgs
, VersionCheckName
, Weak
);
268 FunctionsCreatedCallback(Ctor
, InitFunction
);
269 return std::make_pair(Ctor
, InitFunction
);
272 void llvm::filterDeadComdatFunctions(
273 SmallVectorImpl
<Function
*> &DeadComdatFunctions
) {
274 SmallPtrSet
<Function
*, 32> MaybeDeadFunctions
;
275 SmallPtrSet
<Comdat
*, 32> MaybeDeadComdats
;
276 for (Function
*F
: DeadComdatFunctions
) {
277 MaybeDeadFunctions
.insert(F
);
278 if (Comdat
*C
= F
->getComdat())
279 MaybeDeadComdats
.insert(C
);
282 // Find comdats for which all users are dead now.
283 SmallPtrSet
<Comdat
*, 32> DeadComdats
;
284 for (Comdat
*C
: MaybeDeadComdats
) {
285 auto IsUserDead
= [&](GlobalObject
*GO
) {
286 auto *F
= dyn_cast
<Function
>(GO
);
287 return F
&& MaybeDeadFunctions
.contains(F
);
289 if (all_of(C
->getUsers(), IsUserDead
))
290 DeadComdats
.insert(C
);
293 // Only keep functions which have no comdat or a dead comdat.
294 erase_if(DeadComdatFunctions
, [&](Function
*F
) {
295 Comdat
*C
= F
->getComdat();
296 return C
&& !DeadComdats
.contains(C
);
300 std::string
llvm::getUniqueModuleId(Module
*M
) {
302 bool ExportsSymbols
= false;
303 auto AddGlobal
= [&](GlobalValue
&GV
) {
304 if (GV
.isDeclaration() || GV
.getName().starts_with("llvm.") ||
305 !GV
.hasExternalLinkage() || GV
.hasComdat())
307 ExportsSymbols
= true;
308 Md5
.update(GV
.getName());
309 Md5
.update(ArrayRef
<uint8_t>{0});
314 for (auto &GV
: M
->globals())
316 for (auto &GA
: M
->aliases())
318 for (auto &IF
: M
->ifuncs())
328 MD5::stringifyResult(R
, Str
);
329 return ("." + Str
).str();
332 void llvm::embedBufferInModule(Module
&M
, MemoryBufferRef Buf
,
333 StringRef SectionName
, Align Alignment
) {
334 // Embed the memory buffer into the module.
335 Constant
*ModuleConstant
= ConstantDataArray::get(
336 M
.getContext(), ArrayRef(Buf
.getBufferStart(), Buf
.getBufferSize()));
337 GlobalVariable
*GV
= new GlobalVariable(
338 M
, ModuleConstant
->getType(), true, GlobalValue::PrivateLinkage
,
339 ModuleConstant
, "llvm.embedded.object");
340 GV
->setSection(SectionName
);
341 GV
->setAlignment(Alignment
);
343 LLVMContext
&Ctx
= M
.getContext();
344 NamedMDNode
*MD
= M
.getOrInsertNamedMetadata("llvm.embedded.objects");
345 Metadata
*MDVals
[] = {ConstantAsMetadata::get(GV
),
346 MDString::get(Ctx
, SectionName
)};
348 MD
->addOperand(llvm::MDNode::get(Ctx
, MDVals
));
349 GV
->setMetadata(LLVMContext::MD_exclude
, llvm::MDNode::get(Ctx
, {}));
351 appendToCompilerUsed(M
, GV
);
354 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
355 Module
&M
, ArrayRef
<GlobalIFunc
*> FilteredIFuncsToLower
) {
356 SmallVector
<GlobalIFunc
*, 32> AllIFuncs
;
357 ArrayRef
<GlobalIFunc
*> IFuncsToLower
= FilteredIFuncsToLower
;
358 if (FilteredIFuncsToLower
.empty()) { // Default to lowering all ifuncs
359 for (GlobalIFunc
&GI
: M
.ifuncs())
360 AllIFuncs
.push_back(&GI
);
361 IFuncsToLower
= AllIFuncs
;
364 bool UnhandledUsers
= false;
365 LLVMContext
&Ctx
= M
.getContext();
366 const DataLayout
&DL
= M
.getDataLayout();
368 PointerType
*TableEntryTy
=
369 PointerType::get(Ctx
, DL
.getProgramAddressSpace());
371 ArrayType
*FuncPtrTableTy
=
372 ArrayType::get(TableEntryTy
, IFuncsToLower
.size());
374 Align PtrAlign
= DL
.getABITypeAlign(TableEntryTy
);
376 // Create a global table of function pointers we'll initialize in a global
378 auto *FuncPtrTable
= new GlobalVariable(
379 M
, FuncPtrTableTy
, false, GlobalValue::InternalLinkage
,
380 PoisonValue::get(FuncPtrTableTy
), "", nullptr,
381 GlobalVariable::NotThreadLocal
, DL
.getDefaultGlobalsAddressSpace());
382 FuncPtrTable
->setAlignment(PtrAlign
);
384 // Create a function to initialize the function pointer table.
385 Function
*NewCtor
= Function::Create(
386 FunctionType::get(Type::getVoidTy(Ctx
), false), Function::InternalLinkage
,
387 DL
.getProgramAddressSpace(), "", &M
);
389 BasicBlock
*BB
= BasicBlock::Create(Ctx
, "", NewCtor
);
390 IRBuilder
<> InitBuilder(BB
);
392 size_t TableIndex
= 0;
393 for (GlobalIFunc
*GI
: IFuncsToLower
) {
394 Function
*ResolvedFunction
= GI
->getResolverFunction();
396 // We don't know what to pass to a resolver function taking arguments
398 // FIXME: Is this even valid? clang and gcc don't complain but this
399 // probably should be invalid IR. We could just pass through undef.
400 if (!std::empty(ResolvedFunction
->getFunctionType()->params())) {
401 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
402 << ResolvedFunction
->getName() << " with parameters\n");
403 UnhandledUsers
= true;
407 // Initialize the function pointer table.
408 CallInst
*ResolvedFunc
= InitBuilder
.CreateCall(ResolvedFunction
);
409 Value
*Casted
= InitBuilder
.CreatePointerCast(ResolvedFunc
, TableEntryTy
);
410 Constant
*GEP
= cast
<Constant
>(InitBuilder
.CreateConstInBoundsGEP2_32(
411 FuncPtrTableTy
, FuncPtrTable
, 0, TableIndex
++));
412 InitBuilder
.CreateAlignedStore(Casted
, GEP
, PtrAlign
);
414 // Update all users to load a pointer from the global table.
415 for (User
*User
: make_early_inc_range(GI
->users())) {
416 Instruction
*UserInst
= dyn_cast
<Instruction
>(User
);
418 // TODO: Should handle constantexpr casts in user instructions. Probably
419 // can't do much about constant initializers.
420 UnhandledUsers
= true;
424 IRBuilder
<> UseBuilder(UserInst
);
425 LoadInst
*ResolvedTarget
=
426 UseBuilder
.CreateAlignedLoad(TableEntryTy
, GEP
, PtrAlign
);
427 Value
*ResolvedCast
=
428 UseBuilder
.CreatePointerCast(ResolvedTarget
, GI
->getType());
429 UserInst
->replaceUsesOfWith(GI
, ResolvedCast
);
432 // If we handled all users, erase the ifunc.
434 GI
->eraseFromParent();
437 InitBuilder
.CreateRetVoid();
439 PointerType
*ConstantDataTy
= PointerType::get(Ctx
, 0);
441 // TODO: Is this the right priority? Probably should be before any other
443 const int Priority
= 10;
444 appendToGlobalCtors(M
, NewCtor
, Priority
,
445 ConstantPointerNull::get(ConstantDataTy
));
446 return UnhandledUsers
;