1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This family of functions performs manipulations on Modules.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/MD5.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Support/xxhash.h"
27 #define DEBUG_TYPE "moduleutils"
29 static void appendToGlobalArray(StringRef ArrayName
, Module
&M
, Function
*F
,
30 int Priority
, Constant
*Data
) {
31 IRBuilder
<> IRB(M
.getContext());
32 FunctionType
*FnTy
= FunctionType::get(IRB
.getVoidTy(), false);
34 // Get the current set of static global constructors and add the new ctor
36 SmallVector
<Constant
*, 16> CurrentCtors
;
38 if (GlobalVariable
*GVCtor
= M
.getNamedGlobal(ArrayName
)) {
39 EltTy
= cast
<StructType
>(GVCtor
->getValueType()->getArrayElementType());
40 if (Constant
*Init
= GVCtor
->getInitializer()) {
41 unsigned n
= Init
->getNumOperands();
42 CurrentCtors
.reserve(n
+ 1);
43 for (unsigned i
= 0; i
!= n
; ++i
)
44 CurrentCtors
.push_back(cast
<Constant
>(Init
->getOperand(i
)));
46 GVCtor
->eraseFromParent();
48 EltTy
= StructType::get(IRB
.getInt32Ty(),
49 PointerType::get(FnTy
, F
->getAddressSpace()),
53 // Build a 3 field global_ctor entry. We don't take a comdat key.
55 CSVals
[0] = IRB
.getInt32(Priority
);
57 CSVals
[2] = Data
? ConstantExpr::getPointerCast(Data
, IRB
.getPtrTy())
58 : Constant::getNullValue(IRB
.getPtrTy());
59 Constant
*RuntimeCtorInit
=
60 ConstantStruct::get(EltTy
, ArrayRef(CSVals
, EltTy
->getNumElements()));
62 CurrentCtors
.push_back(RuntimeCtorInit
);
64 // Create a new initializer.
65 ArrayType
*AT
= ArrayType::get(EltTy
, CurrentCtors
.size());
66 Constant
*NewInit
= ConstantArray::get(AT
, CurrentCtors
);
68 // Create the new global variable and replace all uses of
69 // the old global variable with the new one.
70 (void)new GlobalVariable(M
, NewInit
->getType(), false,
71 GlobalValue::AppendingLinkage
, NewInit
, ArrayName
);
74 void llvm::appendToGlobalCtors(Module
&M
, Function
*F
, int Priority
, Constant
*Data
) {
75 appendToGlobalArray("llvm.global_ctors", M
, F
, Priority
, Data
);
78 void llvm::appendToGlobalDtors(Module
&M
, Function
*F
, int Priority
, Constant
*Data
) {
79 appendToGlobalArray("llvm.global_dtors", M
, F
, Priority
, Data
);
82 static void transformGlobalArray(StringRef ArrayName
, Module
&M
,
83 const GlobalCtorTransformFn
&Fn
) {
84 GlobalVariable
*GVCtor
= M
.getNamedGlobal(ArrayName
);
88 IRBuilder
<> IRB(M
.getContext());
89 SmallVector
<Constant
*, 16> CurrentCtors
;
92 cast
<StructType
>(GVCtor
->getValueType()->getArrayElementType());
93 if (Constant
*Init
= GVCtor
->getInitializer()) {
94 CurrentCtors
.reserve(Init
->getNumOperands());
95 for (Value
*OP
: Init
->operands()) {
96 Constant
*C
= cast
<Constant
>(OP
);
97 Constant
*NewC
= Fn(C
);
98 Changed
|= (!NewC
|| NewC
!= C
);
100 CurrentCtors
.push_back(NewC
);
106 GVCtor
->eraseFromParent();
108 // Create a new initializer.
109 ArrayType
*AT
= ArrayType::get(EltTy
, CurrentCtors
.size());
110 Constant
*NewInit
= ConstantArray::get(AT
, CurrentCtors
);
112 // Create the new global variable and replace all uses of
113 // the old global variable with the new one.
114 (void)new GlobalVariable(M
, NewInit
->getType(), false,
115 GlobalValue::AppendingLinkage
, NewInit
, ArrayName
);
118 void llvm::transformGlobalCtors(Module
&M
, const GlobalCtorTransformFn
&Fn
) {
119 transformGlobalArray("llvm.global_ctors", M
, Fn
);
122 void llvm::transformGlobalDtors(Module
&M
, const GlobalCtorTransformFn
&Fn
) {
123 transformGlobalArray("llvm.global_dtors", M
, Fn
);
126 static void collectUsedGlobals(GlobalVariable
*GV
,
127 SmallSetVector
<Constant
*, 16> &Init
) {
128 if (!GV
|| !GV
->hasInitializer())
131 auto *CA
= cast
<ConstantArray
>(GV
->getInitializer());
132 for (Use
&Op
: CA
->operands())
133 Init
.insert(cast
<Constant
>(Op
));
136 static void appendToUsedList(Module
&M
, StringRef Name
, ArrayRef
<GlobalValue
*> Values
) {
137 GlobalVariable
*GV
= M
.getGlobalVariable(Name
);
139 SmallSetVector
<Constant
*, 16> Init
;
140 collectUsedGlobals(GV
, Init
);
142 GV
->eraseFromParent();
144 Type
*ArrayEltTy
= llvm::PointerType::getUnqual(M
.getContext());
145 for (auto *V
: Values
)
146 Init
.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V
, ArrayEltTy
));
151 ArrayType
*ATy
= ArrayType::get(ArrayEltTy
, Init
.size());
152 GV
= new llvm::GlobalVariable(M
, ATy
, false, GlobalValue::AppendingLinkage
,
153 ConstantArray::get(ATy
, Init
.getArrayRef()),
155 GV
->setSection("llvm.metadata");
158 void llvm::appendToUsed(Module
&M
, ArrayRef
<GlobalValue
*> Values
) {
159 appendToUsedList(M
, "llvm.used", Values
);
162 void llvm::appendToCompilerUsed(Module
&M
, ArrayRef
<GlobalValue
*> Values
) {
163 appendToUsedList(M
, "llvm.compiler.used", Values
);
166 static void removeFromUsedList(Module
&M
, StringRef Name
,
167 function_ref
<bool(Constant
*)> ShouldRemove
) {
168 GlobalVariable
*GV
= M
.getNamedGlobal(Name
);
172 SmallSetVector
<Constant
*, 16> Init
;
173 collectUsedGlobals(GV
, Init
);
175 Type
*ArrayEltTy
= cast
<ArrayType
>(GV
->getValueType())->getElementType();
177 SmallVector
<Constant
*, 16> NewInit
;
178 for (Constant
*MaybeRemoved
: Init
) {
179 if (!ShouldRemove(MaybeRemoved
->stripPointerCasts()))
180 NewInit
.push_back(MaybeRemoved
);
183 if (!NewInit
.empty()) {
184 ArrayType
*ATy
= ArrayType::get(ArrayEltTy
, NewInit
.size());
185 GlobalVariable
*NewGV
=
186 new GlobalVariable(M
, ATy
, false, GlobalValue::AppendingLinkage
,
187 ConstantArray::get(ATy
, NewInit
), "", GV
,
188 GV
->getThreadLocalMode(), GV
->getAddressSpace());
189 NewGV
->setSection(GV
->getSection());
193 GV
->eraseFromParent();
196 void llvm::removeFromUsedLists(Module
&M
,
197 function_ref
<bool(Constant
*)> ShouldRemove
) {
198 removeFromUsedList(M
, "llvm.used", ShouldRemove
);
199 removeFromUsedList(M
, "llvm.compiler.used", ShouldRemove
);
202 void llvm::setKCFIType(Module
&M
, Function
&F
, StringRef MangledType
) {
203 if (!M
.getModuleFlag("kcfi"))
205 // Matches CodeGenModule::CreateKCFITypeId in Clang.
206 LLVMContext
&Ctx
= M
.getContext();
208 std::string Type
= MangledType
.str();
209 if (M
.getModuleFlag("cfi-normalize-integers"))
210 Type
+= ".normalized";
211 F
.setMetadata(LLVMContext::MD_kcfi_type
,
212 MDNode::get(Ctx
, MDB
.createConstant(ConstantInt::get(
213 Type::getInt32Ty(Ctx
),
214 static_cast<uint32_t>(xxHash64(Type
))))));
215 // If the module was compiled with -fpatchable-function-entry, ensure
216 // we use the same patchable-function-prefix.
217 if (auto *MD
= mdconst::extract_or_null
<ConstantInt
>(
218 M
.getModuleFlag("kcfi-offset"))) {
219 if (unsigned Offset
= MD
->getZExtValue())
220 F
.addFnAttr("patchable-function-prefix", std::to_string(Offset
));
224 FunctionCallee
llvm::declareSanitizerInitFunction(Module
&M
, StringRef InitName
,
225 ArrayRef
<Type
*> InitArgTypes
,
227 assert(!InitName
.empty() && "Expected init function name");
228 auto *VoidTy
= Type::getVoidTy(M
.getContext());
229 auto *FnTy
= FunctionType::get(VoidTy
, InitArgTypes
, false);
230 auto FnCallee
= M
.getOrInsertFunction(InitName
, FnTy
);
231 auto *Fn
= cast
<Function
>(FnCallee
.getCallee());
232 if (Weak
&& Fn
->isDeclaration())
233 Fn
->setLinkage(Function::ExternalWeakLinkage
);
237 Function
*llvm::createSanitizerCtor(Module
&M
, StringRef CtorName
) {
238 Function
*Ctor
= Function::createWithDefaultAttr(
239 FunctionType::get(Type::getVoidTy(M
.getContext()), false),
240 GlobalValue::InternalLinkage
, M
.getDataLayout().getProgramAddressSpace(),
242 Ctor
->addFnAttr(Attribute::NoUnwind
);
243 setKCFIType(M
, *Ctor
, "_ZTSFvvE"); // void (*)(void)
244 BasicBlock
*CtorBB
= BasicBlock::Create(M
.getContext(), "", Ctor
);
245 ReturnInst::Create(M
.getContext(), CtorBB
);
246 // Ensure Ctor cannot be discarded, even if in a comdat.
247 appendToUsed(M
, {Ctor
});
251 std::pair
<Function
*, FunctionCallee
> llvm::createSanitizerCtorAndInitFunctions(
252 Module
&M
, StringRef CtorName
, StringRef InitName
,
253 ArrayRef
<Type
*> InitArgTypes
, ArrayRef
<Value
*> InitArgs
,
254 StringRef VersionCheckName
, bool Weak
) {
255 assert(!InitName
.empty() && "Expected init function name");
256 assert(InitArgs
.size() == InitArgTypes
.size() &&
257 "Sanitizer's init function expects different number of arguments");
258 FunctionCallee InitFunction
=
259 declareSanitizerInitFunction(M
, InitName
, InitArgTypes
, Weak
);
260 Function
*Ctor
= createSanitizerCtor(M
, CtorName
);
261 IRBuilder
<> IRB(M
.getContext());
263 BasicBlock
*RetBB
= &Ctor
->getEntryBlock();
265 RetBB
->setName("ret");
266 auto *EntryBB
= BasicBlock::Create(M
.getContext(), "entry", Ctor
, RetBB
);
268 BasicBlock::Create(M
.getContext(), "callfunc", Ctor
, RetBB
);
269 auto *InitFn
= cast
<Function
>(InitFunction
.getCallee());
271 PointerType::get(InitFn
->getType(), InitFn
->getAddressSpace());
272 IRB
.SetInsertPoint(EntryBB
);
274 IRB
.CreateICmpNE(InitFn
, ConstantPointerNull::get(InitFnPtr
));
275 IRB
.CreateCondBr(InitNotNull
, CallInitBB
, RetBB
);
276 IRB
.SetInsertPoint(CallInitBB
);
278 IRB
.SetInsertPoint(RetBB
->getTerminator());
281 IRB
.CreateCall(InitFunction
, InitArgs
);
282 if (!VersionCheckName
.empty()) {
283 FunctionCallee VersionCheckFunction
= M
.getOrInsertFunction(
284 VersionCheckName
, FunctionType::get(IRB
.getVoidTy(), {}, false),
286 IRB
.CreateCall(VersionCheckFunction
, {});
292 return std::make_pair(Ctor
, InitFunction
);
295 std::pair
<Function
*, FunctionCallee
>
296 llvm::getOrCreateSanitizerCtorAndInitFunctions(
297 Module
&M
, StringRef CtorName
, StringRef InitName
,
298 ArrayRef
<Type
*> InitArgTypes
, ArrayRef
<Value
*> InitArgs
,
299 function_ref
<void(Function
*, FunctionCallee
)> FunctionsCreatedCallback
,
300 StringRef VersionCheckName
, bool Weak
) {
301 assert(!CtorName
.empty() && "Expected ctor function name");
303 if (Function
*Ctor
= M
.getFunction(CtorName
))
304 // FIXME: Sink this logic into the module, similar to the handling of
305 // globals. This will make moving to a concurrent model much easier.
306 if (Ctor
->arg_empty() ||
307 Ctor
->getReturnType() == Type::getVoidTy(M
.getContext()))
309 declareSanitizerInitFunction(M
, InitName
, InitArgTypes
, Weak
)};
312 FunctionCallee InitFunction
;
313 std::tie(Ctor
, InitFunction
) = llvm::createSanitizerCtorAndInitFunctions(
314 M
, CtorName
, InitName
, InitArgTypes
, InitArgs
, VersionCheckName
, Weak
);
315 FunctionsCreatedCallback(Ctor
, InitFunction
);
316 return std::make_pair(Ctor
, InitFunction
);
319 void llvm::filterDeadComdatFunctions(
320 SmallVectorImpl
<Function
*> &DeadComdatFunctions
) {
321 SmallPtrSet
<Function
*, 32> MaybeDeadFunctions
;
322 SmallPtrSet
<Comdat
*, 32> MaybeDeadComdats
;
323 for (Function
*F
: DeadComdatFunctions
) {
324 MaybeDeadFunctions
.insert(F
);
325 if (Comdat
*C
= F
->getComdat())
326 MaybeDeadComdats
.insert(C
);
329 // Find comdats for which all users are dead now.
330 SmallPtrSet
<Comdat
*, 32> DeadComdats
;
331 for (Comdat
*C
: MaybeDeadComdats
) {
332 auto IsUserDead
= [&](GlobalObject
*GO
) {
333 auto *F
= dyn_cast
<Function
>(GO
);
334 return F
&& MaybeDeadFunctions
.contains(F
);
336 if (all_of(C
->getUsers(), IsUserDead
))
337 DeadComdats
.insert(C
);
340 // Only keep functions which have no comdat or a dead comdat.
341 erase_if(DeadComdatFunctions
, [&](Function
*F
) {
342 Comdat
*C
= F
->getComdat();
343 return C
&& !DeadComdats
.contains(C
);
347 std::string
llvm::getUniqueModuleId(Module
*M
) {
349 bool ExportsSymbols
= false;
350 auto AddGlobal
= [&](GlobalValue
&GV
) {
351 if (GV
.isDeclaration() || GV
.getName().starts_with("llvm.") ||
352 !GV
.hasExternalLinkage() || GV
.hasComdat())
354 ExportsSymbols
= true;
355 Md5
.update(GV
.getName());
356 Md5
.update(ArrayRef
<uint8_t>{0});
361 for (auto &GV
: M
->globals())
363 for (auto &GA
: M
->aliases())
365 for (auto &IF
: M
->ifuncs())
375 MD5::stringifyResult(R
, Str
);
376 return ("." + Str
).str();
379 void llvm::embedBufferInModule(Module
&M
, MemoryBufferRef Buf
,
380 StringRef SectionName
, Align Alignment
) {
381 // Embed the memory buffer into the module.
382 Constant
*ModuleConstant
= ConstantDataArray::get(
383 M
.getContext(), ArrayRef(Buf
.getBufferStart(), Buf
.getBufferSize()));
384 GlobalVariable
*GV
= new GlobalVariable(
385 M
, ModuleConstant
->getType(), true, GlobalValue::PrivateLinkage
,
386 ModuleConstant
, "llvm.embedded.object");
387 GV
->setSection(SectionName
);
388 GV
->setAlignment(Alignment
);
390 LLVMContext
&Ctx
= M
.getContext();
391 NamedMDNode
*MD
= M
.getOrInsertNamedMetadata("llvm.embedded.objects");
392 Metadata
*MDVals
[] = {ConstantAsMetadata::get(GV
),
393 MDString::get(Ctx
, SectionName
)};
395 MD
->addOperand(llvm::MDNode::get(Ctx
, MDVals
));
396 GV
->setMetadata(LLVMContext::MD_exclude
, llvm::MDNode::get(Ctx
, {}));
398 appendToCompilerUsed(M
, GV
);
401 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
402 Module
&M
, ArrayRef
<GlobalIFunc
*> FilteredIFuncsToLower
) {
403 SmallVector
<GlobalIFunc
*, 32> AllIFuncs
;
404 ArrayRef
<GlobalIFunc
*> IFuncsToLower
= FilteredIFuncsToLower
;
405 if (FilteredIFuncsToLower
.empty()) { // Default to lowering all ifuncs
406 for (GlobalIFunc
&GI
: M
.ifuncs())
407 AllIFuncs
.push_back(&GI
);
408 IFuncsToLower
= AllIFuncs
;
411 bool UnhandledUsers
= false;
412 LLVMContext
&Ctx
= M
.getContext();
413 const DataLayout
&DL
= M
.getDataLayout();
415 PointerType
*TableEntryTy
=
416 PointerType::get(Ctx
, DL
.getProgramAddressSpace());
418 ArrayType
*FuncPtrTableTy
=
419 ArrayType::get(TableEntryTy
, IFuncsToLower
.size());
421 Align PtrAlign
= DL
.getABITypeAlign(TableEntryTy
);
423 // Create a global table of function pointers we'll initialize in a global
425 auto *FuncPtrTable
= new GlobalVariable(
426 M
, FuncPtrTableTy
, false, GlobalValue::InternalLinkage
,
427 PoisonValue::get(FuncPtrTableTy
), "", nullptr,
428 GlobalVariable::NotThreadLocal
, DL
.getDefaultGlobalsAddressSpace());
429 FuncPtrTable
->setAlignment(PtrAlign
);
431 // Create a function to initialize the function pointer table.
432 Function
*NewCtor
= Function::Create(
433 FunctionType::get(Type::getVoidTy(Ctx
), false), Function::InternalLinkage
,
434 DL
.getProgramAddressSpace(), "", &M
);
436 BasicBlock
*BB
= BasicBlock::Create(Ctx
, "", NewCtor
);
437 IRBuilder
<> InitBuilder(BB
);
439 size_t TableIndex
= 0;
440 for (GlobalIFunc
*GI
: IFuncsToLower
) {
441 Function
*ResolvedFunction
= GI
->getResolverFunction();
443 // We don't know what to pass to a resolver function taking arguments
445 // FIXME: Is this even valid? clang and gcc don't complain but this
446 // probably should be invalid IR. We could just pass through undef.
447 if (!std::empty(ResolvedFunction
->getFunctionType()->params())) {
448 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
449 << ResolvedFunction
->getName() << " with parameters\n");
450 UnhandledUsers
= true;
454 // Initialize the function pointer table.
455 CallInst
*ResolvedFunc
= InitBuilder
.CreateCall(ResolvedFunction
);
456 Value
*Casted
= InitBuilder
.CreatePointerCast(ResolvedFunc
, TableEntryTy
);
457 Constant
*GEP
= cast
<Constant
>(InitBuilder
.CreateConstInBoundsGEP2_32(
458 FuncPtrTableTy
, FuncPtrTable
, 0, TableIndex
++));
459 InitBuilder
.CreateAlignedStore(Casted
, GEP
, PtrAlign
);
461 // Update all users to load a pointer from the global table.
462 for (User
*User
: make_early_inc_range(GI
->users())) {
463 Instruction
*UserInst
= dyn_cast
<Instruction
>(User
);
465 // TODO: Should handle constantexpr casts in user instructions. Probably
466 // can't do much about constant initializers.
467 UnhandledUsers
= true;
471 IRBuilder
<> UseBuilder(UserInst
);
472 LoadInst
*ResolvedTarget
=
473 UseBuilder
.CreateAlignedLoad(TableEntryTy
, GEP
, PtrAlign
);
474 Value
*ResolvedCast
=
475 UseBuilder
.CreatePointerCast(ResolvedTarget
, GI
->getType());
476 UserInst
->replaceUsesOfWith(GI
, ResolvedCast
);
479 // If we handled all users, erase the ifunc.
481 GI
->eraseFromParent();
484 InitBuilder
.CreateRetVoid();
486 PointerType
*ConstantDataTy
= PointerType::get(Ctx
, 0);
488 // TODO: Is this the right priority? Probably should be before any other
490 const int Priority
= 10;
491 appendToGlobalCtors(M
, NewCtor
, Priority
,
492 ConstantPointerNull::get(ConstantDataTy
));
493 return UnhandledUsers
;