[win/asan] GetInstructionSize: Fix `83 E4 XX` to return 3. (#119644)
[llvm-project.git] / llvm / lib / Transforms / Utils / ModuleUtils.cpp
blob7249571f34493879150f1b177f30e27f0a2876bd
1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions performs manipulations on Modules.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/MD5.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Support/xxhash.h"
25 using namespace llvm;
27 #define DEBUG_TYPE "moduleutils"
29 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
30 int Priority, Constant *Data) {
31 IRBuilder<> IRB(M.getContext());
32 FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
34 // Get the current set of static global constructors and add the new ctor
35 // to the list.
36 SmallVector<Constant *, 16> CurrentCtors;
37 StructType *EltTy;
38 if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
39 EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
40 if (Constant *Init = GVCtor->getInitializer()) {
41 unsigned n = Init->getNumOperands();
42 CurrentCtors.reserve(n + 1);
43 for (unsigned i = 0; i != n; ++i)
44 CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
46 GVCtor->eraseFromParent();
47 } else {
48 EltTy = StructType::get(IRB.getInt32Ty(),
49 PointerType::get(FnTy, F->getAddressSpace()),
50 IRB.getPtrTy());
53 // Build a 3 field global_ctor entry. We don't take a comdat key.
54 Constant *CSVals[3];
55 CSVals[0] = IRB.getInt32(Priority);
56 CSVals[1] = F;
57 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
58 : Constant::getNullValue(IRB.getPtrTy());
59 Constant *RuntimeCtorInit =
60 ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
62 CurrentCtors.push_back(RuntimeCtorInit);
64 // Create a new initializer.
65 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
66 Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
68 // Create the new global variable and replace all uses of
69 // the old global variable with the new one.
70 (void)new GlobalVariable(M, NewInit->getType(), false,
71 GlobalValue::AppendingLinkage, NewInit, ArrayName);
74 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
75 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
78 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
79 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
82 static void transformGlobalArray(StringRef ArrayName, Module &M,
83 const GlobalCtorTransformFn &Fn) {
84 GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName);
85 if (!GVCtor)
86 return;
88 IRBuilder<> IRB(M.getContext());
89 SmallVector<Constant *, 16> CurrentCtors;
90 bool Changed = false;
91 StructType *EltTy =
92 cast<StructType>(GVCtor->getValueType()->getArrayElementType());
93 if (Constant *Init = GVCtor->getInitializer()) {
94 CurrentCtors.reserve(Init->getNumOperands());
95 for (Value *OP : Init->operands()) {
96 Constant *C = cast<Constant>(OP);
97 Constant *NewC = Fn(C);
98 Changed |= (!NewC || NewC != C);
99 if (NewC)
100 CurrentCtors.push_back(NewC);
103 if (!Changed)
104 return;
106 GVCtor->eraseFromParent();
108 // Create a new initializer.
109 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
110 Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
112 // Create the new global variable and replace all uses of
113 // the old global variable with the new one.
114 (void)new GlobalVariable(M, NewInit->getType(), false,
115 GlobalValue::AppendingLinkage, NewInit, ArrayName);
118 void llvm::transformGlobalCtors(Module &M, const GlobalCtorTransformFn &Fn) {
119 transformGlobalArray("llvm.global_ctors", M, Fn);
122 void llvm::transformGlobalDtors(Module &M, const GlobalCtorTransformFn &Fn) {
123 transformGlobalArray("llvm.global_dtors", M, Fn);
126 static void collectUsedGlobals(GlobalVariable *GV,
127 SmallSetVector<Constant *, 16> &Init) {
128 if (!GV || !GV->hasInitializer())
129 return;
131 auto *CA = cast<ConstantArray>(GV->getInitializer());
132 for (Use &Op : CA->operands())
133 Init.insert(cast<Constant>(Op));
136 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
137 GlobalVariable *GV = M.getGlobalVariable(Name);
139 SmallSetVector<Constant *, 16> Init;
140 collectUsedGlobals(GV, Init);
141 if (GV)
142 GV->eraseFromParent();
144 Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
145 for (auto *V : Values)
146 Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
148 if (Init.empty())
149 return;
151 ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
152 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
153 ConstantArray::get(ATy, Init.getArrayRef()),
154 Name);
155 GV->setSection("llvm.metadata");
158 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
159 appendToUsedList(M, "llvm.used", Values);
162 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
163 appendToUsedList(M, "llvm.compiler.used", Values);
166 static void removeFromUsedList(Module &M, StringRef Name,
167 function_ref<bool(Constant *)> ShouldRemove) {
168 GlobalVariable *GV = M.getNamedGlobal(Name);
169 if (!GV)
170 return;
172 SmallSetVector<Constant *, 16> Init;
173 collectUsedGlobals(GV, Init);
175 Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
177 SmallVector<Constant *, 16> NewInit;
178 for (Constant *MaybeRemoved : Init) {
179 if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
180 NewInit.push_back(MaybeRemoved);
183 if (!NewInit.empty()) {
184 ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
185 GlobalVariable *NewGV =
186 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
187 ConstantArray::get(ATy, NewInit), "", GV,
188 GV->getThreadLocalMode(), GV->getAddressSpace());
189 NewGV->setSection(GV->getSection());
190 NewGV->takeName(GV);
193 GV->eraseFromParent();
196 void llvm::removeFromUsedLists(Module &M,
197 function_ref<bool(Constant *)> ShouldRemove) {
198 removeFromUsedList(M, "llvm.used", ShouldRemove);
199 removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
202 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
203 if (!M.getModuleFlag("kcfi"))
204 return;
205 // Matches CodeGenModule::CreateKCFITypeId in Clang.
206 LLVMContext &Ctx = M.getContext();
207 MDBuilder MDB(Ctx);
208 std::string Type = MangledType.str();
209 if (M.getModuleFlag("cfi-normalize-integers"))
210 Type += ".normalized";
211 F.setMetadata(LLVMContext::MD_kcfi_type,
212 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
213 Type::getInt32Ty(Ctx),
214 static_cast<uint32_t>(xxHash64(Type))))));
215 // If the module was compiled with -fpatchable-function-entry, ensure
216 // we use the same patchable-function-prefix.
217 if (auto *MD = mdconst::extract_or_null<ConstantInt>(
218 M.getModuleFlag("kcfi-offset"))) {
219 if (unsigned Offset = MD->getZExtValue())
220 F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
224 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
225 ArrayRef<Type *> InitArgTypes,
226 bool Weak) {
227 assert(!InitName.empty() && "Expected init function name");
228 auto *VoidTy = Type::getVoidTy(M.getContext());
229 auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
230 auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
231 auto *Fn = cast<Function>(FnCallee.getCallee());
232 if (Weak && Fn->isDeclaration())
233 Fn->setLinkage(Function::ExternalWeakLinkage);
234 return FnCallee;
237 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
238 Function *Ctor = Function::createWithDefaultAttr(
239 FunctionType::get(Type::getVoidTy(M.getContext()), false),
240 GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
241 CtorName, &M);
242 Ctor->addFnAttr(Attribute::NoUnwind);
243 setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
244 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
245 ReturnInst::Create(M.getContext(), CtorBB);
246 // Ensure Ctor cannot be discarded, even if in a comdat.
247 appendToUsed(M, {Ctor});
248 return Ctor;
251 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
252 Module &M, StringRef CtorName, StringRef InitName,
253 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
254 StringRef VersionCheckName, bool Weak) {
255 assert(!InitName.empty() && "Expected init function name");
256 assert(InitArgs.size() == InitArgTypes.size() &&
257 "Sanitizer's init function expects different number of arguments");
258 FunctionCallee InitFunction =
259 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
260 Function *Ctor = createSanitizerCtor(M, CtorName);
261 IRBuilder<> IRB(M.getContext());
263 BasicBlock *RetBB = &Ctor->getEntryBlock();
264 if (Weak) {
265 RetBB->setName("ret");
266 auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
267 auto *CallInitBB =
268 BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
269 auto *InitFn = cast<Function>(InitFunction.getCallee());
270 auto *InitFnPtr =
271 PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
272 IRB.SetInsertPoint(EntryBB);
273 Value *InitNotNull =
274 IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
275 IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
276 IRB.SetInsertPoint(CallInitBB);
277 } else {
278 IRB.SetInsertPoint(RetBB->getTerminator());
281 IRB.CreateCall(InitFunction, InitArgs);
282 if (!VersionCheckName.empty()) {
283 FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
284 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
285 AttributeList());
286 IRB.CreateCall(VersionCheckFunction, {});
289 if (Weak)
290 IRB.CreateBr(RetBB);
292 return std::make_pair(Ctor, InitFunction);
295 std::pair<Function *, FunctionCallee>
296 llvm::getOrCreateSanitizerCtorAndInitFunctions(
297 Module &M, StringRef CtorName, StringRef InitName,
298 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
299 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
300 StringRef VersionCheckName, bool Weak) {
301 assert(!CtorName.empty() && "Expected ctor function name");
303 if (Function *Ctor = M.getFunction(CtorName))
304 // FIXME: Sink this logic into the module, similar to the handling of
305 // globals. This will make moving to a concurrent model much easier.
306 if (Ctor->arg_empty() ||
307 Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
308 return {Ctor,
309 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
311 Function *Ctor;
312 FunctionCallee InitFunction;
313 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
314 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
315 FunctionsCreatedCallback(Ctor, InitFunction);
316 return std::make_pair(Ctor, InitFunction);
319 void llvm::filterDeadComdatFunctions(
320 SmallVectorImpl<Function *> &DeadComdatFunctions) {
321 SmallPtrSet<Function *, 32> MaybeDeadFunctions;
322 SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
323 for (Function *F : DeadComdatFunctions) {
324 MaybeDeadFunctions.insert(F);
325 if (Comdat *C = F->getComdat())
326 MaybeDeadComdats.insert(C);
329 // Find comdats for which all users are dead now.
330 SmallPtrSet<Comdat *, 32> DeadComdats;
331 for (Comdat *C : MaybeDeadComdats) {
332 auto IsUserDead = [&](GlobalObject *GO) {
333 auto *F = dyn_cast<Function>(GO);
334 return F && MaybeDeadFunctions.contains(F);
336 if (all_of(C->getUsers(), IsUserDead))
337 DeadComdats.insert(C);
340 // Only keep functions which have no comdat or a dead comdat.
341 erase_if(DeadComdatFunctions, [&](Function *F) {
342 Comdat *C = F->getComdat();
343 return C && !DeadComdats.contains(C);
347 std::string llvm::getUniqueModuleId(Module *M) {
348 MD5 Md5;
349 bool ExportsSymbols = false;
350 auto AddGlobal = [&](GlobalValue &GV) {
351 if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
352 !GV.hasExternalLinkage() || GV.hasComdat())
353 return;
354 ExportsSymbols = true;
355 Md5.update(GV.getName());
356 Md5.update(ArrayRef<uint8_t>{0});
359 for (auto &F : *M)
360 AddGlobal(F);
361 for (auto &GV : M->globals())
362 AddGlobal(GV);
363 for (auto &GA : M->aliases())
364 AddGlobal(GA);
365 for (auto &IF : M->ifuncs())
366 AddGlobal(IF);
368 if (!ExportsSymbols)
369 return "";
371 MD5::MD5Result R;
372 Md5.final(R);
374 SmallString<32> Str;
375 MD5::stringifyResult(R, Str);
376 return ("." + Str).str();
379 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
380 StringRef SectionName, Align Alignment) {
381 // Embed the memory buffer into the module.
382 Constant *ModuleConstant = ConstantDataArray::get(
383 M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
384 GlobalVariable *GV = new GlobalVariable(
385 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
386 ModuleConstant, "llvm.embedded.object");
387 GV->setSection(SectionName);
388 GV->setAlignment(Alignment);
390 LLVMContext &Ctx = M.getContext();
391 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
392 Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
393 MDString::get(Ctx, SectionName)};
395 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
396 GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
398 appendToCompilerUsed(M, GV);
401 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
402 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
403 SmallVector<GlobalIFunc *, 32> AllIFuncs;
404 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
405 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
406 for (GlobalIFunc &GI : M.ifuncs())
407 AllIFuncs.push_back(&GI);
408 IFuncsToLower = AllIFuncs;
411 bool UnhandledUsers = false;
412 LLVMContext &Ctx = M.getContext();
413 const DataLayout &DL = M.getDataLayout();
415 PointerType *TableEntryTy =
416 PointerType::get(Ctx, DL.getProgramAddressSpace());
418 ArrayType *FuncPtrTableTy =
419 ArrayType::get(TableEntryTy, IFuncsToLower.size());
421 Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
423 // Create a global table of function pointers we'll initialize in a global
424 // constructor.
425 auto *FuncPtrTable = new GlobalVariable(
426 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
427 PoisonValue::get(FuncPtrTableTy), "", nullptr,
428 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
429 FuncPtrTable->setAlignment(PtrAlign);
431 // Create a function to initialize the function pointer table.
432 Function *NewCtor = Function::Create(
433 FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
434 DL.getProgramAddressSpace(), "", &M);
436 BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
437 IRBuilder<> InitBuilder(BB);
439 size_t TableIndex = 0;
440 for (GlobalIFunc *GI : IFuncsToLower) {
441 Function *ResolvedFunction = GI->getResolverFunction();
443 // We don't know what to pass to a resolver function taking arguments
445 // FIXME: Is this even valid? clang and gcc don't complain but this
446 // probably should be invalid IR. We could just pass through undef.
447 if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
448 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
449 << ResolvedFunction->getName() << " with parameters\n");
450 UnhandledUsers = true;
451 continue;
454 // Initialize the function pointer table.
455 CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
456 Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
457 Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
458 FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
459 InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
461 // Update all users to load a pointer from the global table.
462 for (User *User : make_early_inc_range(GI->users())) {
463 Instruction *UserInst = dyn_cast<Instruction>(User);
464 if (!UserInst) {
465 // TODO: Should handle constantexpr casts in user instructions. Probably
466 // can't do much about constant initializers.
467 UnhandledUsers = true;
468 continue;
471 IRBuilder<> UseBuilder(UserInst);
472 LoadInst *ResolvedTarget =
473 UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
474 Value *ResolvedCast =
475 UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
476 UserInst->replaceUsesOfWith(GI, ResolvedCast);
479 // If we handled all users, erase the ifunc.
480 if (GI->use_empty())
481 GI->eraseFromParent();
484 InitBuilder.CreateRetVoid();
486 PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
488 // TODO: Is this the right priority? Probably should be before any other
489 // constructors?
490 const int Priority = 10;
491 appendToGlobalCtors(M, NewCtor, Priority,
492 ConstantPointerNull::get(ConstantDataTy));
493 return UnhandledUsers;