1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is a part of SanitizerBinaryMetadata.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/Analysis/CaptureTracking.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/ProfileData/InstrProf.h"
37 #include "llvm/Support/Allocator.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/SpecialCaseList.h"
40 #include "llvm/Support/StringSaver.h"
41 #include "llvm/Support/VirtualFileSystem.h"
42 #include "llvm/TargetParser/Triple.h"
43 #include "llvm/Transforms/Utils/ModuleUtils.h"
51 #define DEBUG_TYPE "sanmd"
55 //===--- Constants --------------------------------------------------------===//
// Base metadata format version; occupies the lower 16 bits of the version.
constexpr uint32_t kVersionBase = 2;
// Version flag (bit 16) signalling that offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority with which the generated module ctors/dtors are registered.
constexpr int kCtorDtorPriority = 2;
61 // Pairs of names of initialization callback functions and which section
62 // contains the relevant metadata.
65 const StringRef FunctionPrefix
;
66 const StringRef SectionSuffix
;
68 static const MetadataInfo Covered
;
69 static const MetadataInfo Atomics
;
72 // Forbid construction elsewhere.
73 explicit constexpr MetadataInfo(StringRef FunctionPrefix
,
74 StringRef SectionSuffix
)
75 : FunctionPrefix(FunctionPrefix
), SectionSuffix(SectionSuffix
) {}
77 const MetadataInfo
MetadataInfo::Covered
{
78 "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection
};
79 const MetadataInfo
MetadataInfo::Atomics
{
80 "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection
};
82 // The only instances of MetadataInfo are the constants above, so a set of
83 // them may simply store pointers to them. To deterministically generate code,
84 // we need to use a set with stable iteration order, such as SetVector.
85 using MetadataInfoSet
= SetVector
<const MetadataInfo
*>;
87 //===--- Command-line options ---------------------------------------------===//
89 cl::opt
<bool> ClWeakCallbacks(
90 "sanitizer-metadata-weak-callbacks",
91 cl::desc("Declare callbacks extern weak, and only call if non-null."),
92 cl::Hidden
, cl::init(true));
94 ClNoSanitize("sanitizer-metadata-nosanitize-attr",
95 cl::desc("Mark some metadata features uncovered in functions "
96 "with associated no_sanitize attributes."),
97 cl::Hidden
, cl::init(true));
99 cl::opt
<bool> ClEmitCovered("sanitizer-metadata-covered",
100 cl::desc("Emit PCs for covered functions."),
101 cl::Hidden
, cl::init(false));
102 cl::opt
<bool> ClEmitAtomics("sanitizer-metadata-atomics",
103 cl::desc("Emit PCs for atomic operations."),
104 cl::Hidden
, cl::init(false));
105 cl::opt
<bool> ClEmitUAR("sanitizer-metadata-uar",
106 cl::desc("Emit PCs for start of functions that are "
107 "subject for use-after-return checking"),
108 cl::Hidden
, cl::init(false));
110 //===--- Statistics -------------------------------------------------------===//
112 STATISTIC(NumMetadataCovered
, "Metadata attached to covered functions");
113 STATISTIC(NumMetadataAtomics
, "Metadata attached to atomics");
114 STATISTIC(NumMetadataUAR
, "Metadata attached to UAR functions");
116 //===----------------------------------------------------------------------===//
118 // Apply opt overrides.
119 SanitizerBinaryMetadataOptions
&&
120 transformOptionsFromCl(SanitizerBinaryMetadataOptions
&&Opts
) {
121 Opts
.Covered
|= ClEmitCovered
;
122 Opts
.Atomics
|= ClEmitAtomics
;
123 Opts
.UAR
|= ClEmitUAR
;
124 return std::move(Opts
);
127 class SanitizerBinaryMetadata
{
129 SanitizerBinaryMetadata(Module
&M
, SanitizerBinaryMetadataOptions Opts
,
130 std::unique_ptr
<SpecialCaseList
> Ignorelist
)
131 : Mod(M
), Options(transformOptionsFromCl(std::move(Opts
))),
132 Ignorelist(std::move(Ignorelist
)), TargetTriple(M
.getTargetTriple()),
133 VersionStr(utostr(getVersion())), IRB(M
.getContext()) {
134 // FIXME: Make it work with other formats.
135 assert(TargetTriple
.isOSBinFormatELF() && "ELF only");
136 assert(!(TargetTriple
.isNVPTX() || TargetTriple
.isAMDGPU()) &&
137 "Device targets are not supported");
143 uint32_t getVersion() const {
144 uint32_t Version
= kVersionBase
;
145 const auto CM
= Mod
.getCodeModel();
146 if (CM
.has_value() && (*CM
== CodeModel::Medium
|| *CM
== CodeModel::Large
))
147 Version
|= kVersionPtrSizeRel
;
151 void runOn(Function
&F
, MetadataInfoSet
&MIS
);
153 // Determines which set of metadata to collect for this instruction.
155 // Returns true if covered metadata is required to unambiguously interpret
156 // other metadata. For example, if we are interested in atomics metadata, any
157 // function with memory operations (atomic or not) requires covered metadata
158 // to determine if a memory operation is atomic or not in modules compiled
159 // with SanitizerBinaryMetadata.
160 bool runOn(Instruction
&I
, MetadataInfoSet
&MIS
, MDBuilder
&MDB
,
161 uint64_t &FeatureMask
);
163 // Get start/end section marker pointer.
164 GlobalVariable
*getSectionMarker(const Twine
&MarkerName
, Type
*Ty
);
166 // Returns the target-dependent section name.
167 StringRef
getSectionName(StringRef SectionSuffix
);
169 // Returns the section start marker name.
170 StringRef
getSectionStart(StringRef SectionSuffix
);
172 // Returns the section end marker name.
173 StringRef
getSectionEnd(StringRef SectionSuffix
);
175 // Returns true if the access to the address should be considered "atomic".
176 bool pretendAtomicAccess(const Value
*Addr
);
179 const SanitizerBinaryMetadataOptions Options
;
180 std::unique_ptr
<SpecialCaseList
> Ignorelist
;
181 const Triple TargetTriple
;
182 const std::string VersionStr
;
184 BumpPtrAllocator Alloc
;
185 UniqueStringSaver StringPool
{Alloc
};
188 bool SanitizerBinaryMetadata::run() {
191 for (Function
&F
: Mod
)
198 // Setup constructors and call all initialization functions for requested
199 // metadata features.
202 auto *PtrTy
= IRB
.getPtrTy();
203 auto *Int32Ty
= IRB
.getInt32Ty();
204 const std::array
<Type
*, 3> InitTypes
= {Int32Ty
, PtrTy
, PtrTy
};
205 auto *Version
= ConstantInt::get(Int32Ty
, getVersion());
207 for (const MetadataInfo
*MI
: MIS
) {
208 const std::array
<Value
*, InitTypes
.size()> InitArgs
= {
210 getSectionMarker(getSectionStart(MI
->SectionSuffix
), PtrTy
),
211 getSectionMarker(getSectionEnd(MI
->SectionSuffix
), PtrTy
),
214 // Calls to the initialization functions with different versions cannot be
215 // merged. Give the structors unique names based on the version, which will
216 // also be used as the COMDAT key.
217 const std::string StructorPrefix
= (MI
->FunctionPrefix
+ VersionStr
).str();
219 // We declare the _add and _del functions as weak, and only call them if
220 // there is a valid symbol linked. This allows building binaries with
221 // semantic metadata, but without having callbacks. When a tool that wants
222 // the metadata is linked which provides the callbacks, they will be called.
224 createSanitizerCtorAndInitFunctions(
225 Mod
, StructorPrefix
+ ".module_ctor",
226 (MI
->FunctionPrefix
+ "_add").str(), InitTypes
, InitArgs
,
227 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks
)
230 createSanitizerCtorAndInitFunctions(
231 Mod
, StructorPrefix
+ ".module_dtor",
232 (MI
->FunctionPrefix
+ "_del").str(), InitTypes
, InitArgs
,
233 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks
)
235 Constant
*CtorComdatKey
= nullptr;
236 Constant
*DtorComdatKey
= nullptr;
237 if (TargetTriple
.supportsCOMDAT()) {
238 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
239 // key needs to be a non-local linkage.
240 Ctor
->setComdat(Mod
.getOrInsertComdat(Ctor
->getName()));
241 Dtor
->setComdat(Mod
.getOrInsertComdat(Dtor
->getName()));
242 Ctor
->setLinkage(GlobalValue::ExternalLinkage
);
243 Dtor
->setLinkage(GlobalValue::ExternalLinkage
);
244 // DSOs should _not_ call another constructor/destructor!
245 Ctor
->setVisibility(GlobalValue::HiddenVisibility
);
246 Dtor
->setVisibility(GlobalValue::HiddenVisibility
);
247 CtorComdatKey
= Ctor
;
248 DtorComdatKey
= Dtor
;
250 appendToGlobalCtors(Mod
, Ctor
, kCtorDtorPriority
, CtorComdatKey
);
251 appendToGlobalDtors(Mod
, Dtor
, kCtorDtorPriority
, DtorComdatKey
);
257 void SanitizerBinaryMetadata::runOn(Function
&F
, MetadataInfoSet
&MIS
) {
260 // Do not apply any instrumentation for naked functions.
261 if (F
.hasFnAttribute(Attribute::Naked
))
263 if (F
.hasFnAttribute(Attribute::DisableSanitizerInstrumentation
))
265 if (Ignorelist
&& Ignorelist
->inSection("metadata", "fun", F
.getName()))
267 // Don't touch available_externally functions, their actual body is elsewhere.
268 if (F
.getLinkage() == GlobalValue::AvailableExternallyLinkage
)
271 MDBuilder
MDB(F
.getContext());
273 // The metadata features enabled for this function, stored along covered
274 // metadata (if enabled).
275 uint64_t FeatureMask
= 0;
276 // Don't emit unnecessary covered metadata for all functions to save space.
277 bool RequiresCovered
= false;
279 if (Options
.Atomics
|| Options
.UAR
) {
280 for (BasicBlock
&BB
: F
)
281 for (Instruction
&I
: BB
)
282 RequiresCovered
|= runOn(I
, MIS
, MDB
, FeatureMask
);
285 if (ClNoSanitize
&& F
.hasFnAttribute("no_sanitize_thread"))
286 FeatureMask
&= ~kSanitizerBinaryMetadataAtomics
;
288 FeatureMask
&= ~kSanitizerBinaryMetadataUAR
;
289 if (FeatureMask
& kSanitizerBinaryMetadataUAR
) {
290 RequiresCovered
= true;
294 // Covered metadata is always emitted if explicitly requested, otherwise only
295 // if some other metadata requires it to unambiguously interpret it for
296 // modules compiled with SanitizerBinaryMetadata.
297 if (Options
.Covered
|| (FeatureMask
&& RequiresCovered
)) {
298 NumMetadataCovered
++;
299 const auto *MI
= &MetadataInfo::Covered
;
301 const StringRef Section
= getSectionName(MI
->SectionSuffix
);
302 // The feature mask will be placed after the function size.
303 Constant
*CFM
= IRB
.getInt64(FeatureMask
);
304 F
.setMetadata(LLVMContext::MD_pcsections
,
305 MDB
.createPCSections({{Section
, {CFM
}}}));
309 bool isUARSafeCall(CallInst
*CI
) {
310 auto *F
= CI
->getCalledFunction();
311 // There are no intrinsic functions that leak arguments.
312 // If the called function does not return, the current function
313 // does not return as well, so no possibility of use-after-return.
314 // Sanitizer function also don't leak or don't return.
315 // It's safe to both pass pointers to local variables to them
316 // and to tail-call them.
317 return F
&& (F
->isIntrinsic() || F
->doesNotReturn() ||
318 F
->getName().starts_with("__asan_") ||
319 F
->getName().starts_with("__hwsan_") ||
320 F
->getName().starts_with("__ubsan_") ||
321 F
->getName().starts_with("__msan_") ||
322 F
->getName().starts_with("__tsan_"));
325 bool hasUseAfterReturnUnsafeUses(Value
&V
) {
326 for (User
*U
: V
.users()) {
327 if (auto *I
= dyn_cast
<Instruction
>(U
)) {
328 if (I
->isLifetimeStartOrEnd() || I
->isDroppable())
330 if (auto *CI
= dyn_cast
<CallInst
>(U
)) {
331 if (isUARSafeCall(CI
))
334 if (isa
<LoadInst
>(U
))
336 if (auto *SI
= dyn_cast
<StoreInst
>(U
)) {
337 // If storing TO the alloca, then the address isn't taken.
338 if (SI
->getOperand(1) == &V
)
341 if (auto *GEPI
= dyn_cast
<GetElementPtrInst
>(U
)) {
342 if (!hasUseAfterReturnUnsafeUses(*GEPI
))
344 } else if (auto *BCI
= dyn_cast
<BitCastInst
>(U
)) {
345 if (!hasUseAfterReturnUnsafeUses(*BCI
))
354 bool useAfterReturnUnsafe(Instruction
&I
) {
355 if (isa
<AllocaInst
>(I
))
356 return hasUseAfterReturnUnsafeUses(I
);
357 // Tail-called functions are not necessary intercepted
358 // at runtime because there is no call instruction.
359 // So conservatively mark the caller as requiring checking.
360 else if (auto *CI
= dyn_cast
<CallInst
>(&I
))
361 return CI
->isTailCall() && !isUARSafeCall(CI
);
365 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value
*Addr
) {
369 Addr
= Addr
->stripInBoundsOffsets();
370 auto *GV
= dyn_cast
<GlobalVariable
>(Addr
);
374 // Some compiler-generated accesses are known racy, to avoid false positives
375 // in data-race analysis pretend they're atomic.
376 if (GV
->hasSection()) {
377 const auto OF
= Triple(Mod
.getTargetTriple()).getObjectFormat();
379 getInstrProfSectionName(IPSK_cnts
, OF
, /*AddSegmentInfo=*/false);
380 if (GV
->getSection().ends_with(ProfSec
))
383 if (GV
->getName().starts_with("__llvm_gcov") ||
384 GV
->getName().starts_with("__llvm_gcda"))
390 // Returns true if the memory at `Addr` may be shared with other threads.
391 bool maybeSharedMutable(const Value
*Addr
) {
392 // By default assume memory may be shared.
396 if (isa
<AllocaInst
>(getUnderlyingObject(Addr
)) &&
397 !PointerMayBeCaptured(Addr
, true, true))
398 return false; // Object is on stack but does not escape.
400 Addr
= Addr
->stripInBoundsOffsets();
401 if (auto *GV
= dyn_cast
<GlobalVariable
>(Addr
)) {
402 if (GV
->isConstant())
403 return false; // Shared, but not mutable.
409 bool SanitizerBinaryMetadata::runOn(Instruction
&I
, MetadataInfoSet
&MIS
,
410 MDBuilder
&MDB
, uint64_t &FeatureMask
) {
411 SmallVector
<const MetadataInfo
*, 1> InstMetadata
;
412 bool RequiresCovered
= false;
414 // Only call if at least 1 type of metadata is requested.
415 assert(Options
.UAR
|| Options
.Atomics
);
417 if (Options
.UAR
&& !(FeatureMask
& kSanitizerBinaryMetadataUAR
)) {
418 if (useAfterReturnUnsafe(I
))
419 FeatureMask
|= kSanitizerBinaryMetadataUAR
;
422 if (Options
.Atomics
) {
423 const Value
*Addr
= nullptr;
424 if (auto *SI
= dyn_cast
<StoreInst
>(&I
))
425 Addr
= SI
->getPointerOperand();
426 else if (auto *LI
= dyn_cast
<LoadInst
>(&I
))
427 Addr
= LI
->getPointerOperand();
429 if (I
.mayReadOrWriteMemory() && maybeSharedMutable(Addr
)) {
430 auto SSID
= getAtomicSyncScopeID(&I
);
431 if ((SSID
.has_value() && *SSID
!= SyncScope::SingleThread
) ||
432 pretendAtomicAccess(Addr
)) {
433 NumMetadataAtomics
++;
434 InstMetadata
.push_back(&MetadataInfo::Atomics
);
436 FeatureMask
|= kSanitizerBinaryMetadataAtomics
;
437 RequiresCovered
= true;
441 // Attach MD_pcsections to instruction.
442 if (!InstMetadata
.empty()) {
443 MIS
.insert(InstMetadata
.begin(), InstMetadata
.end());
444 SmallVector
<MDBuilder::PCSection
, 1> Sections
;
445 for (const auto &MI
: InstMetadata
)
446 Sections
.push_back({getSectionName(MI
->SectionSuffix
), {}});
447 I
.setMetadata(LLVMContext::MD_pcsections
, MDB
.createPCSections(Sections
));
450 return RequiresCovered
;
454 SanitizerBinaryMetadata::getSectionMarker(const Twine
&MarkerName
, Type
*Ty
) {
455 // Use ExternalWeak so that if all sections are discarded due to section
456 // garbage collection, the linker will not report undefined symbol errors.
457 auto *Marker
= new GlobalVariable(Mod
, Ty
, /*isConstant=*/false,
458 GlobalVariable::ExternalWeakLinkage
,
459 /*Initializer=*/nullptr, MarkerName
);
460 Marker
->setVisibility(GlobalValue::HiddenVisibility
);
464 StringRef
SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix
) {
465 // FIXME: Other TargetTriples.
466 // Request ULEB128 encoding for all integer constants.
467 return StringPool
.save(SectionSuffix
+ VersionStr
+ "!C");
470 StringRef
SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix
) {
471 // Twine only concatenates 2 strings; with >2 strings, concatenating them
472 // creates Twine temporaries, and returning the final Twine no longer works
473 // because we'd end up with a stack-use-after-return. So here we also use the
474 // StringPool to store the new string.
475 return StringPool
.save("__start_" + SectionSuffix
+ VersionStr
);
478 StringRef
SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix
) {
479 return StringPool
.save("__stop_" + SectionSuffix
+ VersionStr
);
484 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
485 SanitizerBinaryMetadataOptions Opts
, ArrayRef
<std::string
> IgnorelistFiles
)
486 : Options(std::move(Opts
)), IgnorelistFiles(std::move(IgnorelistFiles
)) {}
489 SanitizerBinaryMetadataPass::run(Module
&M
, AnalysisManager
<Module
> &AM
) {
490 std::unique_ptr
<SpecialCaseList
> Ignorelist
;
491 if (!IgnorelistFiles
.empty()) {
492 Ignorelist
= SpecialCaseList::createOrDie(IgnorelistFiles
,
493 *vfs::getRealFileSystem());
494 if (Ignorelist
->inSection("metadata", "src", M
.getSourceFileName()))
495 return PreservedAnalyses::all();
498 SanitizerBinaryMetadata
Pass(M
, Options
, std::move(Ignorelist
));
500 return PreservedAnalyses::none();
501 return PreservedAnalyses::all();