1 //===- HWAddressSanitizer.cpp - memory access error detector --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of HWAddressSanitizer, a basic address-correctness
11 /// checker based on tagged addressing.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/DomTreeUpdater.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
25 #include "llvm/Analysis/PostDominators.h"
26 #include "llvm/Analysis/ProfileSummaryInfo.h"
27 #include "llvm/Analysis/StackSafetyAnalysis.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/BinaryFormat/Dwarf.h"
31 #include "llvm/BinaryFormat/ELF.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/InlineAsm.h"
42 #include "llvm/IR/InstIterator.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/LLVMContext.h"
48 #include "llvm/IR/MDBuilder.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/MD5.h"
56 #include "llvm/Support/RandomNumberGenerator.h"
57 #include "llvm/Support/raw_ostream.h"
58 #include "llvm/TargetParser/Triple.h"
59 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
60 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
61 #include "llvm/Transforms/Utils/Instrumentation.h"
62 #include "llvm/Transforms/Utils/Local.h"
63 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
64 #include "llvm/Transforms/Utils/ModuleUtils.h"
65 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
66 #include <optional>
67 #include <random>
69 using namespace llvm;
71 #define DEBUG_TYPE "hwasan"
73 const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
74 const char kHwasanNoteName[] = "hwasan.note";
75 const char kHwasanInitName[] = "__hwasan_init";
76 const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";
78 const char kHwasanShadowMemoryDynamicAddress[] =
79 "__hwasan_shadow_memory_dynamic_address";
81 // Access sizes are powers of two: 1, 2, 4, 8, 16.
82 static const size_t kNumberOfAccessSizes = 5;
84 static const size_t kDefaultShadowScale = 4;
86 static const unsigned kShadowBaseAlignment = 32;
88 namespace {
89 enum class OffsetKind {
90 kFixed = 0,
91 kGlobal,
92 kIfunc,
93 kTls,
97 static cl::opt<std::string>
98 ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
99 cl::desc("Prefix for memory access callbacks"),
100 cl::Hidden, cl::init("__hwasan_"));
102 static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
103 "hwasan-kernel-mem-intrinsic-prefix",
104 cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
105 cl::init(false));
107 static cl::opt<bool> ClInstrumentWithCalls(
108 "hwasan-instrument-with-calls",
109 cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
110 cl::init(false));
112 static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
113 cl::desc("instrument read instructions"),
114 cl::Hidden, cl::init(true));
116 static cl::opt<bool>
117 ClInstrumentWrites("hwasan-instrument-writes",
118 cl::desc("instrument write instructions"), cl::Hidden,
119 cl::init(true));
121 static cl::opt<bool> ClInstrumentAtomics(
122 "hwasan-instrument-atomics",
123 cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
124 cl::init(true));
126 static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
127 cl::desc("instrument byval arguments"),
128 cl::Hidden, cl::init(true));
130 static cl::opt<bool>
131 ClRecover("hwasan-recover",
132 cl::desc("Enable recovery mode (continue-after-error)."),
133 cl::Hidden, cl::init(false));
135 static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
136 cl::desc("instrument stack (allocas)"),
137 cl::Hidden, cl::init(true));
139 static cl::opt<bool>
140     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
141                      cl::desc("Use Stack Safety analysis results"),
142                      cl::Optional);
144 static cl::opt<size_t> ClMaxLifetimes(
145 "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
146 cl::ReallyHidden,
147 cl::desc("How many lifetime ends to handle for a single alloca."),
148 cl::Optional);
150 static cl::opt<bool>
151 ClUseAfterScope("hwasan-use-after-scope",
152 cl::desc("detect use after scope within function"),
153 cl::Hidden, cl::init(true));
155 static cl::opt<bool> ClGenerateTagsWithCalls(
156 "hwasan-generate-tags-with-calls",
157 cl::desc("generate new tags with runtime library calls"), cl::Hidden,
158 cl::init(false));
160 static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
161 cl::Hidden, cl::init(false));
163 static cl::opt<int> ClMatchAllTag(
164 "hwasan-match-all-tag",
165 cl::desc("don't report bad accesses via pointers with this tag"),
166 cl::Hidden, cl::init(-1));
168 static cl::opt<bool>
169 ClEnableKhwasan("hwasan-kernel",
170 cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
171 cl::Hidden, cl::init(false));
173 // These flags allow changing the shadow mapping and control how shadow memory
174 // is accessed. The shadow mapping looks like:
175 // Shadow = (Mem >> scale) + offset
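// For example, with the default scale of 4 (16-byte granules) and a fixed
// offset, the address 0x4000 is checked against the shadow byte at
// (0x4000 >> 4) + offset = 0x400 + offset.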
177 static cl::opt<uint64_t>
178 ClMappingOffset("hwasan-mapping-offset",
179 cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
180 cl::Hidden);
182 static cl::opt<OffsetKind> ClMappingOffsetDynamic(
183 "hwasan-mapping-offset-dynamic",
184 cl::desc("HWASan shadow mapping dynamic offset location"), cl::Hidden,
185 cl::values(clEnumValN(OffsetKind::kGlobal, "global", "Use global"),
186 clEnumValN(OffsetKind::kIfunc, "ifunc", "Use ifunc global"),
187 clEnumValN(OffsetKind::kTls, "tls", "Use TLS")));
189 static cl::opt<bool>
190 ClFrameRecords("hwasan-with-frame-record",
191 cl::desc("Use ring buffer for stack allocations"),
192 cl::Hidden);
194 static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
195 cl::desc("Hot percentile cutoff."));
197 static cl::opt<float>
198 ClRandomSkipRate("hwasan-random-rate",
199 cl::desc("Probability value in the range [0.0, 1.0] "
200 "to keep instrumentation of a function."));
202 STATISTIC(NumTotalFuncs, "Number of total funcs");
203 STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
204 STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS");
206 // Mode for selecting how to insert frame record info into the stack ring
207 // buffer.
208 enum RecordStackHistoryMode {
209 // Do not record frame record info.
210 none,
212 // Insert instructions into the prologue for storing into the stack ring
213 // buffer directly.
214 instr,
216 // Add a call to __hwasan_add_frame_record in the runtime.
217 libcall,
220 static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
221 "hwasan-record-stack-history",
222 cl::desc("Record stack frames with tagged allocations in a thread-local "
223 "ring buffer"),
224 cl::values(clEnumVal(none, "Do not record stack ring history"),
225 clEnumVal(instr, "Insert instructions into the prologue for "
226 "storing into the stack ring buffer directly"),
227 clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
228 "storing into the stack ring buffer")),
229 cl::Hidden, cl::init(instr));
231 static cl::opt<bool>
232 ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
233 cl::desc("instrument memory intrinsics"),
234 cl::Hidden, cl::init(true));
236 static cl::opt<bool>
237 ClInstrumentLandingPads("hwasan-instrument-landing-pads",
238 cl::desc("instrument landing pads"), cl::Hidden,
239 cl::init(false));
241 static cl::opt<bool> ClUseShortGranules(
242 "hwasan-use-short-granules",
243 cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
244 cl::init(false));
246 static cl::opt<bool> ClInstrumentPersonalityFunctions(
247 "hwasan-instrument-personality-functions",
248 cl::desc("instrument personality functions"), cl::Hidden);
250 static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
251 cl::desc("inline all checks"),
252 cl::Hidden, cl::init(false));
254 static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
255                                             cl::desc("inline fast path checks"),
256 cl::Hidden, cl::init(false));
258 // Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
259 static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
260 cl::desc("Use page aliasing in HWASan"),
261 cl::Hidden, cl::init(false));
263 namespace {
265 template <typename T> T optOr(cl::opt<T> &Opt, T Other) {
266 return Opt.getNumOccurrences() ? Opt : Other;
269 bool shouldUsePageAliases(const Triple &TargetTriple) {
270 return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
273 bool shouldInstrumentStack(const Triple &TargetTriple) {
274 return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
277 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
278 return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64);
281 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
282 return optOr(ClUseStackSafety, !DisableOptimization);
285 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
286 bool DisableOptimization) {
287 return shouldInstrumentStack(TargetTriple) &&
288 mightUseStackSafetyAnalysis(DisableOptimization);
291 bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
292 return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
295 /// An instrumentation pass implementing detection of addressability bugs
296 /// using tagged pointers.
297 class HWAddressSanitizer {
298 public:
299 HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
300 const StackSafetyGlobalInfo *SSI)
301 : M(M), SSI(SSI) {
302 this->Recover = optOr(ClRecover, Recover);
303 this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel);
304 this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG(DEBUG_TYPE)
305 : nullptr;
307 initializeModule();
310 void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
312 private:
313 struct ShadowTagCheckInfo {
314 Instruction *TagMismatchTerm = nullptr;
315 Value *PtrLong = nullptr;
316 Value *AddrLong = nullptr;
317 Value *PtrTag = nullptr;
318 Value *MemTag = nullptr;
321 bool selectiveInstrumentationShouldSkip(Function &F,
322 FunctionAnalysisManager &FAM) const;
323 void initializeModule();
324 void createHwasanCtorComdat();
325 void removeFnAttributes(Function *F);
327 void initializeCallbacks(Module &M);
329 Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);
331 Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
332 Value *getShadowNonTls(IRBuilder<> &IRB);
334 void untagPointerOperand(Instruction *I, Value *Addr);
335 Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
337 int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
338 ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
339 DomTreeUpdater &DTU, LoopInfo *LI);
340 void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
341 unsigned AccessSizeIndex,
342 Instruction *InsertBefore,
343 DomTreeUpdater &DTU, LoopInfo *LI);
344 void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
345 unsigned AccessSizeIndex,
346 Instruction *InsertBefore, DomTreeUpdater &DTU,
347 LoopInfo *LI);
348 bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
349 void instrumentMemIntrinsic(MemIntrinsic *MI);
350 bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
351 LoopInfo *LI, const DataLayout &DL);
352 bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
353 bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
354 Value *Ptr);
356 void getInterestingMemoryOperands(
357 OptimizationRemarkEmitter &ORE, Instruction *I,
358 const TargetLibraryInfo &TLI,
359 SmallVectorImpl<InterestingMemoryOperand> &Interesting);
361 void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
362 Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
363 Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
364 bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
365 const DominatorTree &DT, const PostDominatorTree &PDT,
366 const LoopInfo &LI);
367 bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
368 Value *getNextTagWithCall(IRBuilder<> &IRB);
369 Value *getStackBaseTag(IRBuilder<> &IRB);
370 Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
371 Value *getUARTag(IRBuilder<> &IRB);
373 Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB);
374 Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
375 unsigned retagMask(unsigned AllocaNo);
377 void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
379 void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
380 void instrumentGlobals();
382 Value *getCachedFP(IRBuilder<> &IRB);
383 Value *getFrameRecordInfo(IRBuilder<> &IRB);
385 void instrumentPersonalityFunctions();
387 LLVMContext *C;
388 Module &M;
389 const StackSafetyGlobalInfo *SSI;
390 Triple TargetTriple;
391 std::unique_ptr<RandomNumberGenerator> Rng;
393 /// This struct defines the shadow mapping using the rule:
394 /// If `kFixed`, then
395 /// shadow = (mem >> Scale) + Offset.
396 /// If `kGlobal`, then
397 /// extern char* __hwasan_shadow_memory_dynamic_address;
398 /// shadow = (mem >> Scale) + __hwasan_shadow_memory_dynamic_address
399 /// If `kIfunc`, then
400 /// extern char __hwasan_shadow[];
401 /// shadow = (mem >> Scale) + &__hwasan_shadow
402 /// If `kTls`, then
403 /// extern char *__hwasan_tls;
404 /// shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
406 /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
407 /// ring buffer for storing stack allocations on targets that support it.
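/// For example, with the default Scale of 4, each shadow byte describes one
/// 16-byte granule of application memory (see getObjectAlignment()).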
408 class ShadowMapping {
409 OffsetKind Kind;
410 uint64_t Offset;
411 uint8_t Scale;
412 bool WithFrameRecord;
414 void SetFixed(uint64_t O) {
415 Kind = OffsetKind::kFixed;
416 Offset = O;
419 public:
420 void init(Triple &TargetTriple, bool InstrumentWithCalls);
421 Align getObjectAlignment() const { return Align(1ULL << Scale); }
422 bool isInGlobal() const { return Kind == OffsetKind::kGlobal; }
423 bool isInIfunc() const { return Kind == OffsetKind::kIfunc; }
424 bool isInTls() const { return Kind == OffsetKind::kTls; }
425 bool isFixed() const { return Kind == OffsetKind::kFixed; }
426 uint8_t scale() const { return Scale; };
427 uint64_t offset() const {
428 assert(isFixed());
429 return Offset;
431 bool withFrameRecord() const { return WithFrameRecord; };
434 ShadowMapping Mapping;
436 Type *VoidTy = Type::getVoidTy(M.getContext());
437 Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext());
438 PointerType *PtrTy = PointerType::getUnqual(M.getContext());
439 Type *Int8Ty = Type::getInt8Ty(M.getContext());
440 Type *Int32Ty = Type::getInt32Ty(M.getContext());
441 Type *Int64Ty = Type::getInt64Ty(M.getContext());
443 bool CompileKernel;
444 bool Recover;
445 bool OutlinedChecks;
446 bool InlineFastPath;
447 bool UseShortGranules;
448 bool InstrumentLandingPads;
449 bool InstrumentWithCalls;
450 bool InstrumentStack;
451 bool InstrumentGlobals;
452 bool DetectUseAfterScope;
453 bool UsePageAliases;
454 bool UseMatchAllCallback;
456 std::optional<uint8_t> MatchAllTag;
458 unsigned PointerTagShift;
459 uint64_t TagMaskByte;
461 Function *HwasanCtorFunction;
463 FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
464 FunctionCallee HwasanMemoryAccessCallbackSized[2];
466 FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
467 FunctionCallee HwasanHandleVfork;
469 FunctionCallee HwasanTagMemoryFunc;
470 FunctionCallee HwasanGenerateTagFunc;
471 FunctionCallee HwasanRecordFrameRecordFunc;
473 Constant *ShadowGlobal;
475 Value *ShadowBase = nullptr;
476 Value *StackBaseTag = nullptr;
477 Value *CachedFP = nullptr;
478 GlobalValue *ThreadPtrGlobal = nullptr;
481 } // end anonymous namespace
483 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
484 ModuleAnalysisManager &MAM) {
485 // Return early if the nosanitize_hwaddress module flag is present.
486 if (checkIfAlreadyInstrumented(M, "nosanitize_hwaddress"))
487 return PreservedAnalyses::all();
488 const StackSafetyGlobalInfo *SSI = nullptr;
489 auto TargetTriple = llvm::Triple(M.getTargetTriple());
490 if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
491 SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
493 HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
494 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
495 for (Function &F : M)
496 HWASan.sanitizeFunction(F, FAM);
498 PreservedAnalyses PA = PreservedAnalyses::none();
499 // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
500 // are incrementally updated throughout this pass whenever
501 // SplitBlockAndInsertIfThen is called.
502 PA.preserve<DominatorTreeAnalysis>();
503 PA.preserve<PostDominatorTreeAnalysis>();
504 PA.preserve<LoopAnalysis>();
505 // GlobalsAA is considered stateless and does not get invalidated unless
506 // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
507 // make changes that require GlobalsAA to be invalidated.
508 PA.abandon<GlobalsAA>();
509 return PA;
511 void HWAddressSanitizerPass::printPipeline(
512 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
513 static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
514 OS, MapClassName2PassName);
515 OS << '<';
516 if (Options.CompileKernel)
517 OS << "kernel;";
518 if (Options.Recover)
519 OS << "recover";
520 OS << '>';
523 void HWAddressSanitizer::createHwasanCtorComdat() {
524 std::tie(HwasanCtorFunction, std::ignore) =
525 getOrCreateSanitizerCtorAndInitFunctions(
526 M, kHwasanModuleCtorName, kHwasanInitName,
527 /*InitArgTypes=*/{},
528 /*InitArgs=*/{},
529 // This callback is invoked when the functions are created the first
530 // time. Hook them into the global ctors list in that case:
531 [&](Function *Ctor, FunctionCallee) {
532 Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
533 Ctor->setComdat(CtorComdat);
534 appendToGlobalCtors(M, Ctor, 0, Ctor);
537 // Create a note that contains pointers to the list of global
538 // descriptors. Adding a note to the output file will cause the linker to
539 // create a PT_NOTE program header pointing to the note that we can use to
540 // find the descriptor list starting from the program headers. A function
541 // provided by the runtime initializes the shadow memory for the globals by
542 // accessing the descriptor list via the note. The dynamic loader needs to
543 // call this function whenever a library is loaded.
545 // The reason we use a note for this, instead of the more conventional
546 // approach of having a global constructor pass a descriptor list pointer to
547 // the runtime, is an initialization-order problem. With constructors we can
548 // encounter the following problematic scenario:
550 // 1) library A depends on library B and also interposes one of B's symbols
551 // 2) B's constructors are called before A's (as required for correctness)
552 // 3) during construction, B accesses one of its "own" globals (actually
553 // interposed by A) and triggers a HWASAN failure due to the initialization
554 // for A not having happened yet
556 // Even without interposition it is possible to run into similar situations in
557 // cases where two libraries mutually depend on each other.
559 // We only need one note per binary, so put everything for the note in a
560 // comdat. This needs to be a comdat with an .init_array section to prevent
561 // newer versions of lld from discarding the note.
563 // Create the note even if we aren't instrumenting globals. This ensures that
564 // binaries linked from object files with both instrumented and
565 // non-instrumented globals will end up with a note, even if a comdat from an
566 // object file with non-instrumented globals is selected. The note is harmless
567 // if the runtime doesn't support it, since it will just be ignored.
568 Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
570 Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
571 auto *Start =
572 new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
573 nullptr, "__start_hwasan_globals");
574 Start->setVisibility(GlobalValue::HiddenVisibility);
575 auto *Stop =
576 new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
577 nullptr, "__stop_hwasan_globals");
578 Stop->setVisibility(GlobalValue::HiddenVisibility);
580 // Null-terminated so actually 8 bytes, which are required in order to align
581 // the note properly.
582 auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
584 auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
585 Int32Ty, Int32Ty);
586 auto *Note =
587 new GlobalVariable(M, NoteTy, /*isConstant=*/true,
588 GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
589 Note->setSection(".note.hwasan.globals");
590 Note->setComdat(NoteComdat);
591 Note->setAlignment(Align(4));
593 // The pointers in the note need to be relative so that the note ends up being
594 // placed in rodata, which is the standard location for notes.
595 auto CreateRelPtr = [&](Constant *Ptr) {
596 return ConstantExpr::getTrunc(
597 ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
598 ConstantExpr::getPtrToInt(Note, Int64Ty)),
599 Int32Ty);
601 Note->setInitializer(ConstantStruct::getAnon(
602 {ConstantInt::get(Int32Ty, 8), // n_namesz
603 ConstantInt::get(Int32Ty, 8), // n_descsz
604 ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
605 Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
606 appendToCompilerUsed(M, Note);
608 // Create a zero-length global in hwasan_globals so that the linker will
609 // always create start and stop symbols.
610 auto *Dummy = new GlobalVariable(
611 M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
612 Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
613 Dummy->setSection("hwasan_globals");
614 Dummy->setComdat(NoteComdat);
615 Dummy->setMetadata(LLVMContext::MD_associated,
616 MDNode::get(*C, ValueAsMetadata::get(Note)));
617 appendToCompilerUsed(M, Dummy);
620 void HWAddressSanitizer::removeFnAttributes(Function *F) {
621 // Remove memory attributes that are invalid with HWASan.
622 // HWASan checks read from shadow, which invalidates memory(argmem: *)
623 // Short granule checks on function arguments read from the argument memory
624 // (last byte of the granule), which invalidates writeonly.
626 // This applies even to functions that are not sanitized: attribute inference
627 // can add these attributes to libc functions, but they no longer hold once
628 // those functions are instrumented (Android) or intercepted.
630 // We might want to model HWASan shadow memory more opaquely to get rid of
631 // this problem altogether, by hiding the shadow memory write in an
632 // intrinsic, essentially like in the AArch64StackTagging pass. But that's
633 // for another day.
635 // The API is weird. `onlyReadsMemory` actually means "does not write", and
636 // `onlyWritesMemory` actually means "does not read". So we reconstruct
637 // "accesses memory" && "does not read" <=> "writes".
638 bool Changed = false;
639 if (!F->doesNotAccessMemory()) {
640 bool WritesMemory = !F->onlyReadsMemory();
641 bool ReadsMemory = !F->onlyWritesMemory();
642 if ((WritesMemory && !ReadsMemory) || F->onlyAccessesArgMemory()) {
643 F->removeFnAttr(Attribute::Memory);
644 Changed = true;
647 for (Argument &A : F->args()) {
648 if (A.hasAttribute(Attribute::WriteOnly)) {
649 A.removeAttr(Attribute::WriteOnly);
650 Changed = true;
653 if (Changed) {
654 // nobuiltin makes sure later passes don't restore assumptions about
655 // the function.
656 F->addFnAttr(Attribute::NoBuiltin);
660 /// Module-level initialization.
662 /// Inserts a call to __hwasan_init into the module's constructor list.
663 void HWAddressSanitizer::initializeModule() {
664 LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
665 TargetTriple = Triple(M.getTargetTriple());
667 for (Function &F : M.functions())
668 removeFnAttributes(&F);
670 // x86_64 currently has two modes:
671 // - Intel LAM (default)
672 // - pointer aliasing (heap only)
673 bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
674 UsePageAliases = shouldUsePageAliases(TargetTriple);
675 InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
676 InstrumentStack = shouldInstrumentStack(TargetTriple);
677 DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
678 PointerTagShift = IsX86_64 ? 57 : 56;
679 TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
681 Mapping.init(TargetTriple, InstrumentWithCalls);
683 C = &(M.getContext());
684 IRBuilder<> IRB(*C);
686 HwasanCtorFunction = nullptr;
688 // Older versions of Android do not have the required runtime support for
689 // short granules, global or personality function instrumentation. On other
690 // platforms we currently require using the latest version of the runtime.
691 bool NewRuntime =
692 !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
694 UseShortGranules = optOr(ClUseShortGranules, NewRuntime);
695 OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
696 TargetTriple.isOSBinFormatELF() &&
697 !optOr(ClInlineAllChecks, Recover);
699 // These platforms may prefer less inlining to reduce binary size.
700 InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() ||
701 TargetTriple.isOSFuchsia()));
703 if (ClMatchAllTag.getNumOccurrences()) {
704 if (ClMatchAllTag != -1) {
705 MatchAllTag = ClMatchAllTag & 0xFF;
707 } else if (CompileKernel) {
708 MatchAllTag = 0xFF;
710 UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();
712 // If we don't have personality function support, fall back to landing pads.
713 InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);
715 InstrumentGlobals =
716 !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime);
718 if (!CompileKernel) {
719 createHwasanCtorComdat();
721 if (InstrumentGlobals)
722 instrumentGlobals();
724 bool InstrumentPersonalityFunctions =
725 optOr(ClInstrumentPersonalityFunctions, NewRuntime);
726 if (InstrumentPersonalityFunctions)
727 instrumentPersonalityFunctions();
730 if (!TargetTriple.isAndroid()) {
731 Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
732 auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
733 GlobalValue::ExternalLinkage, nullptr,
734 "__hwasan_tls", nullptr,
735 GlobalVariable::InitialExecTLSModel);
736 appendToCompilerUsed(M, GV);
737 return GV;
739 ThreadPtrGlobal = cast<GlobalVariable>(C);
743 void HWAddressSanitizer::initializeCallbacks(Module &M) {
744 IRBuilder<> IRB(*C);
745 const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
746 FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
747 *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
748 *HwasanMemsetFnTy;
749 if (UseMatchAllCallback) {
750 HwasanMemoryAccessCallbackSizedFnTy =
751 FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
752 HwasanMemoryAccessCallbackFnTy =
753 FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
754 HwasanMemTransferFnTy =
755 FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
756 HwasanMemsetFnTy =
757 FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
758 } else {
759 HwasanMemoryAccessCallbackSizedFnTy =
760 FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
761 HwasanMemoryAccessCallbackFnTy =
762 FunctionType::get(VoidTy, {IntptrTy}, false);
763 HwasanMemTransferFnTy =
764 FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
765 HwasanMemsetFnTy =
766 FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
769 for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
770 const std::string TypeStr = AccessIsWrite ? "store" : "load";
771 const std::string EndingStr = Recover ? "_noabort" : "";
773 HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
774 ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
775 HwasanMemoryAccessCallbackSizedFnTy);
777 for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
778 AccessSizeIndex++) {
779 HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
780 M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
781 itostr(1ULL << AccessSizeIndex) +
782 MatchAllStr + EndingStr,
783 HwasanMemoryAccessCallbackFnTy);
787 const std::string MemIntrinCallbackPrefix =
788 (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
789 ? std::string("")
790 : ClMemoryAccessCallbackPrefix;
792 HwasanMemmove = M.getOrInsertFunction(
793 MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
794 HwasanMemcpy = M.getOrInsertFunction(
795 MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
796 HwasanMemset = M.getOrInsertFunction(
797 MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);
799 HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
800 PtrTy, Int8Ty, IntptrTy);
801 HwasanGenerateTagFunc =
802 M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
804 HwasanRecordFrameRecordFunc =
805 M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);
807 ShadowGlobal =
808 M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));
810 HwasanHandleVfork =
811 M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
814 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
815 // An empty inline asm with input reg == output reg.
816 // An opaque no-op cast, basically.
817 // This prevents code bloat as a result of rematerializing trivial definitions
818 // such as constants or global addresses at every load and store.
819 InlineAsm *Asm =
820 InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
821 StringRef(""), StringRef("=r,0"),
822 /*hasSideEffects=*/false);
823 return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
826 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
827 return getOpaqueNoopCast(IRB, ShadowGlobal);
830 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
831 if (Mapping.isFixed()) {
832 return getOpaqueNoopCast(
833 IRB, ConstantExpr::getIntToPtr(
834 ConstantInt::get(IntptrTy, Mapping.offset()), PtrTy));
837 if (Mapping.isInIfunc())
838 return getDynamicShadowIfunc(IRB);
840 Value *GlobalDynamicAddress =
841 IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
842 kHwasanShadowMemoryDynamicAddress, PtrTy);
843 return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
846 bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst,
847 Value *Ptr) {
848 // Do not instrument accesses from different address spaces; we cannot deal
849 // with them.
850 Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
851 if (PtrTy->getPointerAddressSpace() != 0)
852 return true;
854 // Ignore swifterror addresses.
855 // swifterror memory addresses are mem2reg promoted by instruction
856 // selection. As such they cannot have regular uses like an instrumentation
857 // function and it makes no sense to track them as memory.
858 if (Ptr->isSwiftError())
859 return true;
861 if (findAllocaForValue(Ptr)) {
862 if (!InstrumentStack)
863 return true;
864 if (SSI && SSI->stackAccessIsSafe(*Inst))
865 return true;
868 if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) {
869 if (!InstrumentGlobals)
870 return true;
871 // TODO: Optimize inbound global accesses, like Asan `instrumentMop`.
874 return false;
877 bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE,
878 Instruction *Inst, Value *Ptr) {
879 bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr);
880 if (Ignored) {
881 ORE.emit(
882 [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); });
883 } else {
884 ORE.emit([&]() {
885 return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst);
888 return Ignored;
891 void HWAddressSanitizer::getInterestingMemoryOperands(
892 OptimizationRemarkEmitter &ORE, Instruction *I,
893 const TargetLibraryInfo &TLI,
894 SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
895 // Skip memory accesses inserted by another instrumentation.
896 if (I->hasMetadata(LLVMContext::MD_nosanitize))
897 return;
899 // Do not instrument the load fetching the dynamic shadow address.
900 if (ShadowBase == I)
901 return;
903 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
904 if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand()))
905 return;
906 Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
907 LI->getType(), LI->getAlign());
908 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
909 if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand()))
910 return;
911 Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
912 SI->getValueOperand()->getType(), SI->getAlign());
913 } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
914 if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand()))
915 return;
916 Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
917 RMW->getValOperand()->getType(), std::nullopt);
918 } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
919 if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand()))
920 return;
921 Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
922 XCHG->getCompareOperand()->getType(),
923 std::nullopt);
924 } else if (auto *CI = dyn_cast<CallInst>(I)) {
925 for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
926 if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
927 ignoreAccess(ORE, I, CI->getArgOperand(ArgNo)))
928 continue;
929 Type *Ty = CI->getParamByValType(ArgNo);
930 Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
932 maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
936 static unsigned getPointerOperandIndex(Instruction *I) {
937 if (LoadInst *LI = dyn_cast<LoadInst>(I))
938 return LI->getPointerOperandIndex();
939 if (StoreInst *SI = dyn_cast<StoreInst>(I))
940 return SI->getPointerOperandIndex();
941 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
942 return RMW->getPointerOperandIndex();
943 if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
944 return XCHG->getPointerOperandIndex();
945 report_fatal_error("Unexpected instruction");
946 return -1;
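// For example, TypeSizeToSizeIndex(32) (a 4-byte access) returns 2, which
// (with the default callback prefix) selects the __hwasan_load4 and
// __hwasan_store4 callbacks registered in initializeCallbacks.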
949 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
950 size_t Res = llvm::countr_zero(TypeSize / 8);
951 assert(Res < kNumberOfAccessSizes);
952 return Res;
955 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
956 if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
957 TargetTriple.isRISCV64())
958 return;
960 IRBuilder<> IRB(I);
961 Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
962 Value *UntaggedPtr =
963 IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
964 I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
967 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
968 // Mem >> Scale
969 Value *Shadow = IRB.CreateLShr(Mem, Mapping.scale());
970 if (Mapping.isFixed() && Mapping.offset() == 0)
971 return IRB.CreateIntToPtr(Shadow, PtrTy);
972 // (Mem >> Scale) + Offset
973 return IRB.CreatePtrAdd(ShadowBase, Shadow);
976 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
977 unsigned AccessSizeIndex) {
978 return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
979 (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
980 (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
981 (Recover << HWASanAccessInfo::RecoverShift) |
982 (IsWrite << HWASanAccessInfo::IsWriteShift) |
983 (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
986 HWAddressSanitizer::ShadowTagCheckInfo
987 HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
988 DomTreeUpdater &DTU, LoopInfo *LI) {
989 ShadowTagCheckInfo R;
991 IRBuilder<> IRB(InsertBefore);
993 R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
994 R.PtrTag =
995 IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
996 R.AddrLong = untagPointer(IRB, R.PtrLong);
997 Value *Shadow = memToShadow(R.AddrLong, IRB);
998 R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
999 Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);
1001 if (MatchAllTag.has_value()) {
1002 Value *TagNotIgnored = IRB.CreateICmpNE(
1003 R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
1004 TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
1007 R.TagMismatchTerm = SplitBlockAndInsertIfThen(
1008 TagMismatch, InsertBefore, false,
1009 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);
1011 return R;
1014 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
1015 unsigned AccessSizeIndex,
1016 Instruction *InsertBefore,
1017 DomTreeUpdater &DTU,
1018 LoopInfo *LI) {
1019 assert(!UsePageAliases);
1020 const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
1022 if (InlineFastPath)
1023 InsertBefore =
1024 insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;
1026 IRBuilder<> IRB(InsertBefore);
1027 bool UseFixedShadowIntrinsic = false;
1028 // The memaccess fixed shadow intrinsic is only supported on AArch64,
1029 // which allows a 16-bit immediate to be left-shifted by 32.
1030 // Since kShadowBaseAlignment == 32, and Linux by default will not
1031 // mmap above 48-bits, practically any valid shadow offset is
1032 // representable.
1033 // In particular, an offset of 4TB (1024 << 32) is representable, and
1034 // ought to be good enough for anybody.
1035 if (TargetTriple.isAArch64() && Mapping.isFixed()) {
1036 uint16_t OffsetShifted = Mapping.offset() >> 32;
1037 UseFixedShadowIntrinsic =
1038 static_cast<uint64_t>(OffsetShifted) << 32 == Mapping.offset();
1041 if (UseFixedShadowIntrinsic) {
1042 IRB.CreateIntrinsic(
1043 UseShortGranules
1044 ? Intrinsic::hwasan_check_memaccess_shortgranules_fixedshadow
1045 : Intrinsic::hwasan_check_memaccess_fixedshadow,
1047 {Ptr, ConstantInt::get(Int32Ty, AccessInfo),
1048 ConstantInt::get(Int64Ty, Mapping.offset())});
1049 } else {
1050 IRB.CreateIntrinsic(
1051 UseShortGranules ? Intrinsic::hwasan_check_memaccess_shortgranules
1052 : Intrinsic::hwasan_check_memaccess,
1053 {}, {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
1057 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
1058 unsigned AccessSizeIndex,
1059 Instruction *InsertBefore,
1060 DomTreeUpdater &DTU,
1061 LoopInfo *LI) {
1062 assert(!UsePageAliases);
1063 const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
1065 ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);
1067 IRBuilder<> IRB(TCI.TagMismatchTerm);
1068 Value *OutOfShortGranuleTagRange =
1069 IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
1070 Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
1071 OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
1072 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);
1074 IRB.SetInsertPoint(TCI.TagMismatchTerm);
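// Here the memory tag is in the short-granule range [0, 15] and encodes the
// number of valid bytes in the 16-byte granule. The access is in bounds only
// if (Ptr & 15) + AccessSize - 1 < MemTag; the real tag, stored in the last
// byte of the granule, is verified by the check emitted further below.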
1075 Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
1076 PtrLowBits = IRB.CreateAdd(
1077 PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
1078 Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
1079 SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
1080 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
1081 LI, CheckFailTerm->getParent());
1083 IRB.SetInsertPoint(TCI.TagMismatchTerm);
1084 Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
1085 InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
1086 Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
1087 Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
1088 SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
1089 MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
1090 LI, CheckFailTerm->getParent());
1092 IRB.SetInsertPoint(CheckFailTerm);
1093 InlineAsm *Asm;
1094 switch (TargetTriple.getArch()) {
1095 case Triple::x86_64:
1096 // The signal handler will find the data address in rdi.
1097 Asm = InlineAsm::get(
1098 FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
1099 "int3\nnopl " +
1100 itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
1101 "(%rax)",
1102 "{rdi}",
1103 /*hasSideEffects=*/true);
1104 break;
1105 case Triple::aarch64:
1106 case Triple::aarch64_be:
1107 // The signal handler will find the data address in x0.
1108 Asm = InlineAsm::get(
1109 FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
1110 "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
1111 "{x0}",
1112 /*hasSideEffects=*/true);
1113 break;
1114 case Triple::riscv64:
1115 // The signal handler will find the data address in x10.
1116 Asm = InlineAsm::get(
1117 FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
1118 "ebreak\naddiw x0, x11, " +
1119 itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
1120 "{x10}",
1121 /*hasSideEffects=*/true);
1122 break;
1123 default:
1124 report_fatal_error("unsupported architecture");
1126 IRB.CreateCall(Asm, TCI.PtrLong);
1127 if (Recover)
1128 cast<BranchInst>(CheckFailTerm)
1129 ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
1132 bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE,
1133 MemIntrinsic *MI) {
1134 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1135 return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) &&
1136 (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource()));
1138 if (isa<MemSetInst>(MI))
1139 return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest());
1140 return false;
1143 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
1144 IRBuilder<> IRB(MI);
1145 if (isa<MemTransferInst>(MI)) {
1146 SmallVector<Value *, 4> Args{
1147 MI->getOperand(0), MI->getOperand(1),
1148 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
1150 if (UseMatchAllCallback)
1151 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1152 IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
1153 } else if (isa<MemSetInst>(MI)) {
1154 SmallVector<Value *, 4> Args{
1155 MI->getOperand(0),
1156 IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
1157 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
1158 if (UseMatchAllCallback)
1159 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1160 IRB.CreateCall(HwasanMemset, Args);
1162 MI->eraseFromParent();
1165 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
1166 DomTreeUpdater &DTU, LoopInfo *LI,
1167 const DataLayout &DL) {
1168 Value *Addr = O.getPtr();
1170 LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
1172 // If the pointer is statically known to be zero, the tag check will pass
1173 // since:
1174 // 1) it has a zero tag
1175 // 2) the shadow memory corresponding to address 0 is initialized to zero and
1176 // never updated.
1177 // We can therefore elide the tag check.
1178 llvm::KnownBits Known(DL.getPointerTypeSizeInBits(Addr->getType()));
1179 llvm::computeKnownBits(Addr, Known, DL);
1180 if (Known.isZero())
1181 return false;
1183 if (O.MaybeMask)
1184 return false; // FIXME
1186 IRBuilder<> IRB(O.getInsn());
1187 if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
1188 (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
1189 (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
1190 *O.Alignment >= O.TypeStoreSize / 8)) {
1191 size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
1192 if (InstrumentWithCalls) {
1193 SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
1194 if (UseMatchAllCallback)
1195 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1196 IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
1197 Args);
1198 } else if (OutlinedChecks) {
1199 instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
1200 DTU, LI);
1201 } else {
1202 instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
1203 DTU, LI);
1205 } else {
1206 SmallVector<Value *, 3> Args{
1207 IRB.CreatePointerCast(Addr, IntptrTy),
1208 IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
1209 ConstantInt::get(IntptrTy, 8))};
1210 if (UseMatchAllCallback)
1211 Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
1212 IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
1214 untagPointerOperand(O.getInsn(), Addr);
1216 return true;
1219 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
1220 size_t Size) {
1221 size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1222 if (!UseShortGranules)
1223 Size = AlignedSize;
1225 Tag = IRB.CreateTrunc(Tag, Int8Ty);
1226 if (InstrumentWithCalls) {
1227 IRB.CreateCall(HwasanTagMemoryFunc,
1228 {IRB.CreatePointerCast(AI, PtrTy), Tag,
1229 ConstantInt::get(IntptrTy, AlignedSize)});
1230 } else {
1231 size_t ShadowSize = Size >> Mapping.scale();
1232 Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
1233 Value *ShadowPtr = memToShadow(AddrLong, IRB);
1234 // If this memset is not inlined, it will be intercepted in the hwasan
1235 // runtime library. That's OK, because the interceptor skips the checks if
1236 // the address is in the shadow region.
1237 // FIXME: the interceptor is not as fast as real memset. Consider lowering
1238 // llvm.memset right here into either a sequence of stores, or a call to
1239 // hwasan_tag_memory.
1240 if (ShadowSize)
1241 IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
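// When the size is not a multiple of the granule (16 bytes by default), the
// trailing short granule is encoded specially: its shadow byte holds the
// number of valid bytes and the real tag is written into the granule's last
// byte. For example, a 20-byte alloca tags one full granule in shadow, stores
// 4 in the next shadow byte, and stores the tag in application byte 31.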
1242 if (Size != AlignedSize) {
1243 const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
1244 IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
1245 IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
1246 IRB.CreateStore(
1247 Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
1248 AlignedSize - 1));
1253 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
1254 if (TargetTriple.getArch() == Triple::x86_64)
1255 return AllocaNo & TagMaskByte;
1257 // A list of 8-bit numbers that have at most one run of non-zero bits.
1258 // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1259 // masks.
1260 // The list does not include the value 255, which is used for UAR.
1262 // Because we are more likely to use earlier elements of this list than later
1263 // ones, it is sorted in increasing order of probability of collision with a
1264 // mask allocated (temporally) nearby. The program that generated this list
1265 // can be found at:
1266 // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
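// For example, the first few allocas are assigned masks 0, 128, 64, 192, ...;
// the table wraps around after 36 entries (see the modulo below).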
1267 static const unsigned FastMasks[] = {
1268 0, 128, 64, 192, 32, 96, 224, 112, 240, 48, 16, 120,
1269 248, 56, 24, 8, 124, 252, 60, 28, 12, 4, 126, 254,
1270 62, 30, 14, 6, 2, 127, 63, 31, 15, 7, 3, 1};
1271 return FastMasks[AllocaNo % std::size(FastMasks)];
1274 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1275 if (TagMaskByte == 0xFF)
1276 return OldTag; // No need to clear the tag byte.
1277 return IRB.CreateAnd(OldTag,
1278 ConstantInt::get(OldTag->getType(), TagMaskByte));
1281 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1282 return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1285 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1286 if (ClGenerateTagsWithCalls)
1287 return nullptr;
1288 if (StackBaseTag)
1289 return StackBaseTag;
1290 // Extract some entropy from the stack pointer for the tags.
1291 // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1292 // between functions).
1293 Value *FramePointerLong = getCachedFP(IRB);
1294 Value *StackTag =
1295 applyTagMask(IRB, IRB.CreateXor(FramePointerLong,
1296 IRB.CreateLShr(FramePointerLong, 20)));
1297 StackTag->setName("hwasan.stack.base.tag");
1298 return StackTag;
1301 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1302 unsigned AllocaNo) {
1303 if (ClGenerateTagsWithCalls)
1304 return getNextTagWithCall(IRB);
1305 return IRB.CreateXor(
1306 StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
1309 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
1310 Value *FramePointerLong = getCachedFP(IRB);
1311 Value *UARTag =
1312 applyTagMask(IRB, IRB.CreateLShr(FramePointerLong, PointerTagShift));
1314 UARTag->setName("hwasan.uar.tag");
1315 return UARTag;
1318 // Add a tag to an address.
1319 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1320 Value *PtrLong, Value *Tag) {
1321 assert(!UsePageAliases);
1322 Value *TaggedPtrLong;
1323 if (CompileKernel) {
1324 // Kernel addresses have 0xFF in the most significant byte.
1325 Value *ShiftedTag =
1326 IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1327 ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1328 TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1329 } else {
1330 // Userspace can simply do OR (tag << PointerTagShift);
1331 Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1332 TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1334 return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1337 // Remove tag from an address.
1338 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1339 assert(!UsePageAliases);
1340 Value *UntaggedPtrLong;
1341 if (CompileKernel) {
1342 // Kernel addresses have 0xFF in the most significant byte.
1343 UntaggedPtrLong =
1344 IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1345 TagMaskByte << PointerTagShift));
1346 } else {
1347 // Userspace addresses have 0x00.
1348 UntaggedPtrLong = IRB.CreateAnd(
1349 PtrLong, ConstantInt::get(PtrLong->getType(),
1350 ~(TagMaskByte << PointerTagShift)));
1352 return UntaggedPtrLong;
1355 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB) {
1356 // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1357 // in Bionic's libc/platform/bionic/tls_defines.h.
1358 constexpr int SanitizerSlot = 6;
1359 if (TargetTriple.isAArch64() && TargetTriple.isAndroid())
1360 return memtag::getAndroidSlotPtr(IRB, SanitizerSlot);
1361 return ThreadPtrGlobal;
1364 Value *HWAddressSanitizer::getCachedFP(IRBuilder<> &IRB) {
1365 if (!CachedFP)
1366 CachedFP = memtag::getFP(IRB);
1367 return CachedFP;
1370 Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
1371 // Prepare ring buffer data.
1372 Value *PC = memtag::getPC(TargetTriple, IRB);
1373 Value *FP = getCachedFP(IRB);
1375 // Mix FP and PC.
1376 // Assumptions:
1377 // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
1378 // FP is 0xfffffffffffFFFF0 (4 lower bits are zero)
1379 // We only really need ~20 lower non-zero bits (FFFF), so we mix like this:
1380 // 0xFFFFPPPPPPPPPPPP
1382 // FP works because in AArch64FrameLowering::getFrameIndexReference, we
1383 // prefer FP-relative offsets for functions compiled with HWASan.
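// Illustrative example (values assumed): FP = 0x0000ffffffffabc0 and
// PC = 0x0000123456789abc give (FP << 44) | PC = 0xfabc123456789abc: the low
// 48 bits still hold the PC, and the top 16 bits hold FP bits 4..19 (the FP's
// low 4 bits are zero, so nothing is clobbered).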
1384 FP = IRB.CreateShl(FP, 44);
1385 return IRB.CreateOr(PC, FP);
1388 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
1389 if (!Mapping.isInTls())
1390 ShadowBase = getShadowNonTls(IRB);
1391 else if (!WithFrameRecord && TargetTriple.isAndroid())
1392 ShadowBase = getDynamicShadowIfunc(IRB);
1394 if (!WithFrameRecord && ShadowBase)
1395 return;
1397 Value *SlotPtr = nullptr;
1398 Value *ThreadLong = nullptr;
1399 Value *ThreadLongMaybeUntagged = nullptr;
1401 auto getThreadLongMaybeUntagged = [&]() {
1402 if (!SlotPtr)
1403 SlotPtr = getHwasanThreadSlotPtr(IRB);
1404 if (!ThreadLong)
1405 ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
1406 // Extract the address field from ThreadLong. Unnecessary on AArch64 with
1407 // TBI.
1408 return TargetTriple.isAArch64() ? ThreadLong
1409 : untagPointer(IRB, ThreadLong);
1412 if (WithFrameRecord) {
1413 switch (ClRecordStackHistory) {
1414 case libcall: {
1415 // Emit a runtime call into hwasan rather than emitting instructions for
1416 // recording stack history.
1417 Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1418 IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
1419 break;
1421 case instr: {
1422 ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1424 StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
1426 // Store data to ring buffer.
1427 Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1428 Value *RecordPtr =
1429 IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
1430 IRB.CreateStore(FrameRecordInfo, RecordPtr);
1432 IRB.CreateStore(memtag::incrementThreadLong(IRB, ThreadLong, 8), SlotPtr);
1433 break;
1435 case none: {
1436 llvm_unreachable(
1437 "A stack history recording mode should've been selected.");
1442 if (!ShadowBase) {
1443 if (!ThreadLongMaybeUntagged)
1444 ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1446 // Get the shadow base address by aligning ThreadLongMaybeUntagged up.
1447 // Note: this is not correct if the pointer is already aligned.
1448 // Runtime library will make sure this never happens.
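// The computation below is (x | (2^kShadowBaseAlignment - 1)) + 1, which
// rounds x up to the next multiple of 2^32 provided x was not already aligned
// (hence the note above).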
1449 ShadowBase = IRB.CreateAdd(
1450 IRB.CreateOr(
1451 ThreadLongMaybeUntagged,
1452 ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
1453 ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
1454 ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy);
1458 bool HWAddressSanitizer::instrumentLandingPads(
1459 SmallVectorImpl<Instruction *> &LandingPadVec) {
1460 for (auto *LP : LandingPadVec) {
1461 IRBuilder<> IRB(LP->getNextNonDebugInstruction());
1462 IRB.CreateCall(
1463 HwasanHandleVfork,
1464 {memtag::readRegister(
1465 IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")});
1467 return true;
1470 bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
1471 Value *StackTag, Value *UARTag,
1472 const DominatorTree &DT,
1473 const PostDominatorTree &PDT,
1474 const LoopInfo &LI) {
1475 // Ideally, we would calculate a tagged stack base pointer and rewrite all
1476 // alloca addresses using that. Unfortunately, offsets are not known yet
1477 // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1478 // temp, shift-OR it into each alloca address and xor with the retag mask.
1479 // This generates one extra instruction per alloca use.
1480 unsigned int I = 0;
1482 for (auto &KV : SInfo.AllocasToInstrument) {
1483 auto N = I++;
1484 auto *AI = KV.first;
1485 memtag::AllocaInfo &Info = KV.second;
1486 IRBuilder<> IRB(AI->getNextNonDebugInstruction());
1488 // Replace uses of the alloca with tagged address.
1489 Value *Tag = getAllocaTag(IRB, StackTag, N);
1490 Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1491 Value *AINoTagLong = untagPointer(IRB, AILong);
1492 Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
1493 std::string Name =
1494 AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1495 Replacement->setName(Name + ".hwasan");
1497 size_t Size = memtag::getAllocaSizeInBytes(*AI);
1498 size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1500 Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
1502 auto HandleLifetime = [&](IntrinsicInst *II) {
1503 // Set the lifetime intrinsic to cover the whole alloca. This reduces the
1504 // set of assumptions we need to make about the lifetime. Without this we
1505 // would need to ensure that we can track the lifetime pointer to a
1506 // constant offset from the alloca, and would still need to change the
1507 // size to include the extra alignment we use for the untagging to make
1508 // the size consistent.
1510 // The check for standard lifetime below makes sure that we have exactly
1511 // one set of start / end in any execution (i.e. the ends are not
1512 // reachable from each other), so this will not cause any problems.
1513 II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
1514 II->setArgOperand(1, AICast);
1515 };
1516 llvm::for_each(Info.LifetimeStart, HandleLifetime);
1517 llvm::for_each(Info.LifetimeEnd, HandleLifetime);
1519 AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
1520 auto *User = U.getUser();
1521 return User != AILong && User != AICast && !isa<LifetimeIntrinsic>(User);
1522 });
1524 memtag::annotateDebugRecords(Info, retagMask(N));
1526 auto TagEnd = [&](Instruction *Node) {
1527 IRB.SetInsertPoint(Node);
1528 // When untagging, use the `AlignedSize` because we need to set the tags
1529 // for the entire alloca to original. If we used `Size` here, we would
1530 // keep the last granule tagged, and store zero in the last byte of the
1531 // last granule, due to how short granules are implemented.
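// E.g. for a 13-byte alloca the last 16-byte granule is a short granule
// while live; retagging the full AlignedSize writes UARTag to that granule's
// shadow outright instead of leaving a short-granule encoding behind.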
1532 tagAlloca(IRB, AI, UARTag, AlignedSize);
1533 };
1534 // Calls to functions that may return twice (e.g. setjmp) confuse the
1535 // postdominator analysis, and can leave memory tagged after the function
1536 // returns. Work around this by always untagging at every return statement
1537 // if returns_twice functions are called.
1538 bool StandardLifetime =
1539 !SInfo.CallsReturnTwice &&
1540 SInfo.UnrecognizedLifetimes.empty() &&
1541 memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
1542 &LI, ClMaxLifetimes);
1543 if (DetectUseAfterScope && StandardLifetime) {
1544 IntrinsicInst *Start = Info.LifetimeStart[0];
1545 IRB.SetInsertPoint(Start->getNextNode());
1546 tagAlloca(IRB, AI, Tag, Size);
1547 if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
1548 SInfo.RetVec, TagEnd)) {
1549 for (auto *End : Info.LifetimeEnd)
1550 End->eraseFromParent();
1551 }
1552 } else {
1553 tagAlloca(IRB, AI, Tag, Size);
1554 for (auto *RI : SInfo.RetVec)
1555 TagEnd(RI);
1556 // We inserted tagging outside of the lifetimes, so we have to remove
1557 // them.
1558 for (auto &II : Info.LifetimeStart)
1559 II->eraseFromParent();
1560 for (auto &II : Info.LifetimeEnd)
1561 II->eraseFromParent();
1562 }
1563 memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
1564 }
1565 for (auto &I : SInfo.UnrecognizedLifetimes)
1566 I->eraseFromParent();
1567 return true;
1568 }
1570 static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,
1571 bool Skip) {
1572 if (Skip) {
1573 ORE.emit([&]() {
1574 return OptimizationRemark(DEBUG_TYPE, "Skip", &F)
1575 << "Skipped: F=" << ore::NV("Function", &F);
1577 } else {
1578 ORE.emit([&]() {
1579 return OptimizationRemarkMissed(DEBUG_TYPE, "Sanitize", &F)
1580 << "Sanitized: F=" << ore::NV("Function", &F);
1585 bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
1586 Function &F, FunctionAnalysisManager &FAM) const {
1587 auto SkipHot = [&]() {
1588 if (!ClHotPercentileCutoff.getNumOccurrences())
1589 return false;
1590 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1591 ProfileSummaryInfo *PSI =
1592 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1593 if (!PSI || !PSI->hasProfileSummary()) {
1594 ++NumNoProfileSummaryFuncs;
1595 return false;
1596 }
1597 return PSI->isFunctionHotInCallGraphNthPercentile(
1598 ClHotPercentileCutoff, &F, FAM.getResult<BlockFrequencyAnalysis>(F));
1599 };
1601 auto SkipRandom = [&]() {
1602 if (!ClRandomSkipRate.getNumOccurrences())
1603 return false;
1604 std::bernoulli_distribution D(ClRandomSkipRate);
1605 return !D(*Rng);
1606 };
1608 bool Skip = SkipRandom() || SkipHot();
1609 emitRemark(F, FAM.getResult<OptimizationRemarkEmitterAnalysis>(F), Skip);
1610 return Skip;
1611 }
1613 void HWAddressSanitizer::sanitizeFunction(Function &F,
1614 FunctionAnalysisManager &FAM) {
1615 if (&F == HwasanCtorFunction)
1616 return;
1618 // Do not apply any instrumentation for naked functions.
1619 if (F.hasFnAttribute(Attribute::Naked))
1620 return;
1622 if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1623 return;
1625 if (F.empty())
1626 return;
1628 NumTotalFuncs++;
1630 OptimizationRemarkEmitter &ORE =
1631 FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1633 if (selectiveInstrumentationShouldSkip(F, FAM))
1634 return;
1636 NumInstrumentedFuncs++;
1638 LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1640 SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1641 SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1642 SmallVector<Instruction *, 8> LandingPadVec;
1643 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1645 memtag::StackInfoBuilder SIB(SSI, DEBUG_TYPE);
1646 for (auto &Inst : instructions(F)) {
1647 if (InstrumentStack) {
1648 SIB.visit(ORE, Inst);
1649 }
1651 if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1652 LandingPadVec.push_back(&Inst);
1654 getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument);
1656 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1657 if (!ignoreMemIntrinsic(ORE, MI))
1658 IntrinToInstrument.push_back(MI);
1659 }
1661 memtag::StackInfo &SInfo = SIB.get();
1663 initializeCallbacks(*F.getParent());
1665 if (!LandingPadVec.empty())
1666 instrumentLandingPads(LandingPadVec);
1668 if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1669 F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1670 // __hwasan_personality_thunk is a no-op for functions without an
1671 // instrumented stack, so we can drop it.
1672 F.setPersonalityFn(nullptr);
1673 }
1675 if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1676 IntrinToInstrument.empty())
1677 return;
1679 assert(!ShadowBase);
1681 BasicBlock::iterator InsertPt = F.getEntryBlock().begin();
1682 IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt);
1683 emitPrologue(EntryIRB,
1684 /*WithFrameRecord*/ ClRecordStackHistory != none &&
1685 Mapping.withFrameRecord() &&
1686 !SInfo.AllocasToInstrument.empty());
1688 if (!SInfo.AllocasToInstrument.empty()) {
1689 const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1690 const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
1691 const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
1692 Value *StackTag = getStackBaseTag(EntryIRB);
1693 Value *UARTag = getUARTag(EntryIRB);
1694 instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
1695 }
1697 // If we split the entry block, move any allocas that were originally in the
1698 // entry block back into the entry block so that they aren't treated as
1699 // dynamic allocas.
1700 if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1701 InsertPt = F.getEntryBlock().begin();
1702 for (Instruction &I :
1703 llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1704 if (auto *AI = dyn_cast<AllocaInst>(&I))
1705 if (isa<ConstantInt>(AI->getArraySize()))
1706 I.moveBefore(F.getEntryBlock(), InsertPt);
1707 }
1708 }
1710 DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
1711 PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
1712 LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
1713 DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
1714 const DataLayout &DL = F.getDataLayout();
1715 for (auto &Operand : OperandsToInstrument)
1716 instrumentMemAccess(Operand, DTU, LI, DL);
1717 DTU.flush();
1719 if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1720 for (auto *Inst : IntrinToInstrument)
1721 instrumentMemIntrinsic(Inst);
1722 }
1724 ShadowBase = nullptr;
1725 StackBaseTag = nullptr;
1726 CachedFP = nullptr;
1727 }
1729 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1730 assert(!UsePageAliases);
1731 Constant *Initializer = GV->getInitializer();
1732 uint64_t SizeInBytes =
1733 M.getDataLayout().getTypeAllocSize(Initializer->getType());
1734 uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1735 if (SizeInBytes != NewSize) {
1736 // Pad the initializer out to the next multiple of 16 bytes and add the
1737 // required short granule tag.
1738 std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1739 Init.back() = Tag;
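// E.g. with the default 16-byte granules, a 13-byte global gets three bytes
// of padding {0, 0, Tag}, so the last byte of the final granule carries the
// tag as the short-granule scheme expects.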
1740 Constant *Padding = ConstantDataArray::get(*C, Init);
1741 Initializer = ConstantStruct::getAnon({Initializer, Padding});
1742 }
1744 auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1745 GlobalValue::ExternalLinkage, Initializer,
1746 GV->getName() + ".hwasan");
1747 NewGV->copyAttributesFrom(GV);
1748 NewGV->setLinkage(GlobalValue::PrivateLinkage);
1749 NewGV->copyMetadata(GV, 0);
1750 NewGV->setAlignment(
1751 std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));
1753 // It is invalid to ICF two globals that have different tags. In the case
1754 // where the size of the global is a multiple of the tag granularity the
1755 // contents of the globals may be the same but the tags (i.e. symbol values)
1756 // may be different, and the symbols are not considered during ICF. In the
1757 // case where the size is not a multiple of the granularity, the short granule
1758 // tags would discriminate two globals with different tags, but there would
1759 // otherwise be nothing stopping such a global from being incorrectly ICF'd
1760 // with an uninstrumented (i.e. tag 0) global that happened to have the short
1761 // granule tag in the last byte.
1762 NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1764 // Descriptor format (assuming little-endian):
1765 // bytes 0-3: relative address of global
1766 // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1767 // it isn't, we create multiple descriptors)
1768 // byte 7: tag
1769 auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1770 const uint64_t MaxDescriptorSize = 0xfffff0;
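// E.g. a hypothetical 20 MiB (0x1400000-byte) global would be covered by two
// descriptors: the first describing 0xfffff0 bytes, the second the remaining
// 0x400010 bytes, both carrying the same tag byte.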
1771 for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1772 DescriptorPos += MaxDescriptorSize) {
1773 auto *Descriptor =
1774 new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1775 nullptr, GV->getName() + ".hwasan.descriptor");
1776 auto *GVRelPtr = ConstantExpr::getTrunc(
1777 ConstantExpr::getAdd(
1778 ConstantExpr::getSub(
1779 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1780 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1781 ConstantInt::get(Int64Ty, DescriptorPos)),
1782 Int32Ty);
1783 uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1784 auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1785 Descriptor->setComdat(NewGV->getComdat());
1786 Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1787 Descriptor->setSection("hwasan_globals");
1788 Descriptor->setMetadata(LLVMContext::MD_associated,
1789 MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1790 appendToCompilerUsed(M, Descriptor);
1791 }
1793 Constant *Aliasee = ConstantExpr::getIntToPtr(
1794 ConstantExpr::getAdd(
1795 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1796 ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1797 GV->getType());
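// The alias's value is NewGV's address with Tag placed at PointerTagShift
// (the pointer's top byte in the usual non-aliasing configuration), so
// references to the original name already yield a tagged address.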
1798 auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1799 GV->getLinkage(), "", Aliasee, &M);
1800 Alias->setVisibility(GV->getVisibility());
1801 Alias->takeName(GV);
1802 GV->replaceAllUsesWith(Alias);
1803 GV->eraseFromParent();
1804 }
1806 void HWAddressSanitizer::instrumentGlobals() {
1807 std::vector<GlobalVariable *> Globals;
1808 for (GlobalVariable &GV : M.globals()) {
1809 if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
1810 continue;
1812 if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") ||
1813 GV.isThreadLocal())
1814 continue;
1816 // Common symbols can't have aliases pointing to them, so they can't be tagged.
1817 if (GV.hasCommonLinkage())
1818 continue;
1820 // Globals with custom sections may be used in __start_/__stop_ enumeration,
1821 // which would be broken both by adding tags and potentially by the extra
1822 // padding/alignment that we insert.
1823 if (GV.hasSection())
1824 continue;
1826 Globals.push_back(&GV);
1827 }
1829 MD5 Hasher;
1830 Hasher.update(M.getSourceFileName());
1831 MD5::MD5Result Hash;
1832 Hasher.final(Hash);
1833 uint8_t Tag = Hash[0];
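// E.g. if the first hash byte is 0x07 (inside the short-granule range), the
// loop below bumps the first tag to 16; subsequent globals get 17, 18, ...
// and wrap back to 16 once the running value exceeds TagMaskByte.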
1835 assert(TagMaskByte >= 16);
1837 for (GlobalVariable *GV : Globals) {
1838 // Don't allow globals to be tagged with something that looks like a
1839 // short-granule tag; otherwise we lose inter-granule overflow detection, as
1840 // the fast-path shadow-vs-address check would succeed spuriously.
1841 if (Tag < 16 || Tag > TagMaskByte)
1842 Tag = 16;
1843 instrumentGlobal(GV, Tag++);
1844 }
1845 }
1847 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1848 // We need to untag stack frames as we unwind past them. That is the job of
1849 // the personality function wrapper, which either wraps an existing
1850 // personality function or acts as a personality function on its own. Each
1851 // function that has a personality function or that can be unwound past has
1852 // its personality function changed to a thunk that calls the personality
1853 // function wrapper in the runtime.
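// A sketch of each generated thunk (parameter names here are illustrative;
// the code below does not name them):
//   define i32 @__hwasan_personality_thunk[.<orig>](i32 %version,
//       i32 %actions, i64 %exception_class, ptr %exception_object,
//       ptr %context) {
//     %r = tail call i32 @__hwasan_personality_wrapper(i32 %version,
//         i32 %actions, i64 %exception_class, ptr %exception_object,
//         ptr %context, ptr <original personality or null>,
//         ptr @_Unwind_GetGR, ptr @_Unwind_GetCFA)
//     ret i32 %r
//   }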
1854 MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1855 for (Function &F : M) {
1856 if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1857 continue;
1859 if (F.hasPersonalityFn()) {
1860 PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1861 } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1862 PersonalityFns[nullptr].push_back(&F);
1863 }
1864 }
1866 if (PersonalityFns.empty())
1867 return;
1869 FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1870 "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, PtrTy,
1871 PtrTy, PtrTy, PtrTy, PtrTy);
1872 FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1873 FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1875 for (auto &P : PersonalityFns) {
1876 std::string ThunkName = kHwasanPersonalityThunkName;
1877 if (P.first)
1878 ThunkName += ("." + P.first->getName()).str();
1879 FunctionType *ThunkFnTy = FunctionType::get(
1880 Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false);
1881 bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1882 cast<GlobalValue>(P.first)->hasLocalLinkage());
1883 auto *ThunkFn = Function::Create(ThunkFnTy,
1884 IsLocal ? GlobalValue::InternalLinkage
1885 : GlobalValue::LinkOnceODRLinkage,
1886 ThunkName, &M);
1887 if (!IsLocal) {
1888 ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1889 ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1890 }
1892 auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1893 IRBuilder<> IRB(BB);
1894 CallInst *WrapperCall = IRB.CreateCall(
1895 HwasanPersonalityWrapper,
1896 {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1897 ThunkFn->getArg(3), ThunkFn->getArg(4),
1898 P.first ? P.first : Constant::getNullValue(PtrTy),
1899 UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
1900 WrapperCall->setTailCall();
1901 IRB.CreateRet(WrapperCall);
1903 for (Function *F : P.second)
1904 F->setPersonalityFn(ThunkFn);
1905 }
1906 }
1908 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1909 bool InstrumentWithCalls) {
1910 // Start with defaults.
1911 Scale = kDefaultShadowScale;
1912 Kind = OffsetKind::kTls;
1913 WithFrameRecord = true;
1915 // Tune for the target.
1916 if (TargetTriple.isOSFuchsia()) {
1917 // Fuchsia is always PIE, which means that the beginning of the address
1918 // space is always available.
1919 SetFixed(0);
1920 } else if (ClEnableKhwasan || InstrumentWithCalls) {
1921 SetFixed(0);
1922 WithFrameRecord = false;
1923 }
1925 WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord);
1927 // Apply the last of ClMappingOffset and ClMappingOffsetDynamic.
1928 Kind = optOr(ClMappingOffsetDynamic, Kind);
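// I.e. a fixed ClMappingOffset only wins if it appears after
// ClMappingOffsetDynamic on the command line (or if the dynamic flag is
// absent); otherwise the dynamically chosen kind from above is kept.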
1929 if (ClMappingOffset.getNumOccurrences() > 0 &&
1930 !(ClMappingOffsetDynamic.getNumOccurrences() > 0 &&
1931 ClMappingOffsetDynamic.getPosition() > ClMappingOffset.getPosition())) {
1932 SetFixed(ClMappingOffset);