1 //===- AArch64StackTagging.cpp - Stack tagging in IR --===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 //===----------------------------------------------------------------------===//
11 #include "AArch64InstrInfo.h"
12 #include "AArch64Subtarget.h"
13 #include "AArch64TargetMachine.h"
14 #include "llvm/ADT/MapVector.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/AliasAnalysis.h"
18 #include "llvm/Analysis/CFG.h"
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/PostDominators.h"
21 #include "llvm/Analysis/ScalarEvolution.h"
22 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
23 #include "llvm/Analysis/StackSafetyAnalysis.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/CodeGen/LiveRegUnits.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineLoopInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/TargetPassConfig.h"
35 #include "llvm/CodeGen/TargetRegisterInfo.h"
36 #include "llvm/IR/DebugLoc.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GetElementPtrTypeIterator.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/InstIterator.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/IntrinsicsAArch64.h"
46 #include "llvm/IR/Metadata.h"
47 #include "llvm/IR/ValueHandle.h"
48 #include "llvm/InitializePasses.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Transforms/Utils/Local.h"
54 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
62 #define DEBUG_TYPE "aarch64-stack-tagging"
64 static cl::opt
<bool> ClMergeInit(
65 "stack-tagging-merge-init", cl::Hidden
, cl::init(true),
66 cl::desc("merge stack variable initializers with tagging when possible"));
69 ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden
,
71 cl::desc("Use Stack Safety analysis results"));
73 static cl::opt
<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
74 cl::init(40), cl::Hidden
);
76 static cl::opt
<unsigned>
77 ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
80 static cl::opt
<size_t> ClMaxLifetimes(
81 "stack-tagging-max-lifetimes-for-alloca", cl::Hidden
, cl::init(3),
83 cl::desc("How many lifetime ends to handle for a single alloca."),
86 static const Align kTagGranuleSize
= Align(16);
90 class InitializerBuilder
{
95 Function
*SetTagZeroFn
;
98 // List of initializers sorted by start offset.
103 SmallVector
<Range
, 4> Ranges
;
104 // 8-aligned offset => 8-byte initializer
105 // Missing keys are zero initialized.
106 std::map
<uint64_t, Value
*> Out
;
109 InitializerBuilder(uint64_t Size
, const DataLayout
*DL
, Value
*BasePtr
,
110 Function
*SetTagFn
, Function
*SetTagZeroFn
,
112 : Size(Size
), DL(DL
), BasePtr(BasePtr
), SetTagFn(SetTagFn
),
113 SetTagZeroFn(SetTagZeroFn
), StgpFn(StgpFn
) {}
115 bool addRange(uint64_t Start
, uint64_t End
, Instruction
*Inst
) {
117 llvm::lower_bound(Ranges
, Start
, [](const Range
&LHS
, uint64_t RHS
) {
118 return LHS
.End
<= RHS
;
120 if (I
!= Ranges
.end() && End
> I
->Start
) {
124 Ranges
.insert(I
, {Start
, End
, Inst
});
128 bool addStore(uint64_t Offset
, StoreInst
*SI
, const DataLayout
*DL
) {
129 int64_t StoreSize
= DL
->getTypeStoreSize(SI
->getOperand(0)->getType());
130 if (!addRange(Offset
, Offset
+ StoreSize
, SI
))
133 applyStore(IRB
, Offset
, Offset
+ StoreSize
, SI
->getOperand(0));
137 bool addMemSet(uint64_t Offset
, MemSetInst
*MSI
) {
138 uint64_t StoreSize
= cast
<ConstantInt
>(MSI
->getLength())->getZExtValue();
139 if (!addRange(Offset
, Offset
+ StoreSize
, MSI
))
141 IRBuilder
<> IRB(MSI
);
142 applyMemSet(IRB
, Offset
, Offset
+ StoreSize
,
143 cast
<ConstantInt
>(MSI
->getValue()));
147 void applyMemSet(IRBuilder
<> &IRB
, int64_t Start
, int64_t End
,
149 // Out[] does not distinguish between zero and undef, and we already know
150 // that this memset does not overlap with any other initializer. Nothing to
154 for (int64_t Offset
= Start
- Start
% 8; Offset
< End
; Offset
+= 8) {
155 uint64_t Cst
= 0x0101010101010101UL
;
156 int LowBits
= Offset
< Start
? (Start
- Offset
) * 8 : 0;
158 Cst
= (Cst
>> LowBits
) << LowBits
;
159 int HighBits
= End
- Offset
< 8 ? (8 - (End
- Offset
)) * 8 : 0;
161 Cst
= (Cst
<< HighBits
) >> HighBits
;
163 ConstantInt::get(IRB
.getInt64Ty(), Cst
* V
->getZExtValue());
165 Value
*&CurrentV
= Out
[Offset
];
169 CurrentV
= IRB
.CreateOr(CurrentV
, C
);
174 // Take a 64-bit slice of the value starting at the given offset (in bytes).
175 // Offset can be negative. Pad with zeroes on both sides when necessary.
176 Value
*sliceValue(IRBuilder
<> &IRB
, Value
*V
, int64_t Offset
) {
178 V
= IRB
.CreateLShr(V
, Offset
* 8);
179 V
= IRB
.CreateZExtOrTrunc(V
, IRB
.getInt64Ty());
180 } else if (Offset
< 0) {
181 V
= IRB
.CreateZExtOrTrunc(V
, IRB
.getInt64Ty());
182 V
= IRB
.CreateShl(V
, -Offset
* 8);
184 V
= IRB
.CreateZExtOrTrunc(V
, IRB
.getInt64Ty());
189 void applyStore(IRBuilder
<> &IRB
, int64_t Start
, int64_t End
,
190 Value
*StoredValue
) {
191 StoredValue
= flatten(IRB
, StoredValue
);
192 for (int64_t Offset
= Start
- Start
% 8; Offset
< End
; Offset
+= 8) {
193 Value
*V
= sliceValue(IRB
, StoredValue
, Offset
- Start
);
194 Value
*&CurrentV
= Out
[Offset
];
198 CurrentV
= IRB
.CreateOr(CurrentV
, V
);
203 void generate(IRBuilder
<> &IRB
) {
204 LLVM_DEBUG(dbgs() << "Combined initializer\n");
205 // No initializers => the entire allocation is undef.
206 if (Ranges
.empty()) {
207 emitUndef(IRB
, 0, Size
);
211 // Look through 8-byte initializer list 16 bytes at a time;
212 // If one of the two 8-byte halfs is non-zero non-undef, emit STGP.
213 // Otherwise, emit zeroes up to next available item.
214 uint64_t LastOffset
= 0;
215 for (uint64_t Offset
= 0; Offset
< Size
; Offset
+= 16) {
216 auto I1
= Out
.find(Offset
);
217 auto I2
= Out
.find(Offset
+ 8);
218 if (I1
== Out
.end() && I2
== Out
.end())
221 if (Offset
> LastOffset
)
222 emitZeroes(IRB
, LastOffset
, Offset
- LastOffset
);
224 Value
*Store1
= I1
== Out
.end() ? Constant::getNullValue(IRB
.getInt64Ty())
226 Value
*Store2
= I2
== Out
.end() ? Constant::getNullValue(IRB
.getInt64Ty())
228 emitPair(IRB
, Offset
, Store1
, Store2
);
229 LastOffset
= Offset
+ 16;
232 // memset(0) does not update Out[], therefore the tail can be either undef
234 if (LastOffset
< Size
)
235 emitZeroes(IRB
, LastOffset
, Size
- LastOffset
);
237 for (const auto &R
: Ranges
) {
238 R
.Inst
->eraseFromParent();
242 void emitZeroes(IRBuilder
<> &IRB
, uint64_t Offset
, uint64_t Size
) {
243 LLVM_DEBUG(dbgs() << " [" << Offset
<< ", " << Offset
+ Size
245 Value
*Ptr
= BasePtr
;
247 Ptr
= IRB
.CreateConstGEP1_32(IRB
.getInt8Ty(), Ptr
, Offset
);
248 IRB
.CreateCall(SetTagZeroFn
,
249 {Ptr
, ConstantInt::get(IRB
.getInt64Ty(), Size
)});
252 void emitUndef(IRBuilder
<> &IRB
, uint64_t Offset
, uint64_t Size
) {
253 LLVM_DEBUG(dbgs() << " [" << Offset
<< ", " << Offset
+ Size
255 Value
*Ptr
= BasePtr
;
257 Ptr
= IRB
.CreateConstGEP1_32(IRB
.getInt8Ty(), Ptr
, Offset
);
258 IRB
.CreateCall(SetTagFn
, {Ptr
, ConstantInt::get(IRB
.getInt64Ty(), Size
)});
261 void emitPair(IRBuilder
<> &IRB
, uint64_t Offset
, Value
*A
, Value
*B
) {
262 LLVM_DEBUG(dbgs() << " [" << Offset
<< ", " << Offset
+ 16 << "):\n");
263 LLVM_DEBUG(dbgs() << " " << *A
<< "\n " << *B
<< "\n");
264 Value
*Ptr
= BasePtr
;
266 Ptr
= IRB
.CreateConstGEP1_32(IRB
.getInt8Ty(), Ptr
, Offset
);
267 IRB
.CreateCall(StgpFn
, {Ptr
, A
, B
});
270 Value
*flatten(IRBuilder
<> &IRB
, Value
*V
) {
271 if (V
->getType()->isIntegerTy())
273 // vector of pointers -> vector of ints
274 if (VectorType
*VecTy
= dyn_cast
<VectorType
>(V
->getType())) {
275 LLVMContext
&Ctx
= IRB
.getContext();
276 Type
*EltTy
= VecTy
->getElementType();
277 if (EltTy
->isPointerTy()) {
278 uint32_t EltSize
= DL
->getTypeSizeInBits(EltTy
);
279 auto *NewTy
= FixedVectorType::get(
280 IntegerType::get(Ctx
, EltSize
),
281 cast
<FixedVectorType
>(VecTy
)->getNumElements());
282 V
= IRB
.CreatePointerCast(V
, NewTy
);
285 return IRB
.CreateBitOrPointerCast(
286 V
, IRB
.getIntNTy(DL
->getTypeStoreSize(V
->getType()) * 8));
290 class AArch64StackTagging
: public FunctionPass
{
291 const bool MergeInit
;
292 const bool UseStackSafety
;
295 static char ID
; // Pass ID, replacement for typeid
297 AArch64StackTagging(bool IsOptNone
= false)
299 MergeInit(ClMergeInit
.getNumOccurrences() ? ClMergeInit
: !IsOptNone
),
300 UseStackSafety(ClUseStackSafety
.getNumOccurrences() ? ClUseStackSafety
302 initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
305 void tagAlloca(AllocaInst
*AI
, Instruction
*InsertBefore
, Value
*Ptr
,
307 void untagAlloca(AllocaInst
*AI
, Instruction
*InsertBefore
, uint64_t Size
);
309 Instruction
*collectInitializers(Instruction
*StartInst
, Value
*StartPtr
,
310 uint64_t Size
, InitializerBuilder
&IB
);
312 Instruction
*insertBaseTaggedPointer(
313 const MapVector
<AllocaInst
*, memtag::AllocaInfo
> &Allocas
,
314 const DominatorTree
*DT
);
315 bool runOnFunction(Function
&F
) override
;
317 StringRef
getPassName() const override
{ return "AArch64 Stack Tagging"; }
320 Function
*F
= nullptr;
321 Function
*SetTagFunc
= nullptr;
322 const DataLayout
*DL
= nullptr;
323 AAResults
*AA
= nullptr;
324 const StackSafetyGlobalInfo
*SSI
= nullptr;
326 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
327 AU
.setPreservesCFG();
329 AU
.addRequired
<StackSafetyGlobalInfoWrapperPass
>();
331 AU
.addRequired
<AAResultsWrapperPass
>();
335 } // end anonymous namespace
337 char AArch64StackTagging::ID
= 0;
339 INITIALIZE_PASS_BEGIN(AArch64StackTagging
, DEBUG_TYPE
, "AArch64 Stack Tagging",
341 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
342 INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass
)
343 INITIALIZE_PASS_END(AArch64StackTagging
, DEBUG_TYPE
, "AArch64 Stack Tagging",
346 FunctionPass
*llvm::createAArch64StackTaggingPass(bool IsOptNone
) {
347 return new AArch64StackTagging(IsOptNone
);
350 Instruction
*AArch64StackTagging::collectInitializers(Instruction
*StartInst
,
353 InitializerBuilder
&IB
) {
354 MemoryLocation AllocaLoc
{StartPtr
, Size
};
355 Instruction
*LastInst
= StartInst
;
356 BasicBlock::iterator
BI(StartInst
);
359 for (; Count
< ClScanLimit
&& !BI
->isTerminator(); ++BI
) {
360 if (!isa
<DbgInfoIntrinsic
>(*BI
))
363 if (isNoModRef(AA
->getModRefInfo(&*BI
, AllocaLoc
)))
366 if (!isa
<StoreInst
>(BI
) && !isa
<MemSetInst
>(BI
)) {
367 // If the instruction is readnone, ignore it, otherwise bail out. We
368 // don't even allow readonly here because we don't want something like:
369 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
370 if (BI
->mayWriteToMemory() || BI
->mayReadFromMemory())
375 if (StoreInst
*NextStore
= dyn_cast
<StoreInst
>(BI
)) {
376 if (!NextStore
->isSimple())
379 // Check to see if this store is to a constant offset from the start ptr.
380 std::optional
<int64_t> Offset
=
381 NextStore
->getPointerOperand()->getPointerOffsetFrom(StartPtr
, *DL
);
385 if (!IB
.addStore(*Offset
, NextStore
, DL
))
387 LastInst
= NextStore
;
389 MemSetInst
*MSI
= cast
<MemSetInst
>(BI
);
391 if (MSI
->isVolatile() || !isa
<ConstantInt
>(MSI
->getLength()))
394 if (!isa
<ConstantInt
>(MSI
->getValue()))
397 // Check to see if this store is to a constant offset from the start ptr.
398 std::optional
<int64_t> Offset
=
399 MSI
->getDest()->getPointerOffsetFrom(StartPtr
, *DL
);
403 if (!IB
.addMemSet(*Offset
, MSI
))
411 void AArch64StackTagging::tagAlloca(AllocaInst
*AI
, Instruction
*InsertBefore
,
412 Value
*Ptr
, uint64_t Size
) {
413 auto SetTagZeroFunc
=
414 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::aarch64_settag_zero
);
416 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::aarch64_stgp
);
418 InitializerBuilder
IB(Size
, DL
, Ptr
, SetTagFunc
, SetTagZeroFunc
, StgpFunc
);
420 Triple(AI
->getModule()->getTargetTriple()).isLittleEndian();
421 // Current implementation of initializer merging assumes little endianness.
422 if (MergeInit
&& !F
->hasOptNone() && LittleEndian
&&
423 Size
< ClMergeInitSizeLimit
) {
424 LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
425 << ", size = " << Size
<< "\n");
426 InsertBefore
= collectInitializers(InsertBefore
, Ptr
, Size
, IB
);
429 IRBuilder
<> IRB(InsertBefore
);
433 void AArch64StackTagging::untagAlloca(AllocaInst
*AI
, Instruction
*InsertBefore
,
435 IRBuilder
<> IRB(InsertBefore
);
436 IRB
.CreateCall(SetTagFunc
, {IRB
.CreatePointerCast(AI
, IRB
.getPtrTy()),
437 ConstantInt::get(IRB
.getInt64Ty(), Size
)});
440 Instruction
*AArch64StackTagging::insertBaseTaggedPointer(
441 const MapVector
<AllocaInst
*, memtag::AllocaInfo
> &AllocasToInstrument
,
442 const DominatorTree
*DT
) {
443 BasicBlock
*PrologueBB
= nullptr;
444 // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
445 for (auto &I
: AllocasToInstrument
) {
446 const memtag::AllocaInfo
&Info
= I
.second
;
447 AllocaInst
*AI
= Info
.AI
;
449 PrologueBB
= AI
->getParent();
452 PrologueBB
= DT
->findNearestCommonDominator(PrologueBB
, AI
->getParent());
456 IRBuilder
<> IRB(&PrologueBB
->front());
458 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::aarch64_irg_sp
);
460 IRB
.CreateCall(IRG_SP
, {Constant::getNullValue(IRB
.getInt64Ty())});
461 Base
->setName("basetag");
465 // FIXME: check for MTE extension
466 bool AArch64StackTagging::runOnFunction(Function
&Fn
) {
467 if (!Fn
.hasFnAttribute(Attribute::SanitizeMemTag
))
471 SSI
= &getAnalysis
<StackSafetyGlobalInfoWrapperPass
>().getResult();
473 DL
= &Fn
.getParent()->getDataLayout();
475 AA
= &getAnalysis
<AAResultsWrapperPass
>().getAAResults();
477 memtag::StackInfoBuilder
SIB(SSI
);
478 for (Instruction
&I
: instructions(F
))
480 memtag::StackInfo
&SInfo
= SIB
.get();
482 if (SInfo
.AllocasToInstrument
.empty())
485 std::unique_ptr
<DominatorTree
> DeleteDT
;
486 DominatorTree
*DT
= nullptr;
487 if (auto *P
= getAnalysisIfAvailable
<DominatorTreeWrapperPass
>())
488 DT
= &P
->getDomTree();
491 DeleteDT
= std::make_unique
<DominatorTree
>(*F
);
495 std::unique_ptr
<PostDominatorTree
> DeletePDT
;
496 PostDominatorTree
*PDT
= nullptr;
497 if (auto *P
= getAnalysisIfAvailable
<PostDominatorTreeWrapperPass
>())
498 PDT
= &P
->getPostDomTree();
500 if (PDT
== nullptr) {
501 DeletePDT
= std::make_unique
<PostDominatorTree
>(*F
);
502 PDT
= DeletePDT
.get();
505 std::unique_ptr
<LoopInfo
> DeleteLI
;
506 LoopInfo
*LI
= nullptr;
507 if (auto *LIWP
= getAnalysisIfAvailable
<LoopInfoWrapperPass
>()) {
508 LI
= &LIWP
->getLoopInfo();
510 DeleteLI
= std::make_unique
<LoopInfo
>(*DT
);
515 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::aarch64_settag
);
517 Instruction
*Base
= insertBaseTaggedPointer(SInfo
.AllocasToInstrument
, DT
);
520 for (auto &I
: SInfo
.AllocasToInstrument
) {
521 memtag::AllocaInfo
&Info
= I
.second
;
522 assert(Info
.AI
&& SIB
.isInterestingAlloca(*Info
.AI
));
523 TrackingVH
<Instruction
> OldAI
= Info
.AI
;
524 memtag::alignAndPadAlloca(Info
, kTagGranuleSize
);
525 AllocaInst
*AI
= Info
.AI
;
527 NextTag
= (NextTag
+ 1) % 16;
528 // Replace alloca with tagp(alloca).
529 IRBuilder
<> IRB(Info
.AI
->getNextNode());
530 Function
*TagP
= Intrinsic::getDeclaration(
531 F
->getParent(), Intrinsic::aarch64_tagp
, {Info
.AI
->getType()});
532 Instruction
*TagPCall
=
533 IRB
.CreateCall(TagP
, {Constant::getNullValue(Info
.AI
->getType()), Base
,
534 ConstantInt::get(IRB
.getInt64Ty(), Tag
)});
535 if (Info
.AI
->hasName())
536 TagPCall
->setName(Info
.AI
->getName() + ".tag");
537 Info
.AI
->replaceAllUsesWith(TagPCall
);
538 TagPCall
->setOperand(0, Info
.AI
);
540 // Calls to functions that may return twice (e.g. setjmp) confuse the
541 // postdominator analysis, and will leave us to keep memory tagged after
542 // function return. Work around this by always untagging at every return
543 // statement if return_twice functions are called.
544 bool StandardLifetime
=
545 SInfo
.UnrecognizedLifetimes
.empty() &&
546 memtag::isStandardLifetime(Info
.LifetimeStart
, Info
.LifetimeEnd
, DT
, LI
,
548 !SInfo
.CallsReturnTwice
;
549 if (StandardLifetime
) {
550 IntrinsicInst
*Start
= Info
.LifetimeStart
[0];
552 cast
<ConstantInt
>(Start
->getArgOperand(0))->getZExtValue();
553 Size
= alignTo(Size
, kTagGranuleSize
);
554 tagAlloca(AI
, Start
->getNextNode(), Start
->getArgOperand(1), Size
);
556 auto TagEnd
= [&](Instruction
*Node
) { untagAlloca(AI
, Node
, Size
); };
558 !memtag::forAllReachableExits(*DT
, *PDT
, *LI
, Start
, Info
.LifetimeEnd
,
559 SInfo
.RetVec
, TagEnd
)) {
560 for (auto *End
: Info
.LifetimeEnd
)
561 End
->eraseFromParent();
564 uint64_t Size
= *Info
.AI
->getAllocationSize(*DL
);
565 Value
*Ptr
= IRB
.CreatePointerCast(TagPCall
, IRB
.getPtrTy());
566 tagAlloca(AI
, &*IRB
.GetInsertPoint(), Ptr
, Size
);
567 for (auto *RI
: SInfo
.RetVec
) {
568 untagAlloca(AI
, RI
, Size
);
570 // We may have inserted tag/untag outside of any lifetime interval.
571 // Remove all lifetime intrinsics for this alloca.
572 for (auto *II
: Info
.LifetimeStart
)
573 II
->eraseFromParent();
574 for (auto *II
: Info
.LifetimeEnd
)
575 II
->eraseFromParent();
578 // Fixup debug intrinsics to point to the new alloca.
579 for (auto *DVI
: Info
.DbgVariableIntrinsics
)
580 DVI
->replaceVariableLocationOp(OldAI
, Info
.AI
);
583 // If we have instrumented at least one alloca, all unrecognized lifetime
584 // intrinsics have to go.
585 for (auto *I
: SInfo
.UnrecognizedLifetimes
)
586 I
->eraseFromParent();