//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
    "stack-tagging-unchecked-ld-st", cl::Hidden,
    cl::init(UncheckedSafe),
    cl::desc(
        "Unconditionally apply unchecked-ld-st optimization (even for large "
        "stack frames, or in the presence of variable sized allocas)."),
    cl::values(
        clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
        clEnumValN(
            UncheckedSafe, "safe",
            "apply unchecked-ld-st when the target is definitely within range"),
        clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
static cl::opt<bool>
    ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
                cl::desc("Apply first slot optimization for stack tagging "
                         "(eliminate ADDG Rt, Rn, 0, 0)."));

namespace {

class AArch64StackTaggingPreRA : public MachineFunctionPass {
  MachineFunction *MF;
  AArch64FunctionInfo *AFI;
  MachineFrameInfo *MFI;
  MachineRegisterInfo *MRI;
  const AArch64RegisterInfo *TRI;
  const AArch64InstrInfo *TII;

  SmallVector<MachineInstr *, 16> ReTags;

public:
  static char ID;
  AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
    initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
  }

  bool mayUseUncheckedLoadStore();
  void uncheckUsesOf(unsigned TaggedReg, int FI);
  void uncheckLoadsAndStores();
  Optional<int> findFirstSlotCandidate();

  bool runOnMachineFunction(MachineFunction &Func) override;

  StringRef getPassName() const override {
    return "AArch64 Stack Tagging PreRA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace
char AArch64StackTaggingPreRA::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                      "AArch64 Stack Tagging PreRA Pass", false, false)
INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                    "AArch64 Stack Tagging PreRA Pass", false, false)
FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
  return new AArch64StackTaggingPreRA();
}
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
  switch (Opcode) {
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:

  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:

  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:

  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:

  case AArch64::LDRSWui:

  case AArch64::STRBBui:
  case AArch64::STRHHui:
  case AArch64::STRWui:
  case AArch64::STRXui:

  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:

  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::LDPSi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:

  case AArch64::LDPSWi:

  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
    return true;
  default:
    return false;
  }
}
bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
  if (ClUncheckedLdSt == UncheckedNever)
    return false;
  else if (ClUncheckedLdSt == UncheckedAlways)
    return true;
  // This estimate could be improved if we had harder guarantees about stack
  // frame layout. With LocalStackAllocation we can estimate the SP offset to
  // any preallocated slot. AArch64FrameLowering::orderFrameObjects could put
  // tagged objects ahead of non-tagged ones, but that's not always desirable.
  //
  // Underestimating the SP offset here may require the use of LDG to
  // materialize the tagged address of the stack slot, along with a scratch
  // register allocation (post-regalloc!).
  //
  // For now we do the safe thing here and require that the entire stack frame
  // is within range of the shortest of the unchecked instructions.
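  // Worked bound (our illustration, not part of the original comment): the
  // shortest-range unchecked instructions are the byte-sized ones (e.g.
  // LDRBBui), whose unsigned 12-bit immediate (scale 1) reaches offsets
  // 0..0xfff. Capping the summed object sizes at 0xf00 leaves slack for
  // alignment padding between the objects.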
  unsigned FrameSize = 0;
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
    FrameSize += MFI->getObjectSize(i);
  bool EntireFrameReachableFromSP = FrameSize < 0xf00;
  return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
}
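
// Illustrative before/after MIR for the rewrite done by uncheckUsesOf below
// (our sketch, not from the original source; virtual register names are
// invented, and the printed spelling of the target flag may differ):
//   %addr = TAGPstack %stack.0, 0, %irg, 2
//   %v = LDRXui %addr, 0                  ; base register carries a tag
// becomes
//   %v = LDRXui %stack.0, 0               ; frame index + MO_TAGGED flag
// so the load addresses the slot SP-relative and no longer uses %addr.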
void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
  for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
       UI != E;) {
    MachineInstr *UseI = &*(UI++);
    if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
      // FI operand is always the one before the immediate offset.
      unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
      if (UseI->getOperand(OpIdx).isReg() &&
          UseI->getOperand(OpIdx).getReg() == TaggedReg) {
        UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
        UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
      }
    } else if (UseI->isCopy() &&
               Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
      uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
    }
  }
}
void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
  for (auto *I : ReTags) {
    unsigned TaggedReg = I->getOperand(0).getReg();
    int FI = I->getOperand(1).getIndex();
    uncheckUsesOf(TaggedReg, FI);
  }
}
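
// Operand layout of TAGPstack, as relied upon in this file (reconstructed
// from the operand accesses above and below; a reader's note, not a spec):
//   0: def  - the tagged address produced for the slot
//   1: FI   - the frame index of the slot
//   2: imm  - address offset (asserted to be 0 at this stage)
//   3: reg  - the tagged base pointer (IRG's def)
//   4: imm  - the tag offset assigned to the slot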
namespace {
struct SlotWithTag {
  int FI;
  int Tag;
  SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
  explicit SlotWithTag(const MachineInstr &MI)
      : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
  bool operator==(const SlotWithTag &Other) const {
    return FI == Other.FI && Tag == Other.Tag;
  }
};
} // namespace
namespace llvm {
template <> struct DenseMapInfo<SlotWithTag> {
  static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
  static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
  static unsigned getHashValue(const SlotWithTag &V) {
    return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
                        DenseMapInfo<int>::getHashValue(V.Tag));
  }
  static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
    return A == B;
  }
};
} // namespace llvm
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
  return MFI->getUseLocalStackAllocationBlock() &&
         MFI->isObjectPreAllocated(FI);
}
// Pin one of the tagged slots to offset 0 from the tagged base pointer.
// This would make its address available in a virtual register (IRG's def), as
// opposed to requiring an ADDG instruction to materialize. This effectively
// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
// live almost everywhere anyway), and therefore needs to happen before
// regalloc.
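// For instance (our sketch, with invented names): if %irg = IRG and slot
// %stack.1 is pinned to tag offset 0, its tagged address is just %irg, while
// any other slot still needs an ADDG off %irg to materialize its address.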
Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
  // Find the best (FI, Tag) pair to pin to offset 0.
  // Looking at the possible uses of a tagged address, the advantage of pinning
  // is in eliminating the ADDG instruction that would otherwise be required:
  // - COPY to physical register.
  //   Does not matter, this would trade a MOV instruction for an ADDG.
  // - ST*G matter, but those mostly appear near the function prologue where all
  //   the tagged addresses need to be materialized anyway; also, counting ST*G
  //   uses would overweight large allocas that require more than one ST*G
  //   instruction.
  // - Load/Store instructions in the address operand do not require a tagged
  //   pointer, so they also do not benefit. These operands have already been
  //   eliminated (see uncheckLoadsAndStores) so all remaining load/store
  //   instructions count.
  // - Any other instruction may benefit from being pinned to offset 0.
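  // Worked example of the scoring below (our illustration): if (FI=1, Tag=3)
  // accumulates 5 qualifying uses and (FI=0, Tag=0) only 2, then (1, 3) is
  // pinned to tag offset 0 and hands its old tag 3 to the former tag-0 slot,
  // keeping the per-slot tags a permutation of the original assignment.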
  LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
  if (!ClFirstSlot)
    return None;

  DenseMap<SlotWithTag, int> RetagScore;
  SlotWithTag MaxScoreST{-1, -1};
  int MaxScore = -1;
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (isSlotPreAllocated(MFI, ST.FI))
      continue;
    Register RetagReg = I->getOperand(0).getReg();
    if (!Register::isVirtualRegister(RetagReg))
      continue;

    int Score = 0;
    SmallVector<Register, 8> WorkList;
    WorkList.push_back(RetagReg);
    while (!WorkList.empty()) {
      Register UseReg = WorkList.back();
      WorkList.pop_back();
      for (auto &UseI : MRI->use_instructions(UseReg)) {
        unsigned Opcode = UseI.getOpcode();
        if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
            Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
            Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
            Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
            Opcode == AArch64::STZGloop_wback)
          continue;
        if (UseI.isCopy()) {
          Register DstReg = UseI.getOperand(0).getReg();
          if (Register::isVirtualRegister(DstReg))
            WorkList.push_back(DstReg);
          continue;
        }
        LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
                          << Register::virtReg2Index(UseReg) << " in " << UseI
                          << "\n");
        Score++;
      }
    }
    int TotalScore = RetagScore[ST] += Score;
    if (TotalScore > MaxScore ||
        (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
      MaxScore = TotalScore;
      MaxScoreST = ST;
    }
  }
  if (MaxScoreST.FI < 0)
    return None;
  // If FI's tag is already 0, we are done.
  if (MaxScoreST.Tag == 0)
    return MaxScoreST.FI;
  // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
  SlotWithTag SwapST{-1, -1};
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (ST.Tag == 0) {
      SwapST = ST;
      break;
    }
  }
  // Swap tags between the victim and the highest scoring pair.
  // If SwapST is still (-1, -1), that's fine, too - we'll simply take the tag
  // for the highest score slot without changing anything else.
  for (auto *&I : ReTags) {
    SlotWithTag ST{*I};
    MachineOperand &TagOp = I->getOperand(4);
    if (ST == MaxScoreST) {
      TagOp.setImm(0);
    } else if (ST == SwapST) {
      TagOp.setImm(MaxScoreST.Tag);
    }
  }
  return MaxScoreST.FI;
}
bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  MRI = &MF->getRegInfo();
  AFI = MF->getInfo<AArch64FunctionInfo>();
  TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());
  MFI = &MF->getFrameInfo();
  ReTags.clear();

  assert(MRI->isSSA());

  LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
                    << "********** Function: " << MF->getName() << '\n');
  SmallSetVector<int, 8> TaggedSlots;
  for (auto &BB : *MF) {
    for (auto &I : BB) {
      if (I.getOpcode() == AArch64::TAGPstack) {
        ReTags.push_back(&I);
        int FI = I.getOperand(1).getIndex();
        TaggedSlots.insert(FI);
        // There should be no offsets in TAGP yet.
        assert(I.getOperand(2).getImm() == 0);
      }
    }
  }
  // Take over from SSP. It does nothing for tagged slots, and should not
  // really have been enabled in the first place.
  for (int FI : TaggedSlots)
    MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);

  if (ReTags.empty())
    return false;
  if (mayUseUncheckedLoadStore())
    uncheckLoadsAndStores();
  // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
  // If the base tagged pointer is set up to the address of this slot,
  // the ADDG instruction can be eliminated.
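  // Illustrative MIR for the rewrite in the loop below (a sketch with
  // invented register names):
  //   %tagged = TAGPstack %stack.1, 0, %irg, 0   ; FI == BaseSlot, Tag == 0
  // becomes
  //   %tagged = COPY %irg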
  Optional<int> BaseSlot = findFirstSlotCandidate();
  if (BaseSlot)
    AFI->setTaggedBasePointerIndex(*BaseSlot);
  for (auto *I : ReTags) {
    int FI = I->getOperand(1).getIndex();
    int Tag = I->getOperand(4).getImm();
    Register Base = I->getOperand(3).getReg();
    if (Tag == 0 && FI == BaseSlot) {
      BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
              I->getOperand(0).getReg())
          .addReg(Base);
      I->eraseFromParent();
    }
  }

  return true;
}