1 //===- bolt/Passes/FrameAnalysis.cpp --------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the FrameAnalysis class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/FrameAnalysis.h"
14 #include "bolt/Core/CallGraphWalker.h"
15 #include "bolt/Core/ParallelUtilities.h"
16 #include "llvm/MC/MCRegisterInfo.h"
17 #include "llvm/Support/Timer.h"
21 #define DEBUG_TYPE "fa"
26 extern cl::OptionCategory BoltOptCategory
;
27 extern cl::opt
<unsigned> Verbosity
;
29 static cl::list
<std::string
>
30 FrameOptFunctionNames("funcs-fop", cl::CommaSeparated
,
31 cl::desc("list of functions to apply frame opts"),
32 cl::value_desc("func1,func2,func3,..."));
34 static cl::opt
<std::string
> FrameOptFunctionNamesFile(
36 cl::desc("file with list of functions to frame optimize"));
38 static cl::opt
<bool> TimeFA("time-fa", cl::desc("time frame analysis steps"),
39 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
42 ExperimentalSW("experimental-shrink-wrapping",
43 cl::desc("process functions with stack pointer arithmetic"),
44 cl::ReallyHidden
, cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
46 bool shouldFrameOptimize(const llvm::bolt::BinaryFunction
&Function
) {
47 if (Function
.hasUnknownControlFlow())
50 if (!FrameOptFunctionNamesFile
.empty()) {
51 assert(!FrameOptFunctionNamesFile
.empty() && "unexpected empty file name");
52 std::ifstream
FuncsFile(FrameOptFunctionNamesFile
, std::ios::in
);
54 while (std::getline(FuncsFile
, FuncName
))
55 FrameOptFunctionNames
.push_back(FuncName
);
56 FrameOptFunctionNamesFile
= "";
59 if (FrameOptFunctionNames
.empty())
61 return llvm::any_of(FrameOptFunctionNames
, [&](std::string
&Name
) {
62 return Function
.hasName(Name
);
70 raw_ostream
&operator<<(raw_ostream
&OS
, const FrameIndexEntry
&FIE
) {
71 OS
<< "FrameIndexEntry<IsLoad: " << FIE
.IsLoad
<< ", IsStore: " << FIE
.IsStore
72 << ", IsStoreFromReg: " << FIE
.IsStoreFromReg
73 << ", RegOrImm: " << FIE
.RegOrImm
<< ", StackOffset: ";
74 if (FIE
.StackOffset
< 0)
75 OS
<< "-" << Twine::utohexstr(-FIE
.StackOffset
);
77 OS
<< "+" << Twine::utohexstr(FIE
.StackOffset
);
78 OS
<< ", Size: " << static_cast<int>(FIE
.Size
)
79 << ", IsSimple: " << FIE
.IsSimple
<< ">";
85 /// This class should be used to iterate through basic blocks in layout order
86 /// to analyze instructions for frame accesses. The user should call
87 /// enterNewBB() whenever starting analyzing a new BB and doNext() for each
88 /// instruction. After doNext(), if isValidAccess() returns true, it means the
89 /// current instruction accesses the frame and getFIE() may be used to obtain
90 /// details about this access.
91 class FrameAccessAnalysis
{
92 /// We depend on Stack Pointer Tracking to figure out the current SP offset
93 /// value at a given program point
94 StackPointerTracking
&SPT
;
97 const BinaryContext
&BC
;
98 const BinaryFunction
&BF
;
99 // Vars used for storing useful CFI info to give us a hint about how the stack
100 // is used in this function
103 int64_t CfaOffset
{-8};
105 std::stack
<std::pair
<int64_t, uint16_t>> CFIStack
;
106 /// Our pointer to access SPT info
107 const MCInst
*Prev
{nullptr};
108 /// Info about the last frame access
109 bool IsValidAccess
{false};
110 bool EscapesStackAddress
{false};
113 bool decodeFrameAccess(const MCInst
&Inst
) {
116 int64_t StackOffset
= 0;
117 bool IsIndexed
= false;
118 if (!BC
.MIB
->isStackAccess(
119 Inst
, FIE
.IsLoad
, FIE
.IsStore
, FIE
.IsStoreFromReg
, Reg
, SrcImm
,
120 FIE
.StackPtrReg
, StackOffset
, FIE
.Size
, FIE
.IsSimple
, IsIndexed
)) {
124 if (IsIndexed
|| (!FIE
.Size
&& (FIE
.IsLoad
|| FIE
.IsStore
))) {
125 LLVM_DEBUG(dbgs() << "Giving up on indexed memory access/unknown size\n");
126 LLVM_DEBUG(dbgs() << "Blame insn: ");
127 LLVM_DEBUG(BC
.printInstruction(dbgs(), Inst
, 0, &BF
, true, false, false));
128 LLVM_DEBUG(Inst
.dump());
132 assert(FIE
.Size
!= 0 || (!FIE
.IsLoad
&& !FIE
.IsStore
));
134 FIE
.RegOrImm
= SrcImm
;
135 if (FIE
.IsLoad
|| FIE
.IsStoreFromReg
)
138 if (FIE
.StackPtrReg
== BC
.MIB
->getStackPointer() && SPOffset
!= SPT
.EMPTY
&&
139 SPOffset
!= SPT
.SUPERPOSITION
) {
141 dbgs() << "Adding access via SP while CFA reg is another one\n");
142 FIE
.StackOffset
= SPOffset
+ StackOffset
;
143 } else if (FIE
.StackPtrReg
== BC
.MIB
->getFramePointer() &&
144 FPOffset
!= SPT
.EMPTY
&& FPOffset
!= SPT
.SUPERPOSITION
) {
146 dbgs() << "Adding access via FP while CFA reg is another one\n");
147 FIE
.StackOffset
= FPOffset
+ StackOffset
;
148 } else if (FIE
.StackPtrReg
==
149 *BC
.MRI
->getLLVMRegNum(CfaReg
, /*isEH=*/false)) {
150 FIE
.StackOffset
= CfaOffset
+ StackOffset
;
153 dbgs() << "Found stack access with reg different than cfa reg.\n");
154 LLVM_DEBUG(dbgs() << "\tCurrent CFA reg: " << CfaReg
155 << "\n\tStack access reg: " << FIE
.StackPtrReg
<< "\n");
156 LLVM_DEBUG(dbgs() << "Blame insn: ");
157 LLVM_DEBUG(Inst
.dump());
160 IsValidAccess
= true;
165 FrameAccessAnalysis(BinaryFunction
&BF
, StackPointerTracking
&SPT
)
166 : SPT(SPT
), BC(BF
.getBinaryContext()), BF(BF
) {}
168 void enterNewBB() { Prev
= nullptr; }
169 const FrameIndexEntry
&getFIE() const { return FIE
; }
170 int getSPOffset() const { return SPOffset
; }
171 bool isValidAccess() const { return IsValidAccess
; }
172 bool doesEscapeStackAddress() const { return EscapesStackAddress
; }
174 bool doNext(const BinaryBasicBlock
&BB
, const MCInst
&Inst
) {
175 IsValidAccess
= false;
176 EscapesStackAddress
= false;
177 std::tie(SPOffset
, FPOffset
) =
178 Prev
? *SPT
.getStateAt(*Prev
) : *SPT
.getStateAt(BB
);
180 // Use CFI information to keep track of which register is being used to
182 if (BC
.MIB
->isCFI(Inst
)) {
183 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
184 switch (CFI
->getOperation()) {
185 case MCCFIInstruction::OpDefCfa
:
186 CfaOffset
= CFI
->getOffset();
188 case MCCFIInstruction::OpDefCfaRegister
:
189 CfaReg
= CFI
->getRegister();
191 case MCCFIInstruction::OpDefCfaOffset
:
192 CfaOffset
= CFI
->getOffset();
194 case MCCFIInstruction::OpRememberState
:
195 CFIStack
.push(std::make_pair(CfaOffset
, CfaReg
));
197 case MCCFIInstruction::OpRestoreState
: {
198 if (CFIStack
.empty())
199 dbgs() << "Assertion is about to fail: " << BF
.getPrintName() << "\n";
200 assert(!CFIStack
.empty() && "Corrupt CFI stack");
201 std::pair
<int64_t, uint16_t> &Elem
= CFIStack
.top();
203 CfaOffset
= Elem
.first
;
204 CfaReg
= Elem
.second
;
207 case MCCFIInstruction::OpAdjustCfaOffset
:
208 llvm_unreachable("Unhandled AdjustCfaOffset");
216 if (BC
.MIB
->escapesVariable(Inst
, SPT
.HasFramePointer
)) {
217 EscapesStackAddress
= true;
218 if (!opts::ExperimentalSW
) {
220 dbgs() << "Leaked stack address, giving up on this function.\n");
221 LLVM_DEBUG(dbgs() << "Blame insn: ");
222 LLVM_DEBUG(Inst
.dump());
227 return decodeFrameAccess(Inst
);
231 } // end anonymous namespace
233 void FrameAnalysis::addArgAccessesFor(MCInst
&Inst
, ArgAccesses
&&AA
) {
234 if (ErrorOr
<ArgAccesses
&> OldAA
= getArgAccessesFor(Inst
)) {
235 if (OldAA
->AssumeEverything
)
237 *OldAA
= std::move(AA
);
240 if (AA
.AssumeEverything
) {
241 // Index 0 in ArgAccessesVector represents an "assumeeverything" entry
242 BC
.MIB
->addAnnotation(Inst
, "ArgAccessEntry", 0U);
245 BC
.MIB
->addAnnotation(Inst
, "ArgAccessEntry",
246 (unsigned)ArgAccessesVector
.size());
247 ArgAccessesVector
.emplace_back(std::move(AA
));
250 void FrameAnalysis::addArgInStackAccessFor(MCInst
&Inst
,
251 const ArgInStackAccess
&Arg
) {
252 ErrorOr
<ArgAccesses
&> AA
= getArgAccessesFor(Inst
);
254 addArgAccessesFor(Inst
, ArgAccesses(false));
255 AA
= getArgAccessesFor(Inst
);
256 assert(AA
&& "Object setup failed");
258 std::set
<ArgInStackAccess
> &Set
= AA
->Set
;
259 assert(!AA
->AssumeEverything
&& "Adding arg to AssumeEverything set");
263 void FrameAnalysis::addFIEFor(MCInst
&Inst
, const FrameIndexEntry
&FIE
) {
264 BC
.MIB
->addAnnotation(Inst
, "FrameAccessEntry", (unsigned)FIEVector
.size());
265 FIEVector
.emplace_back(FIE
);
268 ErrorOr
<ArgAccesses
&> FrameAnalysis::getArgAccessesFor(const MCInst
&Inst
) {
269 if (auto Idx
= BC
.MIB
->tryGetAnnotationAs
<unsigned>(Inst
, "ArgAccessEntry")) {
270 assert(ArgAccessesVector
.size() > *Idx
&& "Out of bounds");
271 return ArgAccessesVector
[*Idx
];
273 return make_error_code(errc::result_out_of_range
);
276 ErrorOr
<const ArgAccesses
&>
277 FrameAnalysis::getArgAccessesFor(const MCInst
&Inst
) const {
278 if (auto Idx
= BC
.MIB
->tryGetAnnotationAs
<unsigned>(Inst
, "ArgAccessEntry")) {
279 assert(ArgAccessesVector
.size() > *Idx
&& "Out of bounds");
280 return ArgAccessesVector
[*Idx
];
282 return make_error_code(errc::result_out_of_range
);
285 ErrorOr
<const FrameIndexEntry
&>
286 FrameAnalysis::getFIEFor(const MCInst
&Inst
) const {
288 BC
.MIB
->tryGetAnnotationAs
<unsigned>(Inst
, "FrameAccessEntry")) {
289 assert(FIEVector
.size() > *Idx
&& "Out of bounds");
290 return FIEVector
[*Idx
];
292 return make_error_code(errc::result_out_of_range
);
295 void FrameAnalysis::traverseCG(BinaryFunctionCallGraph
&CG
) {
296 CallGraphWalker
CGWalker(CG
);
298 CGWalker
.registerVisitor(
299 [&](BinaryFunction
*Func
) -> bool { return computeArgsAccessed(*Func
); });
303 DEBUG_WITH_TYPE("ra", {
304 for (auto &MapEntry
: ArgsTouchedMap
) {
305 const BinaryFunction
*Func
= MapEntry
.first
;
306 const auto &Set
= MapEntry
.second
;
307 dbgs() << "Args accessed for " << Func
->getPrintName() << ": ";
308 if (!Set
.empty() && Set
.count(std::make_pair(-1, 0)))
309 dbgs() << "assume everything";
311 for (const std::pair
<int64_t, uint8_t> &Entry
: Set
)
312 dbgs() << "[" << Entry
.first
<< ", " << (int)Entry
.second
<< "] ";
318 bool FrameAnalysis::updateArgsTouchedFor(const BinaryFunction
&BF
, MCInst
&Inst
,
320 if (!BC
.MIB
->isCall(Inst
))
323 std::set
<int64_t> Res
;
324 const MCSymbol
*TargetSymbol
= BC
.MIB
->getTargetSymbol(Inst
);
325 // If indirect call, we conservatively assume it accesses all stack positions
326 if (TargetSymbol
== nullptr) {
327 addArgAccessesFor(Inst
, ArgAccesses(/*AssumeEverything=*/true));
328 if (!FunctionsRequireAlignment
.count(&BF
)) {
329 FunctionsRequireAlignment
.insert(&BF
);
335 const BinaryFunction
*Function
= BC
.getFunctionForSymbol(TargetSymbol
);
336 // Call to a function without a BinaryFunction object. Conservatively assume
337 // it accesses all stack positions
338 if (Function
== nullptr) {
339 addArgAccessesFor(Inst
, ArgAccesses(/*AssumeEverything=*/true));
340 if (!FunctionsRequireAlignment
.count(&BF
)) {
341 FunctionsRequireAlignment
.insert(&BF
);
347 auto Iter
= ArgsTouchedMap
.find(Function
);
349 bool Changed
= false;
350 if (BC
.MIB
->isTailCall(Inst
) && Iter
!= ArgsTouchedMap
.end()) {
351 // Ignore checking CurOffset because we can't always reliably determine the
352 // offset specially after an epilogue, where tailcalls happen. It should be
354 for (std::pair
<int64_t, uint8_t> Elem
: Iter
->second
) {
355 if (!llvm::is_contained(ArgsTouchedMap
[&BF
], Elem
)) {
356 ArgsTouchedMap
[&BF
].emplace(Elem
);
361 if (FunctionsRequireAlignment
.count(Function
) &&
362 !FunctionsRequireAlignment
.count(&BF
)) {
364 FunctionsRequireAlignment
.insert(&BF
);
366 if (Iter
== ArgsTouchedMap
.end())
369 if (CurOffset
== StackPointerTracking::EMPTY
||
370 CurOffset
== StackPointerTracking::SUPERPOSITION
) {
371 addArgAccessesFor(Inst
, ArgAccesses(/*AssumeEverything=*/true));
375 for (std::pair
<int64_t, uint8_t> Elem
: Iter
->second
) {
376 if (Elem
.first
== -1) {
377 addArgAccessesFor(Inst
, ArgAccesses(/*AssumeEverything=*/true));
380 LLVM_DEBUG(dbgs() << "Added arg in stack access annotation "
381 << CurOffset
+ Elem
.first
<< "\n");
382 addArgInStackAccessFor(
383 Inst
, ArgInStackAccess
{/*StackOffset=*/CurOffset
+ Elem
.first
,
384 /*Size=*/Elem
.second
});
389 bool FrameAnalysis::computeArgsAccessed(BinaryFunction
&BF
) {
390 if (!BF
.isSimple() || !BF
.hasCFG()) {
391 LLVM_DEBUG(dbgs() << "Treating " << BF
.getPrintName()
392 << " conservatively.\n");
393 ArgsTouchedMap
[&BF
].emplace(std::make_pair(-1, 0));
394 if (!FunctionsRequireAlignment
.count(&BF
)) {
395 FunctionsRequireAlignment
.insert(&BF
);
401 LLVM_DEBUG(dbgs() << "Now computing args accessed for: " << BF
.getPrintName()
403 bool UpdatedArgsTouched
= false;
405 FrameAccessAnalysis
FAA(BF
, getSPT(BF
));
407 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
410 for (MCInst
&Inst
: *BB
) {
411 if (!FAA
.doNext(*BB
, Inst
) || FAA
.doesEscapeStackAddress()) {
412 ArgsTouchedMap
[&BF
].emplace(std::make_pair(-1, 0));
417 // Check for calls -- attach stack accessing info to them regarding their
419 if (updateArgsTouchedFor(BF
, Inst
, FAA
.getSPOffset()))
420 UpdatedArgsTouched
= true;
422 // Check for stack accesses that affect callers
423 if (!FAA
.isValidAccess())
426 const FrameIndexEntry
&FIE
= FAA
.getFIE();
427 if (FIE
.StackOffset
< 0)
429 if (ArgsTouchedMap
[&BF
].find(std::make_pair(FIE
.StackOffset
, FIE
.Size
)) !=
430 ArgsTouchedMap
[&BF
].end())
433 // Record accesses to the previous stack frame
434 ArgsTouchedMap
[&BF
].emplace(std::make_pair(FIE
.StackOffset
, FIE
.Size
));
435 UpdatedArgsTouched
= true;
437 dbgs() << "Arg access offset " << FIE
.StackOffset
<< " added to:\n";
438 BC
.printInstruction(dbgs(), Inst
, 0, &BF
, true);
444 if (FunctionsRequireAlignment
.count(&BF
))
445 return UpdatedArgsTouched
;
448 FunctionsRequireAlignment
.insert(&BF
);
452 for (BinaryBasicBlock
&BB
: BF
) {
453 for (MCInst
&Inst
: BB
) {
454 if (BC
.MIB
->requiresAlignedAddress(Inst
)) {
455 FunctionsRequireAlignment
.insert(&BF
);
460 return UpdatedArgsTouched
;
463 bool FrameAnalysis::restoreFrameIndex(BinaryFunction
&BF
) {
464 FrameAccessAnalysis
FAA(BF
, getSPT(BF
));
466 LLVM_DEBUG(dbgs() << "Restoring frame indices for \"" << BF
.getPrintName()
468 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
469 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB
->getName() << "\n");
472 for (MCInst
&Inst
: *BB
) {
473 if (!FAA
.doNext(*BB
, Inst
))
476 dbgs() << "\t\tNow at ";
478 dbgs() << "\t\t\tSP offset is " << FAA
.getSPOffset() << "\n";
481 if (FAA
.doesEscapeStackAddress()) {
482 if (!FunctionsWithStackArithmetic
.count(&BF
))
483 FunctionsWithStackArithmetic
.insert(&BF
);
487 if (!FAA
.isValidAccess())
490 const FrameIndexEntry
&FIE
= FAA
.getFIE();
492 addFIEFor(Inst
, FIE
);
494 dbgs() << "Frame index annotation " << FIE
<< " added to:\n";
495 BC
.printInstruction(dbgs(), Inst
, 0, &BF
, true);
502 void FrameAnalysis::cleanAnnotations() {
503 NamedRegionTimer
T("cleanannotations", "clean annotations", "FA",
504 "FA breakdown", opts::TimeFA
);
506 ParallelUtilities::WorkFuncTy CleanFunction
= [&](BinaryFunction
&BF
) {
507 for (BinaryBasicBlock
&BB
: BF
) {
508 for (MCInst
&Inst
: BB
) {
509 BC
.MIB
->removeAnnotation(Inst
, "ArgAccessEntry");
510 BC
.MIB
->removeAnnotation(Inst
, "FrameAccessEntry");
515 ParallelUtilities::runOnEachFunction(
516 BC
, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR
, CleanFunction
,
517 ParallelUtilities::PredicateTy(nullptr), "cleanAnnotations");
520 FrameAnalysis::FrameAnalysis(BinaryContext
&BC
, BinaryFunctionCallGraph
&CG
)
522 // Position 0 of the vector should be always associated with "assume access
524 ArgAccessesVector
.emplace_back(ArgAccesses(/*AssumeEverything*/ true));
526 if (!opts::NoThreads
) {
527 NamedRegionTimer
T1("precomputespt", "pre-compute spt", "FA",
528 "FA breakdown", opts::TimeFA
);
533 NamedRegionTimer
T1("traversecg", "traverse call graph", "FA",
534 "FA breakdown", opts::TimeFA
);
538 for (auto &I
: BC
.getBinaryFunctions()) {
539 CountDenominator
+= I
.second
.getFunctionScore();
541 // "shouldOptimize" for passes that run after finalize
542 if (!(I
.second
.isSimple() && I
.second
.hasCFG() && !I
.second
.isIgnored()) ||
543 !opts::shouldFrameOptimize(I
.second
)) {
544 ++NumFunctionsNotOptimized
;
549 NamedRegionTimer
T1("restorefi", "restore frame index", "FA",
550 "FA breakdown", opts::TimeFA
);
551 if (!restoreFrameIndex(I
.second
)) {
552 ++NumFunctionsFailedRestoreFI
;
553 CountFunctionsFailedRestoreFI
+= I
.second
.getFunctionScore();
557 AnalyzedFunctions
.insert(&I
.second
);
561 NamedRegionTimer
T1("clearspt", "clear spt", "FA", "FA breakdown",
567 void FrameAnalysis::printStats() {
568 BC
.outs() << "BOLT-INFO: FRAME ANALYSIS: " << NumFunctionsNotOptimized
569 << " function(s) were not optimized.\n"
570 << "BOLT-INFO: FRAME ANALYSIS: " << NumFunctionsFailedRestoreFI
574 (100.0 * CountFunctionsFailedRestoreFI
/ CountDenominator
))
575 << " could not have its frame indices restored.\n";
578 void FrameAnalysis::clearSPTMap() {
579 if (opts::NoThreads
) {
584 ParallelUtilities::WorkFuncTy ClearFunctionSPT
= [&](BinaryFunction
&BF
) {
585 std::unique_ptr
<StackPointerTracking
> &SPTPtr
= SPTMap
.find(&BF
)->second
;
589 ParallelUtilities::PredicateTy SkipFunc
= [&](const BinaryFunction
&BF
) {
590 return !BF
.isSimple() || !BF
.hasCFG();
593 ParallelUtilities::runOnEachFunction(
594 BC
, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR
, ClearFunctionSPT
,
595 SkipFunc
, "clearSPTMap");
600 void FrameAnalysis::preComputeSPT() {
601 // Make sure that the SPTMap is empty
602 assert(SPTMap
.size() == 0);
604 // Create map entries to allow lock-free parallel execution
605 for (auto &BFI
: BC
.getBinaryFunctions()) {
606 BinaryFunction
&BF
= BFI
.second
;
607 if (!BF
.isSimple() || !BF
.hasCFG())
609 SPTMap
.emplace(&BF
, std::unique_ptr
<StackPointerTracking
>());
612 // Create an index for the SPT annotation to allow lock-free parallel
614 BC
.MIB
->getOrCreateAnnotationIndex("StackPointerTracking");
616 // Run SPT in parallel
617 ParallelUtilities::WorkFuncWithAllocTy ProcessFunction
=
618 [&](BinaryFunction
&BF
, MCPlusBuilder::AllocatorIdTy AllocId
) {
619 std::unique_ptr
<StackPointerTracking
> &SPTPtr
=
620 SPTMap
.find(&BF
)->second
;
621 SPTPtr
= std::make_unique
<StackPointerTracking
>(BF
, AllocId
);
625 ParallelUtilities::PredicateTy SkipPredicate
= [&](const BinaryFunction
&BF
) {
626 return !BF
.isSimple() || !BF
.hasCFG();
629 ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
630 BC
, ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC
, ProcessFunction
,
631 SkipPredicate
, "preComputeSPT");