1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the ShrinkWrapping class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/ShrinkWrapping.h"
14 #include "bolt/Passes/DataflowInfoManager.h"
15 #include "bolt/Passes/MCF.h"
16 #include "bolt/Utils/CommandLineOpts.h"
21 #define DEBUG_TYPE "shrinkwrapping"
// Shared timing / category options declared elsewhere in BOLT.
27 extern cl::opt
<bool> TimeOpts
;
28 extern cl::OptionCategory BoltOptCategory
;
// Tuning knob for shrink wrapping: a candidate block is considered "cold
// enough" to host a moved spill only when its execution count is below this
// percentage of the prologue's execution count (default 30%).
30 static cl::opt
<unsigned> ShrinkWrappingThreshold(
31 "shrink-wrapping-threshold",
32 cl::desc("Percentage of prologue execution count to use as threshold when"
33 " evaluating whether a block is cold enough to be profitable to"
34 " move eligible spills there"),
35 cl::init(30), cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
41 void CalleeSavedAnalysis::analyzeSaves() {
42 ReachingDefOrUse
</*Def=*/true> &RD
= Info
.getReachingDefs();
43 StackReachingUses
&SRU
= Info
.getStackReachingUses();
44 auto &InsnToBB
= Info
.getInsnToBBMap();
45 BitVector
BlacklistedRegs(BC
.MRI
->getNumRegs(), false);
47 LLVM_DEBUG(dbgs() << "Checking spill locations\n");
48 for (BinaryBasicBlock
&BB
: BF
) {
49 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB
.getName() << "\n");
50 const MCInst
*Prev
= nullptr;
51 for (MCInst
&Inst
: BB
) {
52 if (ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
)) {
53 // Blacklist weird stores we don't understand
54 if ((!FIE
->IsSimple
|| FIE
->StackOffset
>= 0) && FIE
->IsStore
&&
55 FIE
->IsStoreFromReg
) {
56 BlacklistedRegs
.set(FIE
->RegOrImm
);
57 CalleeSaved
.reset(FIE
->RegOrImm
);
62 if (!FIE
->IsStore
|| !FIE
->IsStoreFromReg
||
63 BlacklistedRegs
[FIE
->RegOrImm
]) {
68 // If this reg is defined locally, it is not a callee-saved reg
69 if (RD
.isReachedBy(FIE
->RegOrImm
,
70 Prev
? RD
.expr_begin(*Prev
) : RD
.expr_begin(BB
))) {
71 BlacklistedRegs
.set(FIE
->RegOrImm
);
72 CalleeSaved
.reset(FIE
->RegOrImm
);
77 // If this stack position is accessed in another function, we are
78 // probably dealing with a parameter passed in a stack -- do not mess
80 if (SRU
.isStoreUsed(*FIE
,
81 Prev
? SRU
.expr_begin(*Prev
) : SRU
.expr_begin(BB
)),
82 /*IncludeLocalAccesses=*/false) {
83 BlacklistedRegs
.set(FIE
->RegOrImm
);
84 CalleeSaved
.reset(FIE
->RegOrImm
);
89 // If this stack position is loaded elsewhere in another reg, we can't
90 // update it, so blacklist it.
91 if (SRU
.isLoadedInDifferentReg(*FIE
, Prev
? SRU
.expr_begin(*Prev
)
92 : SRU
.expr_begin(BB
))) {
93 BlacklistedRegs
.set(FIE
->RegOrImm
);
94 CalleeSaved
.reset(FIE
->RegOrImm
);
99 // Ignore regs with multiple saves
100 if (CalleeSaved
[FIE
->RegOrImm
]) {
101 BlacklistedRegs
.set(FIE
->RegOrImm
);
102 CalleeSaved
.reset(FIE
->RegOrImm
);
107 CalleeSaved
.set(FIE
->RegOrImm
);
108 SaveFIEByReg
[FIE
->RegOrImm
] = &*FIE
;
109 SavingCost
[FIE
->RegOrImm
] += InsnToBB
[&Inst
]->getKnownExecutionCount();
110 BC
.MIB
->addAnnotation(Inst
, getSaveTag(), FIE
->RegOrImm
, AllocatorId
);
111 OffsetsByReg
[FIE
->RegOrImm
] = FIE
->StackOffset
;
112 LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
113 << FIE
->RegOrImm
<< "\n");
120 void CalleeSavedAnalysis::analyzeRestores() {
121 ReachingDefOrUse
</*Def=*/false> &RU
= Info
.getReachingUses();
123 // Now compute all restores of these callee-saved regs
124 for (BinaryBasicBlock
&BB
: BF
) {
125 const MCInst
*Prev
= nullptr;
126 for (MCInst
&Inst
: llvm::reverse(BB
)) {
127 if (ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
)) {
128 if (!FIE
->IsLoad
|| !CalleeSaved
[FIE
->RegOrImm
]) {
133 // If this reg is used locally after a restore, then we are probably
134 // not dealing with a callee-saved reg. Except if this use is by
135 // another store, but we don't cover this case yet.
136 // Also not callee-saved if this load accesses caller stack or isn't
138 if (!FIE
->IsSimple
|| FIE
->StackOffset
>= 0 ||
139 RU
.isReachedBy(FIE
->RegOrImm
,
140 Prev
? RU
.expr_begin(*Prev
) : RU
.expr_begin(BB
))) {
141 CalleeSaved
.reset(FIE
->RegOrImm
);
145 // If stack offsets between saves/store don't agree with each other,
146 // we don't completely understand what's happening here
147 if (FIE
->StackOffset
!= OffsetsByReg
[FIE
->RegOrImm
]) {
148 CalleeSaved
.reset(FIE
->RegOrImm
);
149 LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
150 "mismatching restore: "
151 << FIE
->RegOrImm
<< "\n");
156 LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE
->RegOrImm
158 if (LoadFIEByReg
[FIE
->RegOrImm
] == nullptr)
159 LoadFIEByReg
[FIE
->RegOrImm
] = &*FIE
;
160 BC
.MIB
->addAnnotation(Inst
, getRestoreTag(), FIE
->RegOrImm
,
162 HasRestores
.set(FIE
->RegOrImm
);
// Collect pointers to every instruction in the function that was previously
// annotated (via the save tag consulted by getSavedReg) as a save of
// register \p Reg. Linear scan over all blocks/instructions.
169 std::vector
<MCInst
*> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg
) {
170 std::vector
<MCInst
*> Results
;
171 for (BinaryBasicBlock
&BB
: BF
)
172 for (MCInst
&Inst
: BB
)
173 if (getSavedReg(Inst
) == Reg
)
174 Results
.push_back(&Inst
);
// NOTE(review): the trailing `return Results;` and closing brace fall outside
// this excerpt (original lines 175+ are not visible here).
// Collect pointers to every instruction in the function that was previously
// annotated (via the restore tag consulted by getRestoredReg) as a restore
// of register \p Reg. Mirror of getSavesByReg.
178 std::vector
<MCInst
*> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg
) {
179 std::vector
<MCInst
*> Results
;
180 for (BinaryBasicBlock
&BB
: BF
)
181 for (MCInst
&Inst
: BB
)
182 if (getRestoredReg(Inst
) == Reg
)
183 Results
.push_back(&Inst
);
// NOTE(review): the trailing `return Results;` and closing brace fall outside
// this excerpt (original lines 184+ are not visible here).
// Destructor: strip the save/restore annotations this analysis attached to
// instructions (in analyzeSaves/analyzeRestores), so stale markers do not
// leak into later passes that reuse the annotation namespace.
187 CalleeSavedAnalysis::~CalleeSavedAnalysis() {
188 for (BinaryBasicBlock
&BB
: BF
) {
189 for (MCInst
&Inst
: BB
) {
190 BC
.MIB
->removeAnnotation(Inst
, getSaveTag());
191 BC
.MIB
->removeAnnotation(Inst
, getRestoreTag());
// Mark the stack region starting at \p Offset with length \p Size as
// untouchable. Only the largest size recorded for a given offset is kept,
// so repeated calls can only widen an existing blacklist entry.
196 void StackLayoutModifier::blacklistRegion(int64_t Offset
, int64_t Size
) {
197 if (BlacklistedRegions
[Offset
] < Size
)
198 BlacklistedRegions
[Offset
] = Size
;
// Report whether the half-open region [Offset, Offset + Size) overlaps any
// previously blacklisted region. Standard interval-overlap test against each
// (offset, size) entry in BlacklistedRegions.
201 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset
, int64_t Size
) {
202 for (std::pair
<const int64_t, int64_t> Elem
: BlacklistedRegions
)
203 if (Offset
+ Size
> Elem
.first
&& Offset
< Elem
.first
+ Elem
.second
)
// NOTE(review): the return statements for the hit/miss cases fall outside
// this excerpt (original lines 204+ are not visible here).
208 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset
,
210 bool HasConflict
= false;
211 for (auto Iter
= AvailableRegions
.begin(); Iter
!= AvailableRegions
.end();) {
212 std::pair
<const int64_t, int64_t> &Elem
= *Iter
;
213 if (Offset
+ Size
> Elem
.first
&& Offset
< Elem
.first
+ Elem
.second
&&
214 (Offset
!= Elem
.first
|| Size
!= Elem
.second
)) {
215 Iter
= AvailableRegions
.erase(Iter
);
222 blacklistRegion(Offset
, Size
);
228 void StackLayoutModifier::checkFramePointerInitialization(MCInst
&Point
) {
229 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
230 if (!BC
.MII
->get(Point
.getOpcode())
231 .hasDefOfPhysReg(Point
, BC
.MIB
->getFramePointer(), *BC
.MRI
))
235 std::tie(SPVal
, FPVal
) = *SPT
.getStateBefore(Point
);
236 std::pair
<MCPhysReg
, int64_t> FP
;
238 if (FPVal
!= SPT
.EMPTY
&& FPVal
!= SPT
.SUPERPOSITION
)
239 FP
= std::make_pair(BC
.MIB
->getFramePointer(), FPVal
);
241 FP
= std::make_pair(0, 0);
242 std::pair
<MCPhysReg
, int64_t> SP
;
244 if (SPVal
!= SPT
.EMPTY
&& SPVal
!= SPT
.SUPERPOSITION
)
245 SP
= std::make_pair(BC
.MIB
->getStackPointer(), SPVal
);
247 SP
= std::make_pair(0, 0);
250 if (!BC
.MIB
->evaluateStackOffsetExpr(Point
, Output
, SP
, FP
))
253 // Not your regular frame pointer initialization... bail
255 blacklistRegion(0, 0);
258 void StackLayoutModifier::checkStackPointerRestore(MCInst
&Point
) {
259 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
260 if (!BC
.MII
->get(Point
.getOpcode())
261 .hasDefOfPhysReg(Point
, BC
.MIB
->getStackPointer(), *BC
.MRI
))
263 // Check if the definition of SP comes from FP -- in this case, this
264 // value may need to be updated depending on our stack layout changes
265 bool UsesFP
= llvm::any_of(BC
.MIB
->useOperands(Point
), [&](MCOperand
&Op
) {
266 return Op
.isReg() && Op
.getReg() == BC
.MIB
->getFramePointer();
271 // Setting up evaluation
273 std::tie(SPVal
, FPVal
) = *SPT
.getStateBefore(Point
);
274 std::pair
<MCPhysReg
, int64_t> FP
;
276 if (FPVal
!= SPT
.EMPTY
&& FPVal
!= SPT
.SUPERPOSITION
)
277 FP
= std::make_pair(BC
.MIB
->getFramePointer(), FPVal
);
279 FP
= std::make_pair(0, 0);
280 std::pair
<MCPhysReg
, int64_t> SP
;
282 if (SPVal
!= SPT
.EMPTY
&& SPVal
!= SPT
.SUPERPOSITION
)
283 SP
= std::make_pair(BC
.MIB
->getStackPointer(), SPVal
);
285 SP
= std::make_pair(0, 0);
288 if (!BC
.MIB
->evaluateStackOffsetExpr(Point
, Output
, SP
, FP
))
291 // If the value is the same of FP, no need to adjust it
295 // If an allocation happened through FP, bail
296 if (Output
<= SPVal
) {
297 blacklistRegion(0, 0);
301 // We are restoring SP to an old value based on FP. Mark it as a stack
302 // access to be fixed later.
303 BC
.MIB
->addAnnotation(Point
, getSlotTag(), Output
, AllocatorId
);
306 void StackLayoutModifier::classifyStackAccesses() {
307 // Understand when stack slots are being used non-locally
308 StackReachingUses
&SRU
= Info
.getStackReachingUses();
310 for (BinaryBasicBlock
&BB
: BF
) {
311 const MCInst
*Prev
= nullptr;
312 for (MCInst
&Inst
: llvm::reverse(BB
)) {
313 checkFramePointerInitialization(Inst
);
314 checkStackPointerRestore(Inst
);
315 ErrorOr
<const FrameIndexEntry
&> FIEX
= FA
.getFIEFor(Inst
);
320 if (!FIEX
->IsSimple
|| (FIEX
->IsStore
&& !FIEX
->IsStoreFromReg
)) {
321 blacklistRegion(FIEX
->StackOffset
, FIEX
->Size
);
325 // If this stack position is accessed in another function, we are
326 // probably dealing with a parameter passed in a stack -- do not mess
328 if (SRU
.isStoreUsed(*FIEX
,
329 Prev
? SRU
.expr_begin(*Prev
) : SRU
.expr_begin(BB
),
330 /*IncludeLocalAccesses=*/false)) {
331 blacklistRegion(FIEX
->StackOffset
, FIEX
->Size
);
335 // Now we have a clear stack slot access. Check if its blacklisted or if
336 // it conflicts with another chunk.
337 if (isRegionBlacklisted(FIEX
->StackOffset
, FIEX
->Size
) ||
338 blacklistAllInConflictWith(FIEX
->StackOffset
, FIEX
->Size
)) {
342 // We are free to go. Add it as available stack slot which we know how
344 AvailableRegions
[FIEX
->StackOffset
] = FIEX
->Size
;
345 BC
.MIB
->addAnnotation(Inst
, getSlotTag(), FIEX
->StackOffset
, AllocatorId
);
346 RegionToRegMap
[FIEX
->StackOffset
].insert(FIEX
->RegOrImm
);
347 RegToRegionMap
[FIEX
->RegOrImm
].insert(FIEX
->StackOffset
);
348 LLVM_DEBUG(dbgs() << "Adding region " << FIEX
->StackOffset
<< " size "
349 << (int)FIEX
->Size
<< "\n");
354 void StackLayoutModifier::classifyCFIs() {
355 std::stack
<std::pair
<int64_t, uint16_t>> CFIStack
;
356 int64_t CfaOffset
= -8;
359 auto recordAccess
= [&](MCInst
*Inst
, int64_t Offset
) {
360 const uint16_t Reg
= *BC
.MRI
->getLLVMRegNum(CfaReg
, /*isEH=*/false);
361 if (Reg
== BC
.MIB
->getStackPointer() || Reg
== BC
.MIB
->getFramePointer()) {
362 BC
.MIB
->addAnnotation(*Inst
, getSlotTag(), Offset
, AllocatorId
);
363 LLVM_DEBUG(dbgs() << "Recording CFI " << Offset
<< "\n");
370 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
371 for (MCInst
&Inst
: *BB
) {
372 if (!BC
.MIB
->isCFI(Inst
))
374 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
375 switch (CFI
->getOperation()) {
376 case MCCFIInstruction::OpDefCfa
:
377 CfaOffset
= -CFI
->getOffset();
378 recordAccess(&Inst
, CfaOffset
);
380 case MCCFIInstruction::OpDefCfaRegister
:
381 CfaReg
= CFI
->getRegister();
383 case MCCFIInstruction::OpDefCfaOffset
:
384 CfaOffset
= -CFI
->getOffset();
385 recordAccess(&Inst
, CfaOffset
);
387 case MCCFIInstruction::OpOffset
:
388 recordAccess(&Inst
, CFI
->getOffset());
389 BC
.MIB
->addAnnotation(Inst
, getOffsetCFIRegTag(),
390 BC
.MRI
->getLLVMRegNum(CFI
->getRegister(),
394 case MCCFIInstruction::OpSameValue
:
395 BC
.MIB
->addAnnotation(Inst
, getOffsetCFIRegTag(),
396 BC
.MRI
->getLLVMRegNum(CFI
->getRegister(),
400 case MCCFIInstruction::OpRememberState
:
401 CFIStack
.push(std::make_pair(CfaOffset
, CfaReg
));
403 case MCCFIInstruction::OpRestoreState
: {
404 assert(!CFIStack
.empty() && "Corrupt CFI stack");
405 std::pair
<int64_t, uint16_t> &Elem
= CFIStack
.top();
407 CfaOffset
= Elem
.first
;
408 CfaReg
= Elem
.second
;
411 case MCCFIInstruction::OpRelOffset
:
412 case MCCFIInstruction::OpAdjustCfaOffset
:
413 llvm_unreachable("Unhandled AdjustCfaOffset");
// Queue a pending layout change for \p Inst: append \p Item to the
// instruction's todo-list annotation, creating the vector annotation on
// first use. The queued items are consumed later by performChanges().
422 void StackLayoutModifier::scheduleChange(
423 MCInst
&Inst
, StackLayoutModifier::WorklistItem Item
) {
424 auto &WList
= BC
.MIB
->getOrCreateAnnotationAs
<std::vector
<WorklistItem
>>(
425 Inst
, getTodoTag(), AllocatorId
);
426 WList
.push_back(Item
);
429 bool StackLayoutModifier::canCollapseRegion(MCInst
*DeletedPush
) {
430 if (!IsSimple
|| !BC
.MIB
->isPush(*DeletedPush
))
433 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(*DeletedPush
);
437 return canCollapseRegion(FIE
->StackOffset
);
440 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr
) {
446 if (CollapsedRegions
.count(RegionAddr
))
449 // Check if it is possible to readjust all accesses below RegionAddr
450 if (!BlacklistedRegions
.empty())
456 bool StackLayoutModifier::collapseRegion(MCInst
*DeletedPush
) {
457 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(*DeletedPush
);
460 int64_t RegionAddr
= FIE
->StackOffset
;
461 int64_t RegionSz
= FIE
->Size
;
462 return collapseRegion(DeletedPush
, RegionAddr
, RegionSz
);
465 bool StackLayoutModifier::collapseRegion(MCInst
*Alloc
, int64_t RegionAddr
,
467 if (!canCollapseRegion(RegionAddr
))
470 assert(IsInitialized
);
471 StackAllocationAnalysis
&SAA
= Info
.getStackAllocationAnalysis();
473 for (BinaryBasicBlock
&BB
: BF
) {
474 for (MCInst
&Inst
: BB
) {
475 if (!BC
.MIB
->hasAnnotation(Inst
, getSlotTag()))
478 BC
.MIB
->getAnnotationAs
<decltype(FrameIndexEntry::StackOffset
)>(
480 if (!AvailableRegions
.count(Slot
))
482 // We need to ensure this access is affected by the deleted push
483 if (!(*SAA
.getStateBefore(Inst
))[SAA
.ExprToIdx
[Alloc
]])
486 if (BC
.MIB
->isCFI(Inst
)) {
487 if (Slot
> RegionAddr
)
489 scheduleChange(Inst
, WorklistItem(WorklistItem::AdjustCFI
, RegionSz
));
492 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
);
494 if (Slot
> RegionAddr
)
496 // SP update based on frame pointer
498 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, RegionSz
));
502 if (Slot
== RegionAddr
) {
503 BC
.MIB
->addAnnotation(Inst
, "AccessesDeletedPos", 0U, AllocatorId
);
506 if (BC
.MIB
->isPush(Inst
) || BC
.MIB
->isPop(Inst
))
509 if (FIE
->StackPtrReg
== BC
.MIB
->getStackPointer() && Slot
< RegionAddr
)
512 if (FIE
->StackPtrReg
== BC
.MIB
->getFramePointer() && Slot
> RegionAddr
)
516 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, RegionSz
));
520 CollapsedRegions
.insert(RegionAddr
);
524 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset
) {
525 for (BinaryBasicBlock
&BB
: BF
) {
526 for (MCInst
&Inst
: BB
) {
527 if (!BC
.MIB
->hasAnnotation(Inst
, "AccessesDeletedPos"))
529 BC
.MIB
->removeAnnotation(Inst
, "AccessesDeletedPos");
531 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, NewOffset
));
536 bool StackLayoutModifier::canInsertRegion(ProgramPoint P
) {
542 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
543 int64_t RegionAddr
= SPT
.getStateBefore(P
)->first
;
544 if (RegionAddr
== SPT
.SUPERPOSITION
|| RegionAddr
== SPT
.EMPTY
)
547 if (InsertedRegions
.count(RegionAddr
))
550 // Check if we are going to screw up stack accesses at call sites that
551 // pass parameters via stack
552 if (!BlacklistedRegions
.empty())
558 bool StackLayoutModifier::insertRegion(ProgramPoint P
, int64_t RegionSz
) {
559 if (!canInsertRegion(P
))
562 assert(IsInitialized
);
563 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
564 // This RegionAddr is slightly different from the one seen in collapseRegion
565 // This is the value of SP before the allocation the user wants to make.
566 int64_t RegionAddr
= SPT
.getStateBefore(P
)->first
;
567 if (RegionAddr
== SPT
.SUPERPOSITION
|| RegionAddr
== SPT
.EMPTY
)
570 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
572 for (BinaryBasicBlock
&BB
: BF
) {
573 for (MCInst
&Inst
: BB
) {
574 if (!BC
.MIB
->hasAnnotation(Inst
, getSlotTag()))
577 BC
.MIB
->getAnnotationAs
<decltype(FrameIndexEntry::StackOffset
)>(
579 if (!AvailableRegions
.count(Slot
))
582 if (!(DA
.doesADominateB(P
, Inst
)))
585 if (BC
.MIB
->isCFI(Inst
)) {
586 if (Slot
>= RegionAddr
)
588 scheduleChange(Inst
, WorklistItem(WorklistItem::AdjustCFI
, -RegionSz
));
591 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
);
593 if (Slot
>= RegionAddr
)
596 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, -RegionSz
));
600 if (FIE
->StackPtrReg
== BC
.MIB
->getStackPointer() && Slot
< RegionAddr
)
602 if (FIE
->StackPtrReg
== BC
.MIB
->getFramePointer() && Slot
>= RegionAddr
)
604 if (BC
.MIB
->isPush(Inst
) || BC
.MIB
->isPop(Inst
))
607 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, -RegionSz
));
611 InsertedRegions
.insert(RegionAddr
);
615 void StackLayoutModifier::performChanges() {
616 std::set
<uint32_t> ModifiedCFIIndices
;
617 for (BinaryBasicBlock
&BB
: BF
) {
618 for (MCInst
&Inst
: llvm::reverse(BB
)) {
619 if (BC
.MIB
->hasAnnotation(Inst
, "AccessesDeletedPos")) {
620 assert(BC
.MIB
->isPop(Inst
) || BC
.MIB
->isPush(Inst
));
621 BC
.MIB
->removeAnnotation(Inst
, "AccessesDeletedPos");
623 if (!BC
.MIB
->hasAnnotation(Inst
, getTodoTag()))
625 auto &WList
= BC
.MIB
->getAnnotationAs
<std::vector
<WorklistItem
>>(
627 int64_t Adjustment
= 0;
628 WorklistItem::ActionType AdjustmentType
= WorklistItem::None
;
629 for (WorklistItem
&WI
: WList
) {
630 if (WI
.Action
== WorklistItem::None
)
632 assert(WI
.Action
== WorklistItem::AdjustLoadStoreOffset
||
633 WI
.Action
== WorklistItem::AdjustCFI
);
634 assert((AdjustmentType
== WorklistItem::None
||
635 AdjustmentType
== WI
.Action
) &&
636 "Conflicting actions requested at the same program point");
637 AdjustmentType
= WI
.Action
;
638 Adjustment
+= WI
.OffsetUpdate
;
642 if (AdjustmentType
!= WorklistItem::AdjustLoadStoreOffset
) {
643 assert(BC
.MIB
->isCFI(Inst
));
644 uint32_t CFINum
= Inst
.getOperand(0).getImm();
645 if (ModifiedCFIIndices
.count(CFINum
))
647 ModifiedCFIIndices
.insert(CFINum
);
648 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
649 const MCCFIInstruction::OpType Operation
= CFI
->getOperation();
650 if (Operation
== MCCFIInstruction::OpDefCfa
||
651 Operation
== MCCFIInstruction::OpDefCfaOffset
)
652 Adjustment
= 0 - Adjustment
;
653 LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI
->getOffset()
654 << " to " << (CFI
->getOffset() + Adjustment
) << "\n");
655 BF
.mutateCFIOffsetFor(Inst
, CFI
->getOffset() + Adjustment
);
660 MCPhysReg StackPtrReg
= 0;
661 int64_t StackOffset
= 0;
662 bool IsIndexed
= false;
664 bool IsStore
= false;
665 bool IsSimple
= false;
666 bool IsStoreFromReg
= false;
668 bool Success
= false;
669 Success
= BC
.MIB
->isStackAccess(Inst
, IsLoad
, IsStore
, IsStoreFromReg
,
670 Reg
, SrcImm
, StackPtrReg
, StackOffset
,
671 Size
, IsSimple
, IsIndexed
);
673 // SP update based on FP value
674 Success
= BC
.MIB
->addToImm(Inst
, Adjustment
, &*BC
.Ctx
);
678 assert(Success
&& IsSimple
&& !IsIndexed
&& (!IsStore
|| IsStoreFromReg
));
679 if (StackPtrReg
!= BC
.MIB
->getFramePointer())
680 Adjustment
= -Adjustment
;
682 BC
.MIB
->createRestoreFromStack(Inst
, StackPtrReg
,
683 StackOffset
+ Adjustment
, Reg
, Size
);
685 BC
.MIB
->createSaveToStack(Inst
, StackPtrReg
, StackOffset
+ Adjustment
,
688 dbgs() << "Adjusted instruction: ";
695 void StackLayoutModifier::initialize() {
696 classifyStackAccesses();
698 IsInitialized
= true;
// Pass-wide statistics counters. They are std::atomic because BOLT may run
// this pass over many functions concurrently; each counter is zero-initialized
// here and aggregated for reporting after the pass completes.
701 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode
{0};
702 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode
{0};
703 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount
{0};
704 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount
{0};
705 std::atomic
<std::uint64_t> ShrinkWrapping::InstrDynamicCount
{0};
706 std::atomic
<std::uint64_t> ShrinkWrapping::StoreDynamicCount
{0};
// Shorthand for an instruction iterator inside a BinaryBasicBlock.
708 using BBIterTy
= BinaryBasicBlock::iterator
;
710 void ShrinkWrapping::classifyCSRUses() {
711 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
712 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
713 UsesByReg
= std::vector
<BitVector
>(BC
.MRI
->getNumRegs(),
714 BitVector(DA
.NumInstrs
, false));
716 const BitVector
&FPAliases
= BC
.MIB
->getAliases(BC
.MIB
->getFramePointer());
717 for (BinaryBasicBlock
&BB
: BF
) {
718 for (MCInst
&Inst
: BB
) {
719 if (BC
.MIB
->isCFI(Inst
))
721 BitVector BV
= BitVector(BC
.MRI
->getNumRegs(), false);
722 BC
.MIB
->getTouchedRegs(Inst
, BV
);
723 BV
&= CSA
.CalleeSaved
;
724 for (int I
: BV
.set_bits()) {
727 if (CSA
.getSavedReg(Inst
) != I
&& CSA
.getRestoredReg(Inst
) != I
)
728 UsesByReg
[I
].set(DA
.ExprToIdx
[&Inst
]);
730 if (!SPT
.HasFramePointer
|| !BC
.MIB
->isCall(Inst
))
732 BV
= CSA
.CalleeSaved
;
734 for (int I
: BV
.set_bits())
735 UsesByReg
[I
].set(DA
.ExprToIdx
[&Inst
]);
740 void ShrinkWrapping::pruneUnwantedCSRs() {
741 BitVector ParamRegs
= BC
.MIB
->getRegsUsedAsParams();
742 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
743 if (!CSA
.CalleeSaved
[I
])
746 CSA
.CalleeSaved
.reset(I
);
749 if (UsesByReg
[I
].empty()) {
752 << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
754 CSA
.CalleeSaved
.reset(I
);
757 if (!CSA
.HasRestores
[I
]) {
759 dbgs() << "Dismissing Callee-Saved Reg because it does not have "
762 CSA
.CalleeSaved
.reset(I
);
767 void ShrinkWrapping::computeSaveLocations() {
768 BestSavePos
= std::vector
<std::vector
<MCInst
*>>(BC
.MRI
->getNumRegs());
769 ReachingInsns
<true> &RI
= Info
.getReachingInsnsBackwards();
770 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
771 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
773 LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
774 for (BinaryBasicBlock
&BB
: BF
) {
775 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB
.getName() << "\n");
777 MCInst
*First
= BB
.begin() != BB
.end() ? &*BB
.begin() : nullptr;
781 // Use reaching instructions to detect if we are inside a loop - if we
782 // are, do not consider this BB as valid placement for saves.
786 const std::pair
<int, int> SPFP
= *SPT
.getStateBefore(*First
);
787 // If we don't know stack state at this point, bail
788 if ((SPFP
.first
== SPT
.SUPERPOSITION
|| SPFP
.first
== SPT
.EMPTY
) &&
789 (SPFP
.second
== SPT
.SUPERPOSITION
|| SPFP
.second
== SPT
.EMPTY
))
792 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
793 if (!CSA
.CalleeSaved
[I
])
796 BitVector BBDominatedUses
= BitVector(DA
.NumInstrs
, false);
797 for (int J
: UsesByReg
[I
].set_bits())
798 if (DA
.doesADominateB(*First
, J
))
799 BBDominatedUses
.set(J
);
800 LLVM_DEBUG(dbgs() << "\t\tBB " << BB
.getName() << " dominates "
801 << BBDominatedUses
.count() << " uses for reg " << I
802 << ". Total uses for reg is " << UsesByReg
[I
].count()
804 BBDominatedUses
&= UsesByReg
[I
];
805 if (BBDominatedUses
== UsesByReg
[I
]) {
806 LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB
.getName()
807 << " as a save pos for " << I
<< "\n");
808 BestSavePos
[I
].push_back(First
);
810 dbgs() << "Dominated uses are:\n";
811 for (int J
: UsesByReg
[I
].set_bits()) {
812 dbgs() << "Idx " << J
<< ": ";
813 BC
.printInstruction(dbgs(), *DA
.Expressions
[J
]);
814 DA
.Expressions
[J
]->dump();
821 BestSaveCount
= std::vector
<std::vector
<uint64_t>>(BC
.MRI
->getNumRegs());
823 auto &InsnToBB
= Info
.getInsnToBBMap();
824 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
825 if (!CSA
.CalleeSaved
[I
])
828 std::stable_sort(BestSavePos
[I
].begin(), BestSavePos
[I
].end(),
829 [&](const MCInst
*A
, const MCInst
*B
) {
830 const BinaryBasicBlock
*BBA
= InsnToBB
[A
];
831 const BinaryBasicBlock
*BBB
= InsnToBB
[B
];
832 const uint64_t CountA
= BBA
->getKnownExecutionCount();
833 const uint64_t CountB
= BBB
->getKnownExecutionCount();
834 return CountB
< CountA
;
837 for (MCInst
*Pos
: BestSavePos
[I
]) {
838 const BinaryBasicBlock
*BB
= InsnToBB
[Pos
];
839 const uint64_t Count
= BB
->getKnownExecutionCount();
840 BestSaveCount
[I
].push_back(Count
);
845 void ShrinkWrapping::computeDomOrder() {
846 DomOrder
= std::vector
<MCPhysReg
>(BC
.MRI
->getNumRegs(), 0);
847 std::vector
<MCPhysReg
> Order
;
848 for (MCPhysReg I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
852 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
853 auto &InsnToBB
= Info
.getInsnToBBMap();
854 llvm::sort(Order
, [&](const MCPhysReg
&A
, const MCPhysReg
&B
) {
855 BinaryBasicBlock
*BBA
=
856 BestSavePos
[A
].size() ? InsnToBB
[BestSavePos
[A
].back()] : nullptr;
857 BinaryBasicBlock
*BBB
=
858 BestSavePos
[B
].size() ? InsnToBB
[BestSavePos
[B
].back()] : nullptr;
865 if (DA
.doesADominateB(*BestSavePos
[A
].back(), *BestSavePos
[B
].back()))
867 if (DA
.doesADominateB(*BestSavePos
[B
].back(), *BestSavePos
[A
].back()))
872 for (MCPhysReg I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
)
873 DomOrder
[Order
[I
]] = I
;
876 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR
, MCInst
*&BestPosSave
,
877 uint64_t &TotalEstimatedWin
) {
878 const uint64_t CurSavingCost
= CSA
.SavingCost
[CSR
];
879 if (!CSA
.CalleeSaved
[CSR
])
882 assert(BestSaveCount
[CSR
].size() == BestSavePos
[CSR
].size() &&
883 "save position vectors out of sync");
884 if (BestSaveCount
[CSR
].empty())
887 const uint64_t BestCount
= BestSaveCount
[CSR
].back();
888 BestPosSave
= BestSavePos
[CSR
].back();
889 if (BestCount
>= (opts::ShrinkWrappingThreshold
/ 100.0) * CurSavingCost
)
893 auto &InsnToBB
= Info
.getInsnToBBMap();
894 dbgs() << "Better position for saves found in func " << BF
.getPrintName()
895 << " count << " << BF
.getKnownExecutionCount() << "\n";
896 dbgs() << "Reg: " << CSR
<< "; New BB: " << InsnToBB
[BestPosSave
]->getName()
897 << " Freq reduction: " << (CurSavingCost
- BestCount
) << "\n";
900 TotalEstimatedWin
= CurSavingCost
- BestCount
;
904 /// Auxiliary function used to create basic blocks for critical edges and update
905 /// the dominance frontier with these new locations
906 void ShrinkWrapping::splitFrontierCritEdges(
907 BinaryFunction
*Func
, SmallVector
<ProgramPoint
, 4> &Frontier
,
908 const SmallVector
<bool, 4> &IsCritEdge
,
909 const SmallVector
<BinaryBasicBlock
*, 4> &From
,
910 const SmallVector
<SmallVector
<BinaryBasicBlock
*, 4>, 4> &To
) {
911 LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
912 << BF
.getPrintName() << "\n");
913 // For every FromBB, there might be one or more critical edges, with
914 // To[I] containing destination BBs. It's important to memorize
915 // the original size of the Frontier as we may append to it while splitting
916 // critical edges originating with blocks with multiple destinations.
917 for (size_t I
= 0, IE
= Frontier
.size(); I
< IE
; ++I
) {
922 BinaryBasicBlock
*FromBB
= From
[I
];
923 LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB
->getName()
925 // Split edge for every DestinationBBs
926 for (size_t DI
= 0, DIE
= To
[I
].size(); DI
< DIE
; ++DI
) {
927 BinaryBasicBlock
*DestinationBB
= To
[I
][DI
];
928 LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB
->getName() << "\n");
929 BinaryBasicBlock
*NewBB
= Func
->splitEdge(FromBB
, DestinationBB
);
930 // Insert dummy instruction so this BB is never empty (we need this for
931 // PredictiveStackPointerTracking to work, since it annotates instructions
933 if (NewBB
->empty()) {
935 BC
.MIB
->createNoop(NewInst
);
936 NewBB
->addInstruction(std::move(NewInst
));
937 scheduleChange(&*NewBB
->begin(), WorklistItem(WorklistItem::Erase
, 0));
941 ProgramPoint NewFrontierPP
= ProgramPoint::getLastPointAt(*NewBB
);
943 // Update frontier inplace
944 Frontier
[I
] = NewFrontierPP
;
945 LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB
->getName()
948 // Append new frontier to the end of the list
949 Frontier
.push_back(NewFrontierPP
);
950 LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB
->getName()
957 SmallVector
<ProgramPoint
, 4>
958 ShrinkWrapping::doRestorePlacement(MCInst
*BestPosSave
, unsigned CSR
,
959 uint64_t TotalEstimatedWin
) {
960 SmallVector
<ProgramPoint
, 4> Frontier
;
961 SmallVector
<bool, 4> IsCritEdge
;
962 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
964 SmallVector
<BinaryBasicBlock
*, 4> CritEdgesFrom
;
965 SmallVector
<SmallVector
<BinaryBasicBlock
*, 4>, 4> CritEdgesTo
;
966 // In case of a critical edge, we need to create extra BBs to host restores
967 // into edges transitioning to the dominance frontier, otherwise we pull these
968 // restores to inside the dominated area.
969 Frontier
= DA
.getDominanceFrontierFor(*BestPosSave
).takeVector();
971 dbgs() << "Dumping dominance frontier for ";
972 BC
.printInstruction(dbgs(), *BestPosSave
);
973 for (ProgramPoint
&PP
: Frontier
)
975 BC
.printInstruction(dbgs(), *PP
.getInst());
977 dbgs() << PP
.getBB()->getName() << "\n";
979 for (ProgramPoint
&PP
: Frontier
) {
980 bool HasCritEdges
= false;
981 if (PP
.isInst() && BC
.MIB
->isTerminator(*PP
.getInst()) &&
982 doesInstUsesCSR(*PP
.getInst(), CSR
)) {
986 BinaryBasicBlock
*FrontierBB
= Info
.getParentBB(PP
);
987 CritEdgesFrom
.emplace_back(FrontierBB
);
988 CritEdgesTo
.emplace_back(0);
989 SmallVector
<BinaryBasicBlock
*, 4> &Dests
= CritEdgesTo
.back();
990 // Check for invoke instructions at the dominance frontier, which indicates
991 // the landing pad is not dominated.
992 if (PP
.isInst() && BC
.MIB
->isInvoke(*PP
.getInst())) {
996 doForAllSuccs(*FrontierBB
, [&](ProgramPoint P
) {
997 if (!DA
.doesADominateB(*BestPosSave
, P
)) {
998 Dests
.emplace_back(Info
.getParentBB(P
));
1001 HasCritEdges
= true;
1003 IsCritEdge
.push_back(HasCritEdges
);
1005 // Restores cannot be placed in empty BBs because we have a dataflow
1006 // analysis that depends on insertions happening before real instructions
1007 // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
1008 // dummy nop that is scheduled to be removed later.
1009 bool InvalidateRequired
= false;
1010 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1011 if (BB
->size() != 0)
1014 BC
.MIB
->createNoop(NewInst
);
1015 auto II
= BB
->addInstruction(std::move(NewInst
));
1016 scheduleChange(&*II
, WorklistItem(WorklistItem::Erase
, 0));
1017 InvalidateRequired
= true;
1019 if (std::accumulate(IsCritEdge
.begin(), IsCritEdge
.end(), 0)) {
1021 dbgs() << "Now detected critical edges in the following frontier:\n";
1022 for (ProgramPoint
&PP
: Frontier
) {
1024 dbgs() << " BB: " << PP
.getBB()->getName() << "\n";
1026 dbgs() << " Inst: ";
1027 PP
.getInst()->dump();
1031 splitFrontierCritEdges(&BF
, Frontier
, IsCritEdge
, CritEdgesFrom
,
1033 InvalidateRequired
= true;
1035 if (InvalidateRequired
) {
1036 // BitVectors that represent all insns of the function are invalid now
1037 // since we changed BBs/Insts. Re-run steps that depend on pointers being
1039 Info
.invalidateAll();
1045 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR
, MCInst
*BestPosSave
,
1046 int64_t SaveOffset
) {
1047 if (FA
.requiresAlignment(BF
)) {
1049 dbgs() << "Reg " << CSR
1050 << " is not using push/pops due to function "
1051 "alignment requirements.\n";
1055 if (FA
.hasStackArithmetic(BF
)) {
1057 dbgs() << "Reg " << CSR
1058 << " is not using push/pops due to function "
1059 "taking the address of a stack position.\n";
1063 for (MCInst
*Save
: CSA
.getSavesByReg(CSR
)) {
1064 if (!SLM
.canCollapseRegion(Save
)) {
1065 LLVM_DEBUG(dbgs() << "Reg " << CSR
<< " cannot collapse region.\n");
1069 // Abort if one of the restores for this CSR is not a POP.
1070 for (MCInst
*Load
: CSA
.getRestoresByReg(CSR
)) {
1071 if (!BC
.MIB
->isPop(*Load
)) {
1072 LLVM_DEBUG(dbgs() << "Reg " << CSR
<< " has a mismatching restore.\n");
1077 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1078 // Abort if we are inserting a push into an entry BB (offset -8) and this
1079 // func sets up a frame pointer.
1080 if (!SLM
.canInsertRegion(BestPosSave
) || SaveOffset
== SPT
.SUPERPOSITION
||
1081 SaveOffset
== SPT
.EMPTY
|| (SaveOffset
== -8 && SPT
.HasFramePointer
)) {
1083 dbgs() << "Reg " << CSR
1084 << " cannot insert region or we are "
1085 "trying to insert a push into entry bb.\n";
1092 SmallVector
<ProgramPoint
, 4> ShrinkWrapping::fixPopsPlacements(
1093 const SmallVector
<ProgramPoint
, 4> &RestorePoints
, int64_t SaveOffset
,
1095 SmallVector
<ProgramPoint
, 4> FixedRestorePoints
= RestorePoints
;
1096 // Moving pop locations to the correct sp offset
1097 ReachingInsns
<true> &RI
= Info
.getReachingInsnsBackwards();
1098 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1099 for (ProgramPoint
&PP
: FixedRestorePoints
) {
1100 BinaryBasicBlock
*BB
= Info
.getParentBB(PP
);
1102 if (SPT
.getStateAt(ProgramPoint::getLastPointAt(*BB
))->first
==
1104 BitVector BV
= *RI
.getStateAt(ProgramPoint::getLastPointAt(*BB
));
1105 BV
&= UsesByReg
[CSR
];
1112 for (MCInst
&Inst
: llvm::reverse(*BB
)) {
1113 if (SPT
.getStateBefore(Inst
)->first
== SaveOffset
) {
1114 BitVector BV
= *RI
.getStateAt(Inst
);
1115 BV
&= UsesByReg
[CSR
];
1125 dbgs() << "Could not find restore insertion point for " << CSR
1126 << ", falling back to load/store mode\n";
1128 FixedRestorePoints
.clear();
1129 return FixedRestorePoints
;
1132 return FixedRestorePoints
;
1135 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR
,
1138 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1139 std::vector
<MCInst
*> CFIs
;
1140 for (MCInst
&Inst
: llvm::reverse(*BB
)) {
1141 if (BC
.MIB
->isCFI(Inst
)) {
1142 // Delete all offset CFIs related to this CSR
1143 if (SLM
.getOffsetCFIReg(Inst
) == CSR
) {
1144 HasDeletedOffsetCFIs
[CSR
] = true;
1145 scheduleChange(&Inst
, WorklistItem(WorklistItem::Erase
, CSR
));
1148 CFIs
.push_back(&Inst
);
1152 uint16_t SavedReg
= CSA
.getSavedReg(Inst
);
1153 uint16_t RestoredReg
= CSA
.getRestoredReg(Inst
);
1154 if (SavedReg
!= CSR
&& RestoredReg
!= CSR
) {
1159 scheduleChange(&Inst
, WorklistItem(UsePushPops
1160 ? WorklistItem::Erase
1161 : WorklistItem::ChangeToAdjustment
,
1164 // Delete associated CFIs
1165 const bool RecordDeletedPushCFIs
=
1166 SavedReg
== CSR
&& DeletedPushCFIs
[CSR
].empty();
1167 const bool RecordDeletedPopCFIs
=
1168 RestoredReg
== CSR
&& DeletedPopCFIs
[CSR
].empty();
1169 for (MCInst
*CFI
: CFIs
) {
1170 const MCCFIInstruction
*MCCFI
= BF
.getCFIFor(*CFI
);
1171 // Do not touch these...
1172 if (MCCFI
->getOperation() == MCCFIInstruction::OpRestoreState
||
1173 MCCFI
->getOperation() == MCCFIInstruction::OpRememberState
)
1175 scheduleChange(CFI
, WorklistItem(WorklistItem::Erase
, CSR
));
1176 if (RecordDeletedPushCFIs
) {
1177 // Do not record this to be replayed later because we are going to
1179 if (MCCFI
->getOperation() == MCCFIInstruction::OpDefCfaOffset
)
1181 DeletedPushCFIs
[CSR
].push_back(CFI
->getOperand(0).getImm());
1183 if (RecordDeletedPopCFIs
) {
1184 if (MCCFI
->getOperation() == MCCFIInstruction::OpDefCfaOffset
)
1186 DeletedPopCFIs
[CSR
].push_back(CFI
->getOperand(0).getImm());
1194 bool ShrinkWrapping::doesInstUsesCSR(const MCInst
&Inst
, uint16_t CSR
) {
1195 if (BC
.MIB
->isCFI(Inst
) || CSA
.getSavedReg(Inst
) == CSR
||
1196 CSA
.getRestoredReg(Inst
) == CSR
)
1198 BitVector BV
= BitVector(BC
.MRI
->getNumRegs(), false);
1199 BC
.MIB
->getTouchedRegs(Inst
, BV
);
1203 void ShrinkWrapping::scheduleSaveRestoreInsertions(
1204 unsigned CSR
, MCInst
*BestPosSave
,
1205 SmallVector
<ProgramPoint
, 4> &RestorePoints
, bool UsePushPops
) {
1206 auto &InsnToBB
= Info
.getInsnToBBMap();
1207 const FrameIndexEntry
*FIESave
= CSA
.SaveFIEByReg
[CSR
];
1208 const FrameIndexEntry
*FIELoad
= CSA
.LoadFIEByReg
[CSR
];
1209 assert(FIESave
&& FIELoad
&& "Invalid CSR");
1212 dbgs() << "Scheduling save insertion at: ";
1213 BestPosSave
->dump();
1216 scheduleChange(BestPosSave
,
1217 UsePushPops
? WorklistItem::InsertPushOrPop
1218 : WorklistItem::InsertLoadOrStore
,
1221 for (ProgramPoint
&PP
: RestorePoints
) {
1222 BinaryBasicBlock
*FrontierBB
= Info
.getParentBB(PP
);
1224 dbgs() << "Scheduling restore insertion at: ";
1226 PP
.getInst()->dump();
1228 dbgs() << PP
.getBB()->getName() << "\n";
1231 FrontierBB
->getTerminatorBefore(PP
.isInst() ? PP
.getInst() : nullptr);
1234 bool PrecededByPrefix
= false;
1236 auto Iter
= FrontierBB
->findInstruction(PP
.getInst());
1237 if (Iter
!= FrontierBB
->end() && Iter
!= FrontierBB
->begin()) {
1239 PrecededByPrefix
= BC
.MIB
->isPrefix(*Iter
);
1243 (doesInstUsesCSR(*PP
.getInst(), CSR
) || PrecededByPrefix
)) {
1244 assert(!InsnToBB
[PP
.getInst()]->hasTerminatorAfter(PP
.getInst()) &&
1245 "cannot move to end of bb");
1246 scheduleChange(InsnToBB
[PP
.getInst()],
1247 UsePushPops
? WorklistItem::InsertPushOrPop
1248 : WorklistItem::InsertLoadOrStore
,
1253 UsePushPops
? WorklistItem::InsertPushOrPop
1254 : WorklistItem::InsertLoadOrStore
,
1259 void ShrinkWrapping::moveSaveRestores() {
1260 bool DisablePushPopMode
= false;
1261 bool UsedPushPopMode
= false;
1262 // Keeps info about successfully moved regs: reg index, save position and
1264 std::vector
<std::tuple
<unsigned, MCInst
*, size_t>> MovedRegs
;
1265 uint64_t TotalEstimatedWin
= 0;
1268 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
1269 MCInst
*BestPosSave
= nullptr;
1270 uint64_t EstimatedWin
= 0;
1271 SmallVector
<ProgramPoint
, 4> RestorePoints
;
1272 while (RestorePoints
.empty() &&
1273 isBestSavePosCold(I
, BestPosSave
, EstimatedWin
)) {
1274 RestorePoints
= doRestorePlacement(BestPosSave
, I
, EstimatedWin
);
1275 if (RestorePoints
.empty()) {
1277 dbgs() << "Dropping opportunity because restore placement failed"
1278 " -- total est. freq reduc: "
1279 << EstimatedWin
<< ". Will try "
1280 << (BestSaveCount
[I
].size() - 1) << " more times.\n";
1282 BestSaveCount
[I
].pop_back();
1283 BestSavePos
[I
].pop_back();
1287 if (RestorePoints
.empty()) {
1288 SpillsFailedDynamicCount
+= EstimatedWin
;
1292 const FrameIndexEntry
*FIESave
= CSA
.SaveFIEByReg
[I
];
1293 const FrameIndexEntry
*FIELoad
= CSA
.LoadFIEByReg
[I
];
1295 assert(FIESave
&& FIELoad
);
1296 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1297 const std::pair
<int, int> SPFP
= *SPT
.getStateBefore(*BestPosSave
);
1298 int SaveOffset
= SPFP
.first
;
1299 uint8_t SaveSize
= FIESave
->Size
;
1301 // If we don't know stack state at this point, bail
1302 if ((SPFP
.first
== SPT
.SUPERPOSITION
|| SPFP
.first
== SPT
.EMPTY
) &&
1303 (SPFP
.second
== SPT
.SUPERPOSITION
|| SPFP
.second
== SPT
.EMPTY
)) {
1304 SpillsFailedDynamicCount
+= EstimatedWin
;
1308 // Operation mode: if true, will insert push/pops instead of loads/restores
1309 bool UsePushPops
= validatePushPopsMode(I
, BestPosSave
, SaveOffset
);
1312 SmallVector
<ProgramPoint
, 4> FixedRestorePoints
=
1313 fixPopsPlacements(RestorePoints
, SaveOffset
, I
);
1314 if (FixedRestorePoints
.empty())
1315 UsePushPops
= false;
1317 RestorePoints
= FixedRestorePoints
;
1320 // Disable push-pop mode for all CSRs in this function
1322 DisablePushPopMode
= true;
1324 UsedPushPopMode
= true;
1326 scheduleOldSaveRestoresRemoval(I
, UsePushPops
);
1327 scheduleSaveRestoreInsertions(I
, BestPosSave
, RestorePoints
, UsePushPops
);
1328 MovedRegs
.emplace_back(std::make_tuple(I
, BestPosSave
, SaveSize
));
1329 TotalEstimatedWin
+= EstimatedWin
;
1332 // Revert push-pop mode if it failed for a single CSR
1333 if (DisablePushPopMode
&& UsedPushPopMode
) {
1334 UsedPushPopMode
= false;
1335 for (BinaryBasicBlock
&BB
: BF
) {
1336 auto WRI
= Todo
.find(&BB
);
1337 if (WRI
!= Todo
.end()) {
1338 std::vector
<WorklistItem
> &TodoList
= WRI
->second
;
1339 for (WorklistItem
&Item
: TodoList
)
1340 if (Item
.Action
== WorklistItem::InsertPushOrPop
)
1341 Item
.Action
= WorklistItem::InsertLoadOrStore
;
1343 for (MCInst
&Inst
: llvm::reverse(BB
)) {
1344 auto TodoList
= BC
.MIB
->tryGetAnnotationAs
<std::vector
<WorklistItem
>>(
1345 Inst
, getAnnotationIndex());
1348 bool isCFI
= BC
.MIB
->isCFI(Inst
);
1349 for (WorklistItem
&Item
: *TodoList
) {
1350 if (Item
.Action
== WorklistItem::InsertPushOrPop
)
1351 Item
.Action
= WorklistItem::InsertLoadOrStore
;
1352 if (!isCFI
&& Item
.Action
== WorklistItem::Erase
)
1353 Item
.Action
= WorklistItem::ChangeToAdjustment
;
1358 SpillsMovedDynamicCount
+= TotalEstimatedWin
;
1360 // Update statistics
1361 if (!UsedPushPopMode
) {
1362 SpillsMovedRegularMode
+= MovedRegs
.size();
1366 // Schedule modifications to stack-accessing instructions via
1367 // StackLayoutModifier.
1368 SpillsMovedPushPopMode
+= MovedRegs
.size();
1369 for (std::tuple
<unsigned, MCInst
*, size_t> &I
: MovedRegs
) {
1373 std::tie(RegNdx
, SavePos
, SaveSize
) = I
;
1374 for (MCInst
*Save
: CSA
.getSavesByReg(RegNdx
))
1375 SLM
.collapseRegion(Save
);
1376 SLM
.insertRegion(SavePos
, SaveSize
);
1381 /// Helper function to identify whether two basic blocks created by splitting
1382 /// a critical edge have the same contents.
1383 bool isIdenticalSplitEdgeBB(const BinaryContext
&BC
, const BinaryBasicBlock
&A
,
1384 const BinaryBasicBlock
&B
) {
1385 if (A
.succ_size() != B
.succ_size())
1387 if (A
.succ_size() != 1)
1390 if (*A
.succ_begin() != *B
.succ_begin())
1393 if (A
.size() != B
.size())
1396 // Compare instructions
1397 auto I
= A
.begin(), E
= A
.end();
1398 auto OtherI
= B
.begin(), OtherE
= B
.end();
1399 while (I
!= E
&& OtherI
!= OtherE
) {
1400 if (I
->getOpcode() != OtherI
->getOpcode())
1402 if (!BC
.MIB
->equals(*I
, *OtherI
, [](const MCSymbol
*A
, const MCSymbol
*B
) {
1413 bool ShrinkWrapping::foldIdenticalSplitEdges() {
1414 bool Changed
= false;
1415 for (auto Iter
= BF
.begin(); Iter
!= BF
.end(); ++Iter
) {
1416 BinaryBasicBlock
&BB
= *Iter
;
1417 if (!BB
.getName().starts_with(".LSplitEdge"))
1419 for (BinaryBasicBlock
&RBB
: llvm::reverse(BF
)) {
1422 if (!RBB
.getName().starts_with(".LSplitEdge") || !RBB
.isValid() ||
1423 !isIdenticalSplitEdgeBB(BC
, *Iter
, RBB
))
1425 assert(RBB
.pred_size() == 1 && "Invalid split edge BB");
1426 BinaryBasicBlock
*Pred
= *RBB
.pred_begin();
1427 uint64_t OrigCount
= Pred
->branch_info_begin()->Count
;
1428 uint64_t OrigMispreds
= Pred
->branch_info_begin()->MispredictedCount
;
1429 BF
.replaceJumpTableEntryIn(Pred
, &RBB
, &BB
);
1430 Pred
->replaceSuccessor(&RBB
, &BB
, OrigCount
, OrigMispreds
);
1432 // Remove the block from CFG
1433 RBB
.markValid(false);
1442 // A special StackPointerTracking that compensates for our future plans
1443 // in removing/adding insn.
1444 class PredictiveStackPointerTracking
1445 : public StackPointerTrackingBase
<PredictiveStackPointerTracking
> {
1446 friend class DataflowAnalysis
<PredictiveStackPointerTracking
,
1447 std::pair
<int, int>>;
1448 decltype(ShrinkWrapping::Todo
) &TodoMap
;
1449 DataflowInfoManager
&Info
;
1451 std::optional
<unsigned> AnnotationIndex
;
1454 void compNextAux(const MCInst
&Point
,
1455 const std::vector
<ShrinkWrapping::WorklistItem
> &TodoItems
,
1456 std::pair
<int, int> &Res
) {
1457 for (const ShrinkWrapping::WorklistItem
&Item
: TodoItems
) {
1458 if (Item
.Action
== ShrinkWrapping::WorklistItem::Erase
&&
1459 BC
.MIB
->isPush(Point
)) {
1460 Res
.first
+= BC
.MIB
->getPushSize(Point
);
1463 if (Item
.Action
== ShrinkWrapping::WorklistItem::Erase
&&
1464 BC
.MIB
->isPop(Point
)) {
1465 Res
.first
-= BC
.MIB
->getPopSize(Point
);
1468 if (Item
.Action
== ShrinkWrapping::WorklistItem::InsertPushOrPop
&&
1469 Item
.FIEToInsert
.IsStore
) {
1470 Res
.first
-= Item
.FIEToInsert
.Size
;
1473 if (Item
.Action
== ShrinkWrapping::WorklistItem::InsertPushOrPop
&&
1474 Item
.FIEToInsert
.IsLoad
) {
1475 Res
.first
+= Item
.FIEToInsert
.Size
;
1481 std::pair
<int, int> computeNext(const MCInst
&Point
,
1482 const std::pair
<int, int> &Cur
) {
1483 std::pair
<int, int> Res
=
1484 StackPointerTrackingBase
<PredictiveStackPointerTracking
>::computeNext(
1486 if (Res
.first
== StackPointerTracking::SUPERPOSITION
||
1487 Res
.first
== StackPointerTracking::EMPTY
)
1490 BC
.MIB
->tryGetAnnotationAs
<std::vector
<ShrinkWrapping::WorklistItem
>>(
1491 Point
, ShrinkWrapping::getAnnotationName());
1493 compNextAux(Point
, *TodoItems
, Res
);
1494 auto &InsnToBBMap
= Info
.getInsnToBBMap();
1495 if (&*InsnToBBMap
[&Point
]->rbegin() != &Point
)
1497 auto WRI
= TodoMap
.find(InsnToBBMap
[&Point
]);
1498 if (WRI
== TodoMap
.end())
1500 compNextAux(Point
, WRI
->second
, Res
);
1504 StringRef
getAnnotationName() const {
1505 return StringRef("PredictiveStackPointerTracking");
1509 PredictiveStackPointerTracking(BinaryFunction
&BF
,
1510 decltype(ShrinkWrapping::Todo
) &TodoMap
,
1511 DataflowInfoManager
&Info
,
1512 MCPlusBuilder::AllocatorIdTy AllocatorId
= 0)
1513 : StackPointerTrackingBase
<PredictiveStackPointerTracking
>(BF
,
1515 TodoMap(TodoMap
), Info(Info
) {}
1518 StackPointerTrackingBase
<PredictiveStackPointerTracking
>::run();
1522 } // end anonymous namespace
1524 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR
, int SPValPush
,
1526 MCInst
*SavePoint
= nullptr;
1527 for (BinaryBasicBlock
&BB
: BF
) {
1528 for (MCInst
&Inst
: llvm::reverse(BB
)) {
1531 MCPhysReg StackPtrReg
= 0;
1532 int64_t StackOffset
= 0;
1533 bool IsIndexed
= false;
1534 bool IsLoad
= false;
1535 bool IsStore
= false;
1536 bool IsSimple
= false;
1537 bool IsStoreFromReg
= false;
1539 if (!BC
.MIB
->isStackAccess(Inst
, IsLoad
, IsStore
, IsStoreFromReg
, Reg
,
1540 SrcImm
, StackPtrReg
, StackOffset
, Size
,
1541 IsSimple
, IsIndexed
))
1543 if (Reg
!= CSR
|| !IsStore
|| !IsSimple
)
1553 dbgs() << "Now using as save point for reg " << CSR
<< " :";
1556 bool PrevAffectedZone
= false;
1557 BinaryBasicBlock
*PrevBB
= nullptr;
1558 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
1559 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1560 if (BB
->size() == 0)
1562 const bool InAffectedZoneAtEnd
= DA
.count(*BB
->rbegin(), *SavePoint
);
1563 const bool InAffectedZoneAtBegin
=
1564 (*DA
.getStateBefore(*BB
->begin()))[DA
.ExprToIdx
[SavePoint
]];
1565 bool InAffectedZone
= InAffectedZoneAtBegin
;
1566 for (auto InstIter
= BB
->begin(); InstIter
!= BB
->end(); ++InstIter
) {
1567 const bool CurZone
= DA
.count(*InstIter
, *SavePoint
);
1568 if (InAffectedZone
!= CurZone
) {
1569 auto InsertionIter
= InstIter
;
1571 InAffectedZone
= CurZone
;
1573 InstIter
= insertCFIsForPushOrPop(*BB
, InsertionIter
, CSR
, true, 0,
1576 InstIter
= insertCFIsForPushOrPop(*BB
, InsertionIter
, CSR
, false, 0,
1581 // Are we at the first basic block or hot-cold split point?
1582 if (!PrevBB
|| (BF
.isSplit() && BB
->isCold() != PrevBB
->isCold())) {
1583 if (InAffectedZoneAtBegin
)
1584 insertCFIsForPushOrPop(*BB
, BB
->begin(), CSR
, true, 0, SPValPush
);
1585 } else if (InAffectedZoneAtBegin
!= PrevAffectedZone
) {
1586 if (InAffectedZoneAtBegin
)
1587 insertCFIsForPushOrPop(*PrevBB
, PrevBB
->end(), CSR
, true, 0, SPValPush
);
1589 insertCFIsForPushOrPop(*PrevBB
, PrevBB
->end(), CSR
, false, 0, SPValPop
);
1591 PrevAffectedZone
= InAffectedZoneAtEnd
;
1596 void ShrinkWrapping::rebuildCFIForSP() {
1597 for (BinaryBasicBlock
&BB
: BF
) {
1598 for (MCInst
&Inst
: BB
) {
1599 if (!BC
.MIB
->isCFI(Inst
))
1601 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
1602 if (CFI
->getOperation() == MCCFIInstruction::OpDefCfaOffset
)
1603 BC
.MIB
->addAnnotation(Inst
, "DeleteMe", 0U, AllocatorId
);
1608 BinaryBasicBlock
*PrevBB
= nullptr;
1609 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1610 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1611 if (BB
->size() == 0)
1613 const int SPValAtEnd
= SPT
.getStateAt(*BB
->rbegin())->first
;
1614 const int SPValAtBegin
= SPT
.getStateBefore(*BB
->begin())->first
;
1615 int SPVal
= SPValAtBegin
;
1616 for (auto Iter
= BB
->begin(); Iter
!= BB
->end(); ++Iter
) {
1617 const int CurVal
= SPT
.getStateAt(*Iter
)->first
;
1618 if (SPVal
!= CurVal
) {
1619 auto InsertionIter
= Iter
;
1621 Iter
= BF
.addCFIInstruction(
1623 MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal
));
1627 if (BF
.isSplit() && PrevBB
&& BB
->isCold() != PrevBB
->isCold())
1628 BF
.addCFIInstruction(
1630 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin
));
1631 else if (SPValAtBegin
!= PrevSPVal
)
1632 BF
.addCFIInstruction(
1633 PrevBB
, PrevBB
->end(),
1634 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin
));
1635 PrevSPVal
= SPValAtEnd
;
1639 for (BinaryBasicBlock
&BB
: BF
)
1640 for (auto I
= BB
.begin(); I
!= BB
.end();)
1641 if (BC
.MIB
->hasAnnotation(*I
, "DeleteMe"))
1642 I
= BB
.eraseInstruction(I
);
1647 Expected
<MCInst
> ShrinkWrapping::createStackAccess(int SPVal
, int FPVal
,
1648 const FrameIndexEntry
&FIE
,
1649 bool CreatePushOrPop
) {
1651 if (SPVal
!= StackPointerTracking::SUPERPOSITION
&&
1652 SPVal
!= StackPointerTracking::EMPTY
) {
1654 BC
.MIB
->createRestoreFromStack(NewInst
, BC
.MIB
->getStackPointer(),
1655 FIE
.StackOffset
- SPVal
, FIE
.RegOrImm
,
1658 BC
.MIB
->createSaveToStack(NewInst
, BC
.MIB
->getStackPointer(),
1659 FIE
.StackOffset
- SPVal
, FIE
.RegOrImm
,
1662 if (CreatePushOrPop
)
1663 BC
.MIB
->changeToPushOrPop(NewInst
);
1666 assert(FPVal
!= StackPointerTracking::SUPERPOSITION
&&
1667 FPVal
!= StackPointerTracking::EMPTY
);
1670 BC
.MIB
->createRestoreFromStack(NewInst
, BC
.MIB
->getFramePointer(),
1671 FIE
.StackOffset
- FPVal
, FIE
.RegOrImm
,
1674 BC
.MIB
->createSaveToStack(NewInst
, BC
.MIB
->getFramePointer(),
1675 FIE
.StackOffset
- FPVal
, FIE
.RegOrImm
, FIE
.Size
);
1680 void ShrinkWrapping::updateCFIInstOffset(MCInst
&Inst
, int64_t NewOffset
) {
1681 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
1682 if (UpdatedCFIs
.count(CFI
))
1685 switch (CFI
->getOperation()) {
1686 case MCCFIInstruction::OpDefCfa
:
1687 case MCCFIInstruction::OpDefCfaRegister
:
1688 case MCCFIInstruction::OpDefCfaOffset
:
1689 CFI
= BF
.mutateCFIOffsetFor(Inst
, -NewOffset
);
1691 case MCCFIInstruction::OpOffset
:
1696 UpdatedCFIs
.insert(CFI
);
1699 BBIterTy
ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock
&BB
,
1700 BBIterTy Pos
, unsigned Reg
,
1701 bool isPush
, int Sz
,
1702 int64_t NewOffset
) {
1704 for (uint32_t Idx
: DeletedPushCFIs
[Reg
]) {
1705 Pos
= BF
.addCFIPseudo(&BB
, Pos
, Idx
);
1706 updateCFIInstOffset(*Pos
++, NewOffset
);
1708 if (HasDeletedOffsetCFIs
[Reg
]) {
1709 Pos
= BF
.addCFIInstruction(
1711 MCCFIInstruction::createOffset(
1712 nullptr, BC
.MRI
->getDwarfRegNum(Reg
, false), NewOffset
));
1716 for (uint32_t Idx
: DeletedPopCFIs
[Reg
]) {
1717 Pos
= BF
.addCFIPseudo(&BB
, Pos
, Idx
);
1718 updateCFIInstOffset(*Pos
++, NewOffset
);
1720 if (HasDeletedOffsetCFIs
[Reg
]) {
1721 Pos
= BF
.addCFIInstruction(
1723 MCCFIInstruction::createSameValue(
1724 nullptr, BC
.MRI
->getDwarfRegNum(Reg
, false)));
1731 Expected
<BBIterTy
> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint
,
1732 BinaryBasicBlock
*CurBB
,
1733 const WorklistItem
&Item
,
1736 // Trigger CFI reconstruction for this CSR if necessary - writing to
1737 // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
1738 if ((Item
.FIEToInsert
.IsStore
&&
1739 !DeletedPushCFIs
[Item
.AffectedReg
].empty()) ||
1740 (Item
.FIEToInsert
.IsLoad
&& !DeletedPopCFIs
[Item
.AffectedReg
].empty()) ||
1741 HasDeletedOffsetCFIs
[Item
.AffectedReg
]) {
1742 if (Item
.Action
== WorklistItem::InsertPushOrPop
) {
1743 if (Item
.FIEToInsert
.IsStore
)
1744 PushOffsetByReg
[Item
.AffectedReg
] = SPVal
- Item
.FIEToInsert
.Size
;
1746 PopOffsetByReg
[Item
.AffectedReg
] = SPVal
;
1748 if (Item
.FIEToInsert
.IsStore
)
1749 PushOffsetByReg
[Item
.AffectedReg
] = Item
.FIEToInsert
.StackOffset
;
1751 PopOffsetByReg
[Item
.AffectedReg
] = Item
.FIEToInsert
.StackOffset
;
1756 dbgs() << "Creating stack access with SPVal = " << SPVal
1757 << "; stack offset = " << Item
.FIEToInsert
.StackOffset
1758 << " Is push = " << (Item
.Action
== WorklistItem::InsertPushOrPop
)
1761 Expected
<MCInst
> NewInstOrErr
=
1762 createStackAccess(SPVal
, FPVal
, Item
.FIEToInsert
,
1763 Item
.Action
== WorklistItem::InsertPushOrPop
);
1764 if (auto E
= NewInstOrErr
.takeError())
1765 return Error(std::move(E
));
1766 MCInst
&NewInst
= *NewInstOrErr
;
1767 if (InsertionPoint
!= CurBB
->end()) {
1769 dbgs() << "Adding before Inst: ";
1770 InsertionPoint
->dump();
1771 dbgs() << "the following inst: ";
1775 CurBB
->insertInstruction(InsertionPoint
, std::move(NewInst
));
1778 CurBB
->addInstruction(std::move(NewInst
));
1779 LLVM_DEBUG(dbgs() << "Adding to BB!\n");
1780 return CurBB
->end();
1783 Expected
<BBIterTy
> ShrinkWrapping::processInsertionsList(
1784 BBIterTy InsertionPoint
, BinaryBasicBlock
*CurBB
,
1785 std::vector
<WorklistItem
> &TodoList
, int64_t SPVal
, int64_t FPVal
) {
1786 bool HasInsertions
= llvm::any_of(TodoList
, [&](WorklistItem
&Item
) {
1787 return Item
.Action
== WorklistItem::InsertLoadOrStore
||
1788 Item
.Action
== WorklistItem::InsertPushOrPop
;
1792 return InsertionPoint
;
1794 assert(((SPVal
!= StackPointerTracking::SUPERPOSITION
&&
1795 SPVal
!= StackPointerTracking::EMPTY
) ||
1796 (FPVal
!= StackPointerTracking::SUPERPOSITION
&&
1797 FPVal
!= StackPointerTracking::EMPTY
)) &&
1798 "Cannot insert if we have no idea of the stack state here");
1800 // Revert the effect of PSPT for this location, we want SP Value before
1802 if (InsertionPoint
== CurBB
->end()) {
1803 for (WorklistItem
&Item
: TodoList
) {
1804 if (Item
.Action
!= WorklistItem::InsertPushOrPop
)
1806 if (Item
.FIEToInsert
.IsStore
)
1807 SPVal
+= Item
.FIEToInsert
.Size
;
1808 if (Item
.FIEToInsert
.IsLoad
)
1809 SPVal
-= Item
.FIEToInsert
.Size
;
1813 // Reorder POPs to obey the correct dominance relation between them
1814 llvm::stable_sort(TodoList
, [&](const WorklistItem
&A
,
1815 const WorklistItem
&B
) {
1816 if ((A
.Action
!= WorklistItem::InsertPushOrPop
|| !A
.FIEToInsert
.IsLoad
) &&
1817 (B
.Action
!= WorklistItem::InsertPushOrPop
|| !B
.FIEToInsert
.IsLoad
))
1819 if ((A
.Action
!= WorklistItem::InsertPushOrPop
|| !A
.FIEToInsert
.IsLoad
))
1821 if ((B
.Action
!= WorklistItem::InsertPushOrPop
|| !B
.FIEToInsert
.IsLoad
))
1823 return DomOrder
[B
.AffectedReg
] < DomOrder
[A
.AffectedReg
];
1826 // Process insertions
1827 for (WorklistItem
&Item
: TodoList
) {
1828 if (Item
.Action
== WorklistItem::Erase
||
1829 Item
.Action
== WorklistItem::ChangeToAdjustment
)
1832 auto InsertionPointOrErr
=
1833 processInsertion(InsertionPoint
, CurBB
, Item
, SPVal
, FPVal
);
1834 if (auto E
= InsertionPointOrErr
.takeError())
1835 return Error(std::move(E
));
1836 InsertionPoint
= *InsertionPointOrErr
;
1837 if (Item
.Action
== WorklistItem::InsertPushOrPop
&&
1838 Item
.FIEToInsert
.IsStore
)
1839 SPVal
-= Item
.FIEToInsert
.Size
;
1840 if (Item
.Action
== WorklistItem::InsertPushOrPop
&&
1841 Item
.FIEToInsert
.IsLoad
)
1842 SPVal
+= Item
.FIEToInsert
.Size
;
1844 return InsertionPoint
;
1847 Expected
<bool> ShrinkWrapping::processInsertions() {
1848 PredictiveStackPointerTracking
PSPT(BF
, Todo
, Info
, AllocatorId
);
1851 bool Changes
= false;
1852 for (BinaryBasicBlock
&BB
: BF
) {
1853 // Process insertions before some inst.
1854 for (auto I
= BB
.begin(); I
!= BB
.end(); ++I
) {
1856 auto TodoList
= BC
.MIB
->tryGetAnnotationAs
<std::vector
<WorklistItem
>>(
1857 Inst
, getAnnotationIndex());
1861 std::vector
<WorklistItem
> List
= *TodoList
;
1863 dbgs() << "Now processing insertions in " << BB
.getName()
1864 << " before inst: ";
1868 std::pair
<int, int> SPTState
=
1869 *PSPT
.getStateAt(Iter
== BB
.begin() ? (ProgramPoint
)&BB
: &*(--Iter
));
1871 processInsertionsList(I
, &BB
, List
, SPTState
.first
, SPTState
.second
);
1872 if (auto E
= IterOrErr
.takeError())
1873 return Error(std::move(E
));
1876 // Process insertions at the end of bb
1877 auto WRI
= Todo
.find(&BB
);
1878 if (WRI
!= Todo
.end()) {
1879 std::pair
<int, int> SPTState
= *PSPT
.getStateAt(*BB
.rbegin());
1880 if (auto E
= processInsertionsList(BB
.end(), &BB
, WRI
->second
,
1881 SPTState
.first
, SPTState
.second
)
1883 return Error(std::move(E
));
1890 void ShrinkWrapping::processDeletions() {
1891 LivenessAnalysis
&LA
= Info
.getLivenessAnalysis();
1892 for (BinaryBasicBlock
&BB
: BF
) {
1893 for (auto II
= BB
.begin(); II
!= BB
.end(); ++II
) {
1895 auto TodoList
= BC
.MIB
->tryGetAnnotationAs
<std::vector
<WorklistItem
>>(
1896 Inst
, getAnnotationIndex());
1899 // Process all deletions
1900 for (WorklistItem
&Item
: *TodoList
) {
1901 if (Item
.Action
!= WorklistItem::Erase
&&
1902 Item
.Action
!= WorklistItem::ChangeToAdjustment
)
1905 if (Item
.Action
== WorklistItem::ChangeToAdjustment
) {
1906 // Is flag reg alive across this func?
1907 bool DontClobberFlags
= LA
.isAlive(&Inst
, BC
.MIB
->getFlagsReg());
1908 if (int Sz
= BC
.MIB
->getPushSize(Inst
)) {
1909 BC
.MIB
->createStackPointerIncrement(Inst
, Sz
, DontClobberFlags
);
1912 if (int Sz
= BC
.MIB
->getPopSize(Inst
)) {
1913 BC
.MIB
->createStackPointerDecrement(Inst
, Sz
, DontClobberFlags
);
1919 dbgs() << "Erasing: ";
1920 BC
.printInstruction(dbgs(), Inst
);
1922 II
= std::prev(BB
.eraseInstruction(II
));
1929 void ShrinkWrapping::rebuildCFI() {
1930 const bool FP
= Info
.getStackPointerTracking().HasFramePointer
;
1931 Info
.invalidateAll();
1934 Info
.invalidateAll();
1936 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
1937 if (PushOffsetByReg
[I
] == 0 || PopOffsetByReg
[I
] == 0)
1939 const int64_t SPValPush
= PushOffsetByReg
[I
];
1940 const int64_t SPValPop
= PopOffsetByReg
[I
];
1941 insertUpdatedCFI(I
, SPValPush
, SPValPop
);
1942 Info
.invalidateAll();
1946 Expected
<bool> ShrinkWrapping::perform(bool HotOnly
) {
1947 HasDeletedOffsetCFIs
= BitVector(BC
.MRI
->getNumRegs(), false);
1948 PushOffsetByReg
= std::vector
<int64_t>(BC
.MRI
->getNumRegs(), 0LL);
1949 PopOffsetByReg
= std::vector
<int64_t>(BC
.MRI
->getNumRegs(), 0LL);
1951 // Update pass statistics
1952 uint64_t TotalInstrs
= 0ULL;
1953 uint64_t TotalStoreInstrs
= 0ULL;
1954 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1955 uint64_t BBExecCount
= BB
->getExecutionCount();
1956 if (!BBExecCount
|| BBExecCount
== BinaryBasicBlock::COUNT_NO_PROFILE
)
1958 for (const auto &Instr
: *BB
) {
1959 if (BC
.MIB
->isPseudo(Instr
))
1961 if (BC
.MIB
->mayStore(Instr
))
1962 TotalStoreInstrs
+= BBExecCount
;
1963 TotalInstrs
+= BBExecCount
;
1966 InstrDynamicCount
+= TotalInstrs
;
1967 StoreDynamicCount
+= TotalStoreInstrs
;
1969 if (!FA
.hasFrameInfo(BF
))
1972 if (HotOnly
&& (BF
.getKnownExecutionCount() < BC
.getHotThreshold()))
1975 if (opts::EqualizeBBCounts
)
1976 equalizeBBCounts(Info
, BF
);
1978 if (BF
.checkForAmbiguousJumpTables()) {
1979 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF
.getPrintName()
1981 // We could call disambiguateJumpTables here, but it is probably not worth
1982 // the cost (of duplicating potentially large jump tables that could regress
1983 // dcache misses). Moreover, ambiguous JTs are rare and coming from code
1984 // written in assembly language. Just bail.
1990 pruneUnwantedCSRs();
1991 computeSaveLocations();
1994 dbgs() << "Func before shrink-wrapping: \n";
1997 SLM
.performChanges();
1998 // Early exit if processInsertions doesn't detect any todo items
1999 auto ModifiedOrErr
= processInsertions();
2000 if (auto E
= ModifiedOrErr
.takeError())
2001 return Error(std::move(E
));
2002 const bool Modified
= *ModifiedOrErr
;
2006 if (foldIdenticalSplitEdges()) {
2007 const std::pair
<unsigned, uint64_t> Stats
= BF
.eraseInvalidBBs();
2009 LLVM_DEBUG(dbgs() << "Deleted " << Stats
.first
2010 << " redundant split edge BBs (" << Stats
.second
2011 << " bytes) for " << BF
.getPrintName() << "\n");
2014 // We may have split edges, creating BBs that need correct branching
2017 dbgs() << "Func after shrink-wrapping: \n";
2023 void ShrinkWrapping::printStats(BinaryContext
&BC
) {
2024 BC
.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
2025 << " spills inserting load/stores and " << SpillsMovedPushPopMode
2026 << " spills inserting push/pops\n";
2027 if (!InstrDynamicCount
|| !StoreDynamicCount
)
2029 BC
.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
2030 << " store executions ("
2031 << format("%.1lf%%",
2032 (100.0 * SpillsMovedDynamicCount
/ InstrDynamicCount
))
2033 << " total instructions executed, "
2034 << format("%.1lf%%",
2035 (100.0 * SpillsMovedDynamicCount
/ StoreDynamicCount
))
2036 << " store instructions)\n";
2037 BC
.outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
2038 << SpillsFailedDynamicCount
<< " store executions ("
2039 << format("%.1lf%%",
2040 (100.0 * SpillsFailedDynamicCount
/ InstrDynamicCount
))
2041 << " total instructions executed, "
2042 << format("%.1lf%%",
2043 (100.0 * SpillsFailedDynamicCount
/ StoreDynamicCount
))
2044 << " store instructions)\n";
2047 // Operators necessary as a result of using MCAnnotation
2048 raw_ostream
&operator<<(raw_ostream
&OS
,
2049 const std::vector
<ShrinkWrapping::WorklistItem
> &Vec
) {
2051 const char *Sep
= "";
2052 for (const ShrinkWrapping::WorklistItem
&Item
: Vec
) {
2054 switch (Item
.Action
) {
2055 case ShrinkWrapping::WorklistItem::Erase
:
2058 case ShrinkWrapping::WorklistItem::ChangeToAdjustment
:
2059 OS
<< "ChangeToAdjustment";
2061 case ShrinkWrapping::WorklistItem::InsertLoadOrStore
:
2062 OS
<< "InsertLoadOrStore";
2064 case ShrinkWrapping::WorklistItem::InsertPushOrPop
:
2065 OS
<< "InsertPushOrPop";
2075 operator<<(raw_ostream
&OS
,
2076 const std::vector
<StackLayoutModifier::WorklistItem
> &Vec
) {
2078 const char *Sep
= "";
2079 for (const StackLayoutModifier::WorklistItem
&Item
: Vec
) {
2081 switch (Item
.Action
) {
2082 case StackLayoutModifier::WorklistItem::None
:
2085 case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset
:
2086 OS
<< "AdjustLoadStoreOffset";
2088 case StackLayoutModifier::WorklistItem::AdjustCFI
:
2098 bool operator==(const ShrinkWrapping::WorklistItem
&A
,
2099 const ShrinkWrapping::WorklistItem
&B
) {
2100 return (A
.Action
== B
.Action
&& A
.AffectedReg
== B
.AffectedReg
&&
2101 A
.Adjustment
== B
.Adjustment
&&
2102 A
.FIEToInsert
.IsLoad
== B
.FIEToInsert
.IsLoad
&&
2103 A
.FIEToInsert
.IsStore
== B
.FIEToInsert
.IsStore
&&
2104 A
.FIEToInsert
.RegOrImm
== B
.FIEToInsert
.RegOrImm
&&
2105 A
.FIEToInsert
.Size
== B
.FIEToInsert
.Size
&&
2106 A
.FIEToInsert
.IsSimple
== B
.FIEToInsert
.IsSimple
&&
2107 A
.FIEToInsert
.StackOffset
== B
.FIEToInsert
.StackOffset
);
2110 bool operator==(const StackLayoutModifier::WorklistItem
&A
,
2111 const StackLayoutModifier::WorklistItem
&B
) {
2112 return (A
.Action
== B
.Action
&& A
.OffsetUpdate
== B
.OffsetUpdate
);
2115 } // end namespace bolt
2116 } // end namespace llvm