1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the ShrinkWrapping class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/ShrinkWrapping.h"
14 #include "bolt/Core/MCPlus.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/CommandLineOpts.h"
22 #define DEBUG_TYPE "shrinkwrapping"
28 extern cl::opt
<bool> TimeOpts
;
29 extern cl::OptionCategory BoltOptCategory
;
31 static cl::opt
<unsigned> ShrinkWrappingThreshold(
32 "shrink-wrapping-threshold",
33 cl::desc("Percentage of prologue execution count to use as threshold when"
34 " evaluating whether a block is cold enough to be profitable to"
35 " move eligible spills there"),
36 cl::init(30), cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
42 void CalleeSavedAnalysis::analyzeSaves() {
43 ReachingDefOrUse
</*Def=*/true> &RD
= Info
.getReachingDefs();
44 StackReachingUses
&SRU
= Info
.getStackReachingUses();
45 auto &InsnToBB
= Info
.getInsnToBBMap();
46 BitVector
BlacklistedRegs(BC
.MRI
->getNumRegs(), false);
48 LLVM_DEBUG(dbgs() << "Checking spill locations\n");
49 for (BinaryBasicBlock
&BB
: BF
) {
50 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB
.getName() << "\n");
51 const MCInst
*Prev
= nullptr;
52 for (MCInst
&Inst
: BB
) {
53 if (ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
)) {
54 // Blacklist weird stores we don't understand
55 if ((!FIE
->IsSimple
|| FIE
->StackOffset
>= 0) && FIE
->IsStore
&&
56 FIE
->IsStoreFromReg
) {
57 BlacklistedRegs
.set(FIE
->RegOrImm
);
58 CalleeSaved
.reset(FIE
->RegOrImm
);
63 if (!FIE
->IsStore
|| !FIE
->IsStoreFromReg
||
64 BlacklistedRegs
[FIE
->RegOrImm
]) {
69 // If this reg is defined locally, it is not a callee-saved reg
70 if (RD
.isReachedBy(FIE
->RegOrImm
,
71 Prev
? RD
.expr_begin(*Prev
) : RD
.expr_begin(BB
))) {
72 BlacklistedRegs
.set(FIE
->RegOrImm
);
73 CalleeSaved
.reset(FIE
->RegOrImm
);
78 // If this stack position is accessed in another function, we are
79 // probably dealing with a parameter passed in a stack -- do not mess
81 if (SRU
.isStoreUsed(*FIE
,
82 Prev
? SRU
.expr_begin(*Prev
) : SRU
.expr_begin(BB
)),
83 /*IncludeLocalAccesses=*/false) {
84 BlacklistedRegs
.set(FIE
->RegOrImm
);
85 CalleeSaved
.reset(FIE
->RegOrImm
);
90 // If this stack position is loaded elsewhere in another reg, we can't
91 // update it, so blacklist it.
92 if (SRU
.isLoadedInDifferentReg(*FIE
, Prev
? SRU
.expr_begin(*Prev
)
93 : SRU
.expr_begin(BB
))) {
94 BlacklistedRegs
.set(FIE
->RegOrImm
);
95 CalleeSaved
.reset(FIE
->RegOrImm
);
100 // Ignore regs with multiple saves
101 if (CalleeSaved
[FIE
->RegOrImm
]) {
102 BlacklistedRegs
.set(FIE
->RegOrImm
);
103 CalleeSaved
.reset(FIE
->RegOrImm
);
108 CalleeSaved
.set(FIE
->RegOrImm
);
109 SaveFIEByReg
[FIE
->RegOrImm
] = &*FIE
;
110 SavingCost
[FIE
->RegOrImm
] += InsnToBB
[&Inst
]->getKnownExecutionCount();
111 BC
.MIB
->addAnnotation(Inst
, getSaveTag(), FIE
->RegOrImm
, AllocatorId
);
112 OffsetsByReg
[FIE
->RegOrImm
] = FIE
->StackOffset
;
113 LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
114 << FIE
->RegOrImm
<< "\n");
121 void CalleeSavedAnalysis::analyzeRestores() {
122 ReachingDefOrUse
</*Def=*/false> &RU
= Info
.getReachingUses();
124 // Now compute all restores of these callee-saved regs
125 for (BinaryBasicBlock
&BB
: BF
) {
126 const MCInst
*Prev
= nullptr;
127 for (MCInst
&Inst
: llvm::reverse(BB
)) {
128 if (ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
)) {
129 if (!FIE
->IsLoad
|| !CalleeSaved
[FIE
->RegOrImm
]) {
134 // If this reg is used locally after a restore, then we are probably
135 // not dealing with a callee-saved reg. Except if this use is by
136 // another store, but we don't cover this case yet.
137 // Also not callee-saved if this load accesses caller stack or isn't
139 if (!FIE
->IsSimple
|| FIE
->StackOffset
>= 0 ||
140 RU
.isReachedBy(FIE
->RegOrImm
,
141 Prev
? RU
.expr_begin(*Prev
) : RU
.expr_begin(BB
))) {
142 CalleeSaved
.reset(FIE
->RegOrImm
);
146 // If stack offsets between saves/store don't agree with each other,
147 // we don't completely understand what's happening here
148 if (FIE
->StackOffset
!= OffsetsByReg
[FIE
->RegOrImm
]) {
149 CalleeSaved
.reset(FIE
->RegOrImm
);
150 LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
151 "mismatching restore: "
152 << FIE
->RegOrImm
<< "\n");
157 LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE
->RegOrImm
159 if (LoadFIEByReg
[FIE
->RegOrImm
] == nullptr)
160 LoadFIEByReg
[FIE
->RegOrImm
] = &*FIE
;
161 BC
.MIB
->addAnnotation(Inst
, getRestoreTag(), FIE
->RegOrImm
,
163 HasRestores
.set(FIE
->RegOrImm
);
170 std::vector
<MCInst
*> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg
) {
171 std::vector
<MCInst
*> Results
;
172 for (BinaryBasicBlock
&BB
: BF
)
173 for (MCInst
&Inst
: BB
)
174 if (getSavedReg(Inst
) == Reg
)
175 Results
.push_back(&Inst
);
179 std::vector
<MCInst
*> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg
) {
180 std::vector
<MCInst
*> Results
;
181 for (BinaryBasicBlock
&BB
: BF
)
182 for (MCInst
&Inst
: BB
)
183 if (getRestoredReg(Inst
) == Reg
)
184 Results
.push_back(&Inst
);
188 CalleeSavedAnalysis::~CalleeSavedAnalysis() {
189 for (BinaryBasicBlock
&BB
: BF
) {
190 for (MCInst
&Inst
: BB
) {
191 BC
.MIB
->removeAnnotation(Inst
, getSaveTag());
192 BC
.MIB
->removeAnnotation(Inst
, getRestoreTag());
197 void StackLayoutModifier::blacklistRegion(int64_t Offset
, int64_t Size
) {
198 if (BlacklistedRegions
[Offset
] < Size
)
199 BlacklistedRegions
[Offset
] = Size
;
202 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset
, int64_t Size
) {
203 for (std::pair
<const int64_t, int64_t> Elem
: BlacklistedRegions
)
204 if (Offset
+ Size
> Elem
.first
&& Offset
< Elem
.first
+ Elem
.second
)
209 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset
,
211 bool HasConflict
= false;
212 for (auto Iter
= AvailableRegions
.begin(); Iter
!= AvailableRegions
.end();) {
213 std::pair
<const int64_t, int64_t> &Elem
= *Iter
;
214 if (Offset
+ Size
> Elem
.first
&& Offset
< Elem
.first
+ Elem
.second
&&
215 (Offset
!= Elem
.first
|| Size
!= Elem
.second
)) {
216 Iter
= AvailableRegions
.erase(Iter
);
223 blacklistRegion(Offset
, Size
);
229 void StackLayoutModifier::checkFramePointerInitialization(MCInst
&Point
) {
230 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
231 if (!BC
.MII
->get(Point
.getOpcode())
232 .hasDefOfPhysReg(Point
, BC
.MIB
->getFramePointer(), *BC
.MRI
))
236 std::tie(SPVal
, FPVal
) = *SPT
.getStateBefore(Point
);
237 std::pair
<MCPhysReg
, int64_t> FP
;
239 if (FPVal
!= SPT
.EMPTY
&& FPVal
!= SPT
.SUPERPOSITION
)
240 FP
= std::make_pair(BC
.MIB
->getFramePointer(), FPVal
);
242 FP
= std::make_pair(0, 0);
243 std::pair
<MCPhysReg
, int64_t> SP
;
245 if (SPVal
!= SPT
.EMPTY
&& SPVal
!= SPT
.SUPERPOSITION
)
246 SP
= std::make_pair(BC
.MIB
->getStackPointer(), SPVal
);
248 SP
= std::make_pair(0, 0);
251 if (!BC
.MIB
->evaluateStackOffsetExpr(Point
, Output
, SP
, FP
))
254 // Not your regular frame pointer initialization... bail
256 blacklistRegion(0, 0);
259 void StackLayoutModifier::checkStackPointerRestore(MCInst
&Point
) {
260 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
261 if (!BC
.MII
->get(Point
.getOpcode())
262 .hasDefOfPhysReg(Point
, BC
.MIB
->getStackPointer(), *BC
.MRI
))
264 // Check if the definition of SP comes from FP -- in this case, this
265 // value may need to be updated depending on our stack layout changes
266 bool UsesFP
= llvm::any_of(BC
.MIB
->useOperands(Point
), [&](MCOperand
&Op
) {
267 return Op
.isReg() && Op
.getReg() == BC
.MIB
->getFramePointer();
272 // Setting up evaluation
274 std::tie(SPVal
, FPVal
) = *SPT
.getStateBefore(Point
);
275 std::pair
<MCPhysReg
, int64_t> FP
;
277 if (FPVal
!= SPT
.EMPTY
&& FPVal
!= SPT
.SUPERPOSITION
)
278 FP
= std::make_pair(BC
.MIB
->getFramePointer(), FPVal
);
280 FP
= std::make_pair(0, 0);
281 std::pair
<MCPhysReg
, int64_t> SP
;
283 if (SPVal
!= SPT
.EMPTY
&& SPVal
!= SPT
.SUPERPOSITION
)
284 SP
= std::make_pair(BC
.MIB
->getStackPointer(), SPVal
);
286 SP
= std::make_pair(0, 0);
289 if (!BC
.MIB
->evaluateStackOffsetExpr(Point
, Output
, SP
, FP
))
292 // If the value is the same of FP, no need to adjust it
296 // If an allocation happened through FP, bail
297 if (Output
<= SPVal
) {
298 blacklistRegion(0, 0);
302 // We are restoring SP to an old value based on FP. Mark it as a stack
303 // access to be fixed later.
304 BC
.MIB
->addAnnotation(Point
, getSlotTag(), Output
, AllocatorId
);
307 void StackLayoutModifier::classifyStackAccesses() {
308 // Understand when stack slots are being used non-locally
309 StackReachingUses
&SRU
= Info
.getStackReachingUses();
311 for (BinaryBasicBlock
&BB
: BF
) {
312 const MCInst
*Prev
= nullptr;
313 for (MCInst
&Inst
: llvm::reverse(BB
)) {
314 checkFramePointerInitialization(Inst
);
315 checkStackPointerRestore(Inst
);
316 ErrorOr
<const FrameIndexEntry
&> FIEX
= FA
.getFIEFor(Inst
);
321 if (!FIEX
->IsSimple
|| (FIEX
->IsStore
&& !FIEX
->IsStoreFromReg
)) {
322 blacklistRegion(FIEX
->StackOffset
, FIEX
->Size
);
326 // If this stack position is accessed in another function, we are
327 // probably dealing with a parameter passed in a stack -- do not mess
329 if (SRU
.isStoreUsed(*FIEX
,
330 Prev
? SRU
.expr_begin(*Prev
) : SRU
.expr_begin(BB
),
331 /*IncludeLocalAccesses=*/false)) {
332 blacklistRegion(FIEX
->StackOffset
, FIEX
->Size
);
336 // Now we have a clear stack slot access. Check if its blacklisted or if
337 // it conflicts with another chunk.
338 if (isRegionBlacklisted(FIEX
->StackOffset
, FIEX
->Size
) ||
339 blacklistAllInConflictWith(FIEX
->StackOffset
, FIEX
->Size
)) {
343 // We are free to go. Add it as available stack slot which we know how
345 AvailableRegions
[FIEX
->StackOffset
] = FIEX
->Size
;
346 BC
.MIB
->addAnnotation(Inst
, getSlotTag(), FIEX
->StackOffset
, AllocatorId
);
347 RegionToRegMap
[FIEX
->StackOffset
].insert(FIEX
->RegOrImm
);
348 RegToRegionMap
[FIEX
->RegOrImm
].insert(FIEX
->StackOffset
);
349 LLVM_DEBUG(dbgs() << "Adding region " << FIEX
->StackOffset
<< " size "
350 << (int)FIEX
->Size
<< "\n");
355 void StackLayoutModifier::classifyCFIs() {
356 std::stack
<std::pair
<int64_t, uint16_t>> CFIStack
;
357 int64_t CfaOffset
= -8;
360 auto recordAccess
= [&](MCInst
*Inst
, int64_t Offset
) {
361 const uint16_t Reg
= *BC
.MRI
->getLLVMRegNum(CfaReg
, /*isEH=*/false);
362 if (Reg
== BC
.MIB
->getStackPointer() || Reg
== BC
.MIB
->getFramePointer()) {
363 BC
.MIB
->addAnnotation(*Inst
, getSlotTag(), Offset
, AllocatorId
);
364 LLVM_DEBUG(dbgs() << "Recording CFI " << Offset
<< "\n");
371 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
372 for (MCInst
&Inst
: *BB
) {
373 if (!BC
.MIB
->isCFI(Inst
))
375 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
376 switch (CFI
->getOperation()) {
377 case MCCFIInstruction::OpDefCfa
:
378 CfaOffset
= -CFI
->getOffset();
379 recordAccess(&Inst
, CfaOffset
);
381 case MCCFIInstruction::OpDefCfaRegister
:
382 CfaReg
= CFI
->getRegister();
384 case MCCFIInstruction::OpDefCfaOffset
:
385 CfaOffset
= -CFI
->getOffset();
386 recordAccess(&Inst
, CfaOffset
);
388 case MCCFIInstruction::OpOffset
:
389 recordAccess(&Inst
, CFI
->getOffset());
390 BC
.MIB
->addAnnotation(Inst
, getOffsetCFIRegTag(),
391 BC
.MRI
->getLLVMRegNum(CFI
->getRegister(),
395 case MCCFIInstruction::OpSameValue
:
396 BC
.MIB
->addAnnotation(Inst
, getOffsetCFIRegTag(),
397 BC
.MRI
->getLLVMRegNum(CFI
->getRegister(),
401 case MCCFIInstruction::OpRememberState
:
402 CFIStack
.push(std::make_pair(CfaOffset
, CfaReg
));
404 case MCCFIInstruction::OpRestoreState
: {
405 assert(!CFIStack
.empty() && "Corrupt CFI stack");
406 std::pair
<int64_t, uint16_t> &Elem
= CFIStack
.top();
408 CfaOffset
= Elem
.first
;
409 CfaReg
= Elem
.second
;
412 case MCCFIInstruction::OpRelOffset
:
413 case MCCFIInstruction::OpAdjustCfaOffset
:
414 llvm_unreachable("Unhandled AdjustCfaOffset");
423 void StackLayoutModifier::scheduleChange(
424 MCInst
&Inst
, StackLayoutModifier::WorklistItem Item
) {
425 auto &WList
= BC
.MIB
->getOrCreateAnnotationAs
<std::vector
<WorklistItem
>>(
426 Inst
, getTodoTag(), AllocatorId
);
427 WList
.push_back(Item
);
430 bool StackLayoutModifier::canCollapseRegion(MCInst
*DeletedPush
) {
431 if (!IsSimple
|| !BC
.MIB
->isPush(*DeletedPush
))
434 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(*DeletedPush
);
438 return canCollapseRegion(FIE
->StackOffset
);
441 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr
) {
447 if (CollapsedRegions
.count(RegionAddr
))
450 // Check if it is possible to readjust all accesses below RegionAddr
451 if (!BlacklistedRegions
.empty())
457 bool StackLayoutModifier::collapseRegion(MCInst
*DeletedPush
) {
458 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(*DeletedPush
);
461 int64_t RegionAddr
= FIE
->StackOffset
;
462 int64_t RegionSz
= FIE
->Size
;
463 return collapseRegion(DeletedPush
, RegionAddr
, RegionSz
);
466 bool StackLayoutModifier::collapseRegion(MCInst
*Alloc
, int64_t RegionAddr
,
468 if (!canCollapseRegion(RegionAddr
))
471 assert(IsInitialized
);
472 StackAllocationAnalysis
&SAA
= Info
.getStackAllocationAnalysis();
474 for (BinaryBasicBlock
&BB
: BF
) {
475 for (MCInst
&Inst
: BB
) {
476 if (!BC
.MIB
->hasAnnotation(Inst
, getSlotTag()))
479 BC
.MIB
->getAnnotationAs
<decltype(FrameIndexEntry::StackOffset
)>(
481 if (!AvailableRegions
.count(Slot
))
483 // We need to ensure this access is affected by the deleted push
484 if (!(*SAA
.getStateBefore(Inst
))[SAA
.ExprToIdx
[Alloc
]])
487 if (BC
.MIB
->isCFI(Inst
)) {
488 if (Slot
> RegionAddr
)
490 scheduleChange(Inst
, WorklistItem(WorklistItem::AdjustCFI
, RegionSz
));
493 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
);
495 if (Slot
> RegionAddr
)
497 // SP update based on frame pointer
499 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, RegionSz
));
503 if (Slot
== RegionAddr
) {
504 BC
.MIB
->addAnnotation(Inst
, "AccessesDeletedPos", 0U, AllocatorId
);
507 if (BC
.MIB
->isPush(Inst
) || BC
.MIB
->isPop(Inst
))
510 if (FIE
->StackPtrReg
== BC
.MIB
->getStackPointer() && Slot
< RegionAddr
)
513 if (FIE
->StackPtrReg
== BC
.MIB
->getFramePointer() && Slot
> RegionAddr
)
517 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, RegionSz
));
521 CollapsedRegions
.insert(RegionAddr
);
525 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset
) {
526 for (BinaryBasicBlock
&BB
: BF
) {
527 for (MCInst
&Inst
: BB
) {
528 if (!BC
.MIB
->hasAnnotation(Inst
, "AccessesDeletedPos"))
530 BC
.MIB
->removeAnnotation(Inst
, "AccessesDeletedPos");
532 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, NewOffset
));
537 bool StackLayoutModifier::canInsertRegion(ProgramPoint P
) {
543 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
544 int64_t RegionAddr
= SPT
.getStateBefore(P
)->first
;
545 if (RegionAddr
== SPT
.SUPERPOSITION
|| RegionAddr
== SPT
.EMPTY
)
548 if (InsertedRegions
.count(RegionAddr
))
551 // Check if we are going to screw up stack accesses at call sites that
552 // pass parameters via stack
553 if (!BlacklistedRegions
.empty())
559 bool StackLayoutModifier::insertRegion(ProgramPoint P
, int64_t RegionSz
) {
560 if (!canInsertRegion(P
))
563 assert(IsInitialized
);
564 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
565 // This RegionAddr is slightly different from the one seen in collapseRegion
566 // This is the value of SP before the allocation the user wants to make.
567 int64_t RegionAddr
= SPT
.getStateBefore(P
)->first
;
568 if (RegionAddr
== SPT
.SUPERPOSITION
|| RegionAddr
== SPT
.EMPTY
)
571 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
573 for (BinaryBasicBlock
&BB
: BF
) {
574 for (MCInst
&Inst
: BB
) {
575 if (!BC
.MIB
->hasAnnotation(Inst
, getSlotTag()))
578 BC
.MIB
->getAnnotationAs
<decltype(FrameIndexEntry::StackOffset
)>(
580 if (!AvailableRegions
.count(Slot
))
583 if (!(DA
.doesADominateB(P
, Inst
)))
586 if (BC
.MIB
->isCFI(Inst
)) {
587 if (Slot
>= RegionAddr
)
589 scheduleChange(Inst
, WorklistItem(WorklistItem::AdjustCFI
, -RegionSz
));
592 ErrorOr
<const FrameIndexEntry
&> FIE
= FA
.getFIEFor(Inst
);
594 if (Slot
>= RegionAddr
)
597 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, -RegionSz
));
601 if (FIE
->StackPtrReg
== BC
.MIB
->getStackPointer() && Slot
< RegionAddr
)
603 if (FIE
->StackPtrReg
== BC
.MIB
->getFramePointer() && Slot
>= RegionAddr
)
605 if (BC
.MIB
->isPush(Inst
) || BC
.MIB
->isPop(Inst
))
608 Inst
, WorklistItem(WorklistItem::AdjustLoadStoreOffset
, -RegionSz
));
612 InsertedRegions
.insert(RegionAddr
);
616 void StackLayoutModifier::performChanges() {
617 std::set
<uint32_t> ModifiedCFIIndices
;
618 for (BinaryBasicBlock
&BB
: BF
) {
619 for (MCInst
&Inst
: llvm::reverse(BB
)) {
620 if (BC
.MIB
->hasAnnotation(Inst
, "AccessesDeletedPos")) {
621 assert(BC
.MIB
->isPop(Inst
) || BC
.MIB
->isPush(Inst
));
622 BC
.MIB
->removeAnnotation(Inst
, "AccessesDeletedPos");
624 if (!BC
.MIB
->hasAnnotation(Inst
, getTodoTag()))
626 auto &WList
= BC
.MIB
->getAnnotationAs
<std::vector
<WorklistItem
>>(
628 int64_t Adjustment
= 0;
629 WorklistItem::ActionType AdjustmentType
= WorklistItem::None
;
630 for (WorklistItem
&WI
: WList
) {
631 if (WI
.Action
== WorklistItem::None
)
633 assert(WI
.Action
== WorklistItem::AdjustLoadStoreOffset
||
634 WI
.Action
== WorklistItem::AdjustCFI
);
635 assert((AdjustmentType
== WorklistItem::None
||
636 AdjustmentType
== WI
.Action
) &&
637 "Conflicting actions requested at the same program point");
638 AdjustmentType
= WI
.Action
;
639 Adjustment
+= WI
.OffsetUpdate
;
643 if (AdjustmentType
!= WorklistItem::AdjustLoadStoreOffset
) {
644 assert(BC
.MIB
->isCFI(Inst
));
645 uint32_t CFINum
= Inst
.getOperand(0).getImm();
646 if (ModifiedCFIIndices
.count(CFINum
))
648 ModifiedCFIIndices
.insert(CFINum
);
649 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
650 const MCCFIInstruction::OpType Operation
= CFI
->getOperation();
651 if (Operation
== MCCFIInstruction::OpDefCfa
||
652 Operation
== MCCFIInstruction::OpDefCfaOffset
)
653 Adjustment
= 0 - Adjustment
;
654 LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI
->getOffset()
655 << " to " << (CFI
->getOffset() + Adjustment
) << "\n");
656 BF
.mutateCFIOffsetFor(Inst
, CFI
->getOffset() + Adjustment
);
661 MCPhysReg StackPtrReg
= 0;
662 int64_t StackOffset
= 0;
663 bool IsIndexed
= false;
665 bool IsStore
= false;
666 bool IsSimple
= false;
667 bool IsStoreFromReg
= false;
669 bool Success
= false;
670 Success
= BC
.MIB
->isStackAccess(Inst
, IsLoad
, IsStore
, IsStoreFromReg
,
671 Reg
, SrcImm
, StackPtrReg
, StackOffset
,
672 Size
, IsSimple
, IsIndexed
);
674 // SP update based on FP value
675 Success
= BC
.MIB
->addToImm(Inst
, Adjustment
, &*BC
.Ctx
);
679 assert(Success
&& IsSimple
&& !IsIndexed
&& (!IsStore
|| IsStoreFromReg
));
680 if (StackPtrReg
!= BC
.MIB
->getFramePointer())
681 Adjustment
= -Adjustment
;
683 Success
= BC
.MIB
->createRestoreFromStack(
684 Inst
, StackPtrReg
, StackOffset
+ Adjustment
, Reg
, Size
);
686 Success
= BC
.MIB
->createSaveToStack(
687 Inst
, StackPtrReg
, StackOffset
+ Adjustment
, Reg
, Size
);
689 dbgs() << "Adjusted instruction: ";
697 void StackLayoutModifier::initialize() {
698 classifyStackAccesses();
700 IsInitialized
= true;
703 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode
{0};
704 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode
{0};
705 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount
{0};
706 std::atomic
<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount
{0};
707 std::atomic
<std::uint64_t> ShrinkWrapping::InstrDynamicCount
{0};
708 std::atomic
<std::uint64_t> ShrinkWrapping::StoreDynamicCount
{0};
710 using BBIterTy
= BinaryBasicBlock::iterator
;
712 void ShrinkWrapping::classifyCSRUses() {
713 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
714 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
715 UsesByReg
= std::vector
<BitVector
>(BC
.MRI
->getNumRegs(),
716 BitVector(DA
.NumInstrs
, false));
718 const BitVector
&FPAliases
= BC
.MIB
->getAliases(BC
.MIB
->getFramePointer());
719 for (BinaryBasicBlock
&BB
: BF
) {
720 for (MCInst
&Inst
: BB
) {
721 if (BC
.MIB
->isCFI(Inst
))
723 BitVector BV
= BitVector(BC
.MRI
->getNumRegs(), false);
724 BC
.MIB
->getTouchedRegs(Inst
, BV
);
725 BV
&= CSA
.CalleeSaved
;
726 for (int I
: BV
.set_bits()) {
729 if (CSA
.getSavedReg(Inst
) != I
&& CSA
.getRestoredReg(Inst
) != I
)
730 UsesByReg
[I
].set(DA
.ExprToIdx
[&Inst
]);
732 if (!SPT
.HasFramePointer
|| !BC
.MIB
->isCall(Inst
))
734 BV
= CSA
.CalleeSaved
;
736 for (int I
: BV
.set_bits())
737 UsesByReg
[I
].set(DA
.ExprToIdx
[&Inst
]);
742 void ShrinkWrapping::pruneUnwantedCSRs() {
743 BitVector ParamRegs
= BC
.MIB
->getRegsUsedAsParams();
744 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
745 if (!CSA
.CalleeSaved
[I
])
748 CSA
.CalleeSaved
.reset(I
);
751 if (UsesByReg
[I
].empty()) {
754 << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
756 CSA
.CalleeSaved
.reset(I
);
759 if (!CSA
.HasRestores
[I
]) {
761 dbgs() << "Dismissing Callee-Saved Reg because it does not have "
764 CSA
.CalleeSaved
.reset(I
);
769 void ShrinkWrapping::computeSaveLocations() {
770 BestSavePos
= std::vector
<std::vector
<MCInst
*>>(BC
.MRI
->getNumRegs());
771 ReachingInsns
<true> &RI
= Info
.getReachingInsnsBackwards();
772 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
773 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
775 LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
776 for (BinaryBasicBlock
&BB
: BF
) {
777 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB
.getName() << "\n");
779 MCInst
*First
= BB
.begin() != BB
.end() ? &*BB
.begin() : nullptr;
783 // Use reaching instructions to detect if we are inside a loop - if we
784 // are, do not consider this BB as valid placement for saves.
788 const std::pair
<int, int> SPFP
= *SPT
.getStateBefore(*First
);
789 // If we don't know stack state at this point, bail
790 if ((SPFP
.first
== SPT
.SUPERPOSITION
|| SPFP
.first
== SPT
.EMPTY
) &&
791 (SPFP
.second
== SPT
.SUPERPOSITION
|| SPFP
.second
== SPT
.EMPTY
))
794 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
795 if (!CSA
.CalleeSaved
[I
])
798 BitVector BBDominatedUses
= BitVector(DA
.NumInstrs
, false);
799 for (int J
: UsesByReg
[I
].set_bits())
800 if (DA
.doesADominateB(*First
, J
))
801 BBDominatedUses
.set(J
);
802 LLVM_DEBUG(dbgs() << "\t\tBB " << BB
.getName() << " dominates "
803 << BBDominatedUses
.count() << " uses for reg " << I
804 << ". Total uses for reg is " << UsesByReg
[I
].count()
806 BBDominatedUses
&= UsesByReg
[I
];
807 if (BBDominatedUses
== UsesByReg
[I
]) {
808 LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB
.getName()
809 << " as a save pos for " << I
<< "\n");
810 BestSavePos
[I
].push_back(First
);
812 dbgs() << "Dominated uses are:\n";
813 for (int J
: UsesByReg
[I
].set_bits()) {
814 dbgs() << "Idx " << J
<< ": ";
815 BC
.printInstruction(dbgs(), *DA
.Expressions
[J
]);
816 DA
.Expressions
[J
]->dump();
823 BestSaveCount
= std::vector
<std::vector
<uint64_t>>(BC
.MRI
->getNumRegs());
825 auto &InsnToBB
= Info
.getInsnToBBMap();
826 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
827 if (!CSA
.CalleeSaved
[I
])
830 std::stable_sort(BestSavePos
[I
].begin(), BestSavePos
[I
].end(),
831 [&](const MCInst
*A
, const MCInst
*B
) {
832 const BinaryBasicBlock
*BBA
= InsnToBB
[A
];
833 const BinaryBasicBlock
*BBB
= InsnToBB
[B
];
834 const uint64_t CountA
= BBA
->getKnownExecutionCount();
835 const uint64_t CountB
= BBB
->getKnownExecutionCount();
836 return CountB
< CountA
;
839 for (MCInst
*Pos
: BestSavePos
[I
]) {
840 const BinaryBasicBlock
*BB
= InsnToBB
[Pos
];
841 const uint64_t Count
= BB
->getKnownExecutionCount();
842 BestSaveCount
[I
].push_back(Count
);
847 void ShrinkWrapping::computeDomOrder() {
848 DomOrder
= std::vector
<MCPhysReg
>(BC
.MRI
->getNumRegs(), 0);
849 std::vector
<MCPhysReg
> Order
;
850 for (MCPhysReg I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
854 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
855 auto &InsnToBB
= Info
.getInsnToBBMap();
856 llvm::sort(Order
, [&](const MCPhysReg
&A
, const MCPhysReg
&B
) {
857 BinaryBasicBlock
*BBA
=
858 BestSavePos
[A
].size() ? InsnToBB
[BestSavePos
[A
].back()] : nullptr;
859 BinaryBasicBlock
*BBB
=
860 BestSavePos
[B
].size() ? InsnToBB
[BestSavePos
[B
].back()] : nullptr;
867 if (DA
.doesADominateB(*BestSavePos
[A
].back(), *BestSavePos
[B
].back()))
869 if (DA
.doesADominateB(*BestSavePos
[B
].back(), *BestSavePos
[A
].back()))
874 for (MCPhysReg I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
)
875 DomOrder
[Order
[I
]] = I
;
878 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR
, MCInst
*&BestPosSave
,
879 uint64_t &TotalEstimatedWin
) {
880 const uint64_t CurSavingCost
= CSA
.SavingCost
[CSR
];
881 if (!CSA
.CalleeSaved
[CSR
])
884 assert(BestSaveCount
[CSR
].size() == BestSavePos
[CSR
].size() &&
885 "save position vectors out of sync");
886 if (BestSaveCount
[CSR
].empty())
889 const uint64_t BestCount
= BestSaveCount
[CSR
].back();
890 BestPosSave
= BestSavePos
[CSR
].back();
891 if (BestCount
>= (opts::ShrinkWrappingThreshold
/ 100.0) * CurSavingCost
)
895 auto &InsnToBB
= Info
.getInsnToBBMap();
896 dbgs() << "Better position for saves found in func " << BF
.getPrintName()
897 << " count << " << BF
.getKnownExecutionCount() << "\n";
898 dbgs() << "Reg: " << CSR
<< "; New BB: " << InsnToBB
[BestPosSave
]->getName()
899 << " Freq reduction: " << (CurSavingCost
- BestCount
) << "\n";
902 TotalEstimatedWin
= CurSavingCost
- BestCount
;
906 /// Auxiliar function used to create basic blocks for critical edges and update
907 /// the dominance frontier with these new locations
908 void ShrinkWrapping::splitFrontierCritEdges(
909 BinaryFunction
*Func
, SmallVector
<ProgramPoint
, 4> &Frontier
,
910 const SmallVector
<bool, 4> &IsCritEdge
,
911 const SmallVector
<BinaryBasicBlock
*, 4> &From
,
912 const SmallVector
<SmallVector
<BinaryBasicBlock
*, 4>, 4> &To
) {
913 LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
914 << BF
.getPrintName() << "\n");
915 // For every FromBB, there might be one or more critical edges, with
916 // To[I] containing destination BBs. It's important to memorize
917 // the original size of the Frontier as we may append to it while splitting
918 // critical edges originating with blocks with multiple destinations.
919 for (size_t I
= 0, IE
= Frontier
.size(); I
< IE
; ++I
) {
924 BinaryBasicBlock
*FromBB
= From
[I
];
925 LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB
->getName()
927 // Split edge for every DestinationBBs
928 for (size_t DI
= 0, DIE
= To
[I
].size(); DI
< DIE
; ++DI
) {
929 BinaryBasicBlock
*DestinationBB
= To
[I
][DI
];
930 LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB
->getName() << "\n");
931 BinaryBasicBlock
*NewBB
= Func
->splitEdge(FromBB
, DestinationBB
);
932 // Insert dummy instruction so this BB is never empty (we need this for
933 // PredictiveStackPointerTracking to work, since it annotates instructions
935 if (NewBB
->empty()) {
937 BC
.MIB
->createNoop(NewInst
);
938 NewBB
->addInstruction(std::move(NewInst
));
939 scheduleChange(&*NewBB
->begin(), WorklistItem(WorklistItem::Erase
, 0));
943 ProgramPoint NewFrontierPP
= ProgramPoint::getLastPointAt(*NewBB
);
945 // Update frontier inplace
946 Frontier
[I
] = NewFrontierPP
;
947 LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB
->getName()
950 // Append new frontier to the end of the list
951 Frontier
.push_back(NewFrontierPP
);
952 LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB
->getName()
959 SmallVector
<ProgramPoint
, 4>
960 ShrinkWrapping::doRestorePlacement(MCInst
*BestPosSave
, unsigned CSR
,
961 uint64_t TotalEstimatedWin
) {
962 SmallVector
<ProgramPoint
, 4> Frontier
;
963 SmallVector
<bool, 4> IsCritEdge
;
964 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
966 SmallVector
<BinaryBasicBlock
*, 4> CritEdgesFrom
;
967 SmallVector
<SmallVector
<BinaryBasicBlock
*, 4>, 4> CritEdgesTo
;
968 // In case of a critical edge, we need to create extra BBs to host restores
969 // into edges transitioning to the dominance frontier, otherwise we pull these
970 // restores to inside the dominated area.
971 Frontier
= DA
.getDominanceFrontierFor(*BestPosSave
).takeVector();
973 dbgs() << "Dumping dominance frontier for ";
974 BC
.printInstruction(dbgs(), *BestPosSave
);
975 for (ProgramPoint
&PP
: Frontier
)
977 BC
.printInstruction(dbgs(), *PP
.getInst());
979 dbgs() << PP
.getBB()->getName() << "\n";
981 for (ProgramPoint
&PP
: Frontier
) {
982 bool HasCritEdges
= false;
983 if (PP
.isInst() && BC
.MIB
->isTerminator(*PP
.getInst()) &&
984 doesInstUsesCSR(*PP
.getInst(), CSR
)) {
988 BinaryBasicBlock
*FrontierBB
= Info
.getParentBB(PP
);
989 CritEdgesFrom
.emplace_back(FrontierBB
);
990 CritEdgesTo
.emplace_back(0);
991 SmallVector
<BinaryBasicBlock
*, 4> &Dests
= CritEdgesTo
.back();
992 // Check for invoke instructions at the dominance frontier, which indicates
993 // the landing pad is not dominated.
994 if (PP
.isInst() && BC
.MIB
->isInvoke(*PP
.getInst())) {
998 doForAllSuccs(*FrontierBB
, [&](ProgramPoint P
) {
999 if (!DA
.doesADominateB(*BestPosSave
, P
)) {
1000 Dests
.emplace_back(Info
.getParentBB(P
));
1003 HasCritEdges
= true;
1005 IsCritEdge
.push_back(HasCritEdges
);
1007 // Restores cannot be placed in empty BBs because we have a dataflow
1008 // analysis that depends on insertions happening before real instructions
1009 // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
1010 // dummy nop that is scheduled to be removed later.
1011 bool InvalidateRequired
= false;
1012 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1013 if (BB
->size() != 0)
1016 BC
.MIB
->createNoop(NewInst
);
1017 auto II
= BB
->addInstruction(std::move(NewInst
));
1018 scheduleChange(&*II
, WorklistItem(WorklistItem::Erase
, 0));
1019 InvalidateRequired
= true;
1021 if (std::accumulate(IsCritEdge
.begin(), IsCritEdge
.end(), 0)) {
1023 dbgs() << "Now detected critical edges in the following frontier:\n";
1024 for (ProgramPoint
&PP
: Frontier
) {
1026 dbgs() << " BB: " << PP
.getBB()->getName() << "\n";
1028 dbgs() << " Inst: ";
1029 PP
.getInst()->dump();
1033 splitFrontierCritEdges(&BF
, Frontier
, IsCritEdge
, CritEdgesFrom
,
1035 InvalidateRequired
= true;
1037 if (InvalidateRequired
) {
1038 // BitVectors that represent all insns of the function are invalid now
1039 // since we changed BBs/Insts. Re-run steps that depend on pointers being
1041 Info
.invalidateAll();
1047 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR
, MCInst
*BestPosSave
,
1048 int64_t SaveOffset
) {
1049 if (FA
.requiresAlignment(BF
)) {
1051 dbgs() << "Reg " << CSR
1052 << " is not using push/pops due to function "
1053 "alignment requirements.\n";
1057 if (FA
.hasStackArithmetic(BF
)) {
1059 dbgs() << "Reg " << CSR
1060 << " is not using push/pops due to function "
1061 "taking the address of a stack position.\n";
1065 for (MCInst
*Save
: CSA
.getSavesByReg(CSR
)) {
1066 if (!SLM
.canCollapseRegion(Save
)) {
1067 LLVM_DEBUG(dbgs() << "Reg " << CSR
<< " cannot collapse region.\n");
1071 // Abort if one of the restores for this CSR is not a POP.
1072 for (MCInst
*Load
: CSA
.getRestoresByReg(CSR
)) {
1073 if (!BC
.MIB
->isPop(*Load
)) {
1074 LLVM_DEBUG(dbgs() << "Reg " << CSR
<< " has a mismatching restore.\n");
1079 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1080 // Abort if we are inserting a push into an entry BB (offset -8) and this
1081 // func sets up a frame pointer.
1082 if (!SLM
.canInsertRegion(BestPosSave
) || SaveOffset
== SPT
.SUPERPOSITION
||
1083 SaveOffset
== SPT
.EMPTY
|| (SaveOffset
== -8 && SPT
.HasFramePointer
)) {
1085 dbgs() << "Reg " << CSR
1086 << " cannot insert region or we are "
1087 "trying to insert a push into entry bb.\n";
1094 SmallVector
<ProgramPoint
, 4> ShrinkWrapping::fixPopsPlacements(
1095 const SmallVector
<ProgramPoint
, 4> &RestorePoints
, int64_t SaveOffset
,
1097 SmallVector
<ProgramPoint
, 4> FixedRestorePoints
= RestorePoints
;
1098 // Moving pop locations to the correct sp offset
1099 ReachingInsns
<true> &RI
= Info
.getReachingInsnsBackwards();
1100 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1101 for (ProgramPoint
&PP
: FixedRestorePoints
) {
1102 BinaryBasicBlock
*BB
= Info
.getParentBB(PP
);
1104 if (SPT
.getStateAt(ProgramPoint::getLastPointAt(*BB
))->first
==
1106 BitVector BV
= *RI
.getStateAt(ProgramPoint::getLastPointAt(*BB
));
1107 BV
&= UsesByReg
[CSR
];
1114 for (MCInst
&Inst
: llvm::reverse(*BB
)) {
1115 if (SPT
.getStateBefore(Inst
)->first
== SaveOffset
) {
1116 BitVector BV
= *RI
.getStateAt(Inst
);
1117 BV
&= UsesByReg
[CSR
];
1127 dbgs() << "Could not find restore insertion point for " << CSR
1128 << ", falling back to load/store mode\n";
1130 FixedRestorePoints
.clear();
1131 return FixedRestorePoints
;
1134 return FixedRestorePoints
;
1137 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR
,
1140 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1141 std::vector
<MCInst
*> CFIs
;
1142 for (MCInst
&Inst
: llvm::reverse(*BB
)) {
1143 if (BC
.MIB
->isCFI(Inst
)) {
1144 // Delete all offset CFIs related to this CSR
1145 if (SLM
.getOffsetCFIReg(Inst
) == CSR
) {
1146 HasDeletedOffsetCFIs
[CSR
] = true;
1147 scheduleChange(&Inst
, WorklistItem(WorklistItem::Erase
, CSR
));
1150 CFIs
.push_back(&Inst
);
1154 uint16_t SavedReg
= CSA
.getSavedReg(Inst
);
1155 uint16_t RestoredReg
= CSA
.getRestoredReg(Inst
);
1156 if (SavedReg
!= CSR
&& RestoredReg
!= CSR
) {
1161 scheduleChange(&Inst
, WorklistItem(UsePushPops
1162 ? WorklistItem::Erase
1163 : WorklistItem::ChangeToAdjustment
,
1166 // Delete associated CFIs
1167 const bool RecordDeletedPushCFIs
=
1168 SavedReg
== CSR
&& DeletedPushCFIs
[CSR
].empty();
1169 const bool RecordDeletedPopCFIs
=
1170 RestoredReg
== CSR
&& DeletedPopCFIs
[CSR
].empty();
1171 for (MCInst
*CFI
: CFIs
) {
1172 const MCCFIInstruction
*MCCFI
= BF
.getCFIFor(*CFI
);
1173 // Do not touch these...
1174 if (MCCFI
->getOperation() == MCCFIInstruction::OpRestoreState
||
1175 MCCFI
->getOperation() == MCCFIInstruction::OpRememberState
)
1177 scheduleChange(CFI
, WorklistItem(WorklistItem::Erase
, CSR
));
1178 if (RecordDeletedPushCFIs
) {
1179 // Do not record this to be replayed later because we are going to
1181 if (MCCFI
->getOperation() == MCCFIInstruction::OpDefCfaOffset
)
1183 DeletedPushCFIs
[CSR
].push_back(CFI
->getOperand(0).getImm());
1185 if (RecordDeletedPopCFIs
) {
1186 if (MCCFI
->getOperation() == MCCFIInstruction::OpDefCfaOffset
)
1188 DeletedPopCFIs
[CSR
].push_back(CFI
->getOperand(0).getImm());
1196 bool ShrinkWrapping::doesInstUsesCSR(const MCInst
&Inst
, uint16_t CSR
) {
1197 if (BC
.MIB
->isCFI(Inst
) || CSA
.getSavedReg(Inst
) == CSR
||
1198 CSA
.getRestoredReg(Inst
) == CSR
)
1200 BitVector BV
= BitVector(BC
.MRI
->getNumRegs(), false);
1201 BC
.MIB
->getTouchedRegs(Inst
, BV
);
1205 void ShrinkWrapping::scheduleSaveRestoreInsertions(
1206 unsigned CSR
, MCInst
*BestPosSave
,
1207 SmallVector
<ProgramPoint
, 4> &RestorePoints
, bool UsePushPops
) {
1208 auto &InsnToBB
= Info
.getInsnToBBMap();
1209 const FrameIndexEntry
*FIESave
= CSA
.SaveFIEByReg
[CSR
];
1210 const FrameIndexEntry
*FIELoad
= CSA
.LoadFIEByReg
[CSR
];
1211 assert(FIESave
&& FIELoad
&& "Invalid CSR");
1214 dbgs() << "Scheduling save insertion at: ";
1215 BestPosSave
->dump();
1218 scheduleChange(BestPosSave
,
1219 UsePushPops
? WorklistItem::InsertPushOrPop
1220 : WorklistItem::InsertLoadOrStore
,
1223 for (ProgramPoint
&PP
: RestorePoints
) {
1224 BinaryBasicBlock
*FrontierBB
= Info
.getParentBB(PP
);
1226 dbgs() << "Scheduling restore insertion at: ";
1228 PP
.getInst()->dump();
1230 dbgs() << PP
.getBB()->getName() << "\n";
1233 FrontierBB
->getTerminatorBefore(PP
.isInst() ? PP
.getInst() : nullptr);
1236 bool PrecededByPrefix
= false;
1238 auto Iter
= FrontierBB
->findInstruction(PP
.getInst());
1239 if (Iter
!= FrontierBB
->end() && Iter
!= FrontierBB
->begin()) {
1241 PrecededByPrefix
= BC
.MIB
->isPrefix(*Iter
);
1245 (doesInstUsesCSR(*PP
.getInst(), CSR
) || PrecededByPrefix
)) {
1246 assert(!InsnToBB
[PP
.getInst()]->hasTerminatorAfter(PP
.getInst()) &&
1247 "cannot move to end of bb");
1248 scheduleChange(InsnToBB
[PP
.getInst()],
1249 UsePushPops
? WorklistItem::InsertPushOrPop
1250 : WorklistItem::InsertLoadOrStore
,
1255 UsePushPops
? WorklistItem::InsertPushOrPop
1256 : WorklistItem::InsertLoadOrStore
,
1261 void ShrinkWrapping::moveSaveRestores() {
1262 bool DisablePushPopMode
= false;
1263 bool UsedPushPopMode
= false;
1264 // Keeps info about successfully moved regs: reg index, save position and
1266 std::vector
<std::tuple
<unsigned, MCInst
*, size_t>> MovedRegs
;
1267 uint64_t TotalEstimatedWin
= 0;
1270 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
1271 MCInst
*BestPosSave
= nullptr;
1272 uint64_t EstimatedWin
= 0;
1273 SmallVector
<ProgramPoint
, 4> RestorePoints
;
1274 while (RestorePoints
.empty() &&
1275 isBestSavePosCold(I
, BestPosSave
, EstimatedWin
)) {
1276 RestorePoints
= doRestorePlacement(BestPosSave
, I
, EstimatedWin
);
1277 if (RestorePoints
.empty()) {
1279 dbgs() << "Dropping opportunity because restore placement failed"
1280 " -- total est. freq reduc: "
1281 << EstimatedWin
<< ". Will try "
1282 << (BestSaveCount
[I
].size() - 1) << " more times.\n";
1284 BestSaveCount
[I
].pop_back();
1285 BestSavePos
[I
].pop_back();
1289 if (RestorePoints
.empty()) {
1290 SpillsFailedDynamicCount
+= EstimatedWin
;
1294 const FrameIndexEntry
*FIESave
= CSA
.SaveFIEByReg
[I
];
1295 const FrameIndexEntry
*FIELoad
= CSA
.LoadFIEByReg
[I
];
1297 assert(FIESave
&& FIELoad
);
1298 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1299 const std::pair
<int, int> SPFP
= *SPT
.getStateBefore(*BestPosSave
);
1300 int SaveOffset
= SPFP
.first
;
1301 uint8_t SaveSize
= FIESave
->Size
;
1303 // If we don't know stack state at this point, bail
1304 if ((SPFP
.first
== SPT
.SUPERPOSITION
|| SPFP
.first
== SPT
.EMPTY
) &&
1305 (SPFP
.second
== SPT
.SUPERPOSITION
|| SPFP
.second
== SPT
.EMPTY
)) {
1306 SpillsFailedDynamicCount
+= EstimatedWin
;
1310 // Operation mode: if true, will insert push/pops instead of loads/restores
1311 bool UsePushPops
= validatePushPopsMode(I
, BestPosSave
, SaveOffset
);
1314 SmallVector
<ProgramPoint
, 4> FixedRestorePoints
=
1315 fixPopsPlacements(RestorePoints
, SaveOffset
, I
);
1316 if (FixedRestorePoints
.empty())
1317 UsePushPops
= false;
1319 RestorePoints
= FixedRestorePoints
;
1322 // Disable push-pop mode for all CSRs in this function
1324 DisablePushPopMode
= true;
1326 UsedPushPopMode
= true;
1328 scheduleOldSaveRestoresRemoval(I
, UsePushPops
);
1329 scheduleSaveRestoreInsertions(I
, BestPosSave
, RestorePoints
, UsePushPops
);
1330 MovedRegs
.emplace_back(std::make_tuple(I
, BestPosSave
, SaveSize
));
1331 TotalEstimatedWin
+= EstimatedWin
;
1334 // Revert push-pop mode if it failed for a single CSR
1335 if (DisablePushPopMode
&& UsedPushPopMode
) {
1336 UsedPushPopMode
= false;
1337 for (BinaryBasicBlock
&BB
: BF
) {
1338 auto WRI
= Todo
.find(&BB
);
1339 if (WRI
!= Todo
.end()) {
1340 std::vector
<WorklistItem
> &TodoList
= WRI
->second
;
1341 for (WorklistItem
&Item
: TodoList
)
1342 if (Item
.Action
== WorklistItem::InsertPushOrPop
)
1343 Item
.Action
= WorklistItem::InsertLoadOrStore
;
1345 for (MCInst
&Inst
: llvm::reverse(BB
)) {
1346 auto TodoList
= BC
.MIB
->tryGetAnnotationAs
<std::vector
<WorklistItem
>>(
1347 Inst
, getAnnotationIndex());
1350 bool isCFI
= BC
.MIB
->isCFI(Inst
);
1351 for (WorklistItem
&Item
: *TodoList
) {
1352 if (Item
.Action
== WorklistItem::InsertPushOrPop
)
1353 Item
.Action
= WorklistItem::InsertLoadOrStore
;
1354 if (!isCFI
&& Item
.Action
== WorklistItem::Erase
)
1355 Item
.Action
= WorklistItem::ChangeToAdjustment
;
1360 SpillsMovedDynamicCount
+= TotalEstimatedWin
;
1362 // Update statistics
1363 if (!UsedPushPopMode
) {
1364 SpillsMovedRegularMode
+= MovedRegs
.size();
1368 // Schedule modifications to stack-accessing instructions via
1369 // StackLayoutModifier.
1370 SpillsMovedPushPopMode
+= MovedRegs
.size();
1371 for (std::tuple
<unsigned, MCInst
*, size_t> &I
: MovedRegs
) {
1375 std::tie(RegNdx
, SavePos
, SaveSize
) = I
;
1376 for (MCInst
*Save
: CSA
.getSavesByReg(RegNdx
))
1377 SLM
.collapseRegion(Save
);
1378 SLM
.insertRegion(SavePos
, SaveSize
);
1383 /// Helper function to identify whether two basic blocks created by splitting
1384 /// a critical edge have the same contents.
1385 bool isIdenticalSplitEdgeBB(const BinaryContext
&BC
, const BinaryBasicBlock
&A
,
1386 const BinaryBasicBlock
&B
) {
1387 if (A
.succ_size() != B
.succ_size())
1389 if (A
.succ_size() != 1)
1392 if (*A
.succ_begin() != *B
.succ_begin())
1395 if (A
.size() != B
.size())
1398 // Compare instructions
1399 auto I
= A
.begin(), E
= A
.end();
1400 auto OtherI
= B
.begin(), OtherE
= B
.end();
1401 while (I
!= E
&& OtherI
!= OtherE
) {
1402 if (I
->getOpcode() != OtherI
->getOpcode())
1404 if (!BC
.MIB
->equals(*I
, *OtherI
, [](const MCSymbol
*A
, const MCSymbol
*B
) {
1415 bool ShrinkWrapping::foldIdenticalSplitEdges() {
1416 bool Changed
= false;
1417 for (auto Iter
= BF
.begin(); Iter
!= BF
.end(); ++Iter
) {
1418 BinaryBasicBlock
&BB
= *Iter
;
1419 if (!BB
.getName().startswith(".LSplitEdge"))
1421 for (BinaryBasicBlock
&RBB
: llvm::reverse(BF
)) {
1424 if (!RBB
.getName().startswith(".LSplitEdge") || !RBB
.isValid() ||
1425 !isIdenticalSplitEdgeBB(BC
, *Iter
, RBB
))
1427 assert(RBB
.pred_size() == 1 && "Invalid split edge BB");
1428 BinaryBasicBlock
*Pred
= *RBB
.pred_begin();
1429 uint64_t OrigCount
= Pred
->branch_info_begin()->Count
;
1430 uint64_t OrigMispreds
= Pred
->branch_info_begin()->MispredictedCount
;
1431 BF
.replaceJumpTableEntryIn(Pred
, &RBB
, &BB
);
1432 Pred
->replaceSuccessor(&RBB
, &BB
, OrigCount
, OrigMispreds
);
1434 // Remove the block from CFG
1435 RBB
.markValid(false);
1444 // A special StackPointerTracking that compensates for our future plans
1445 // in removing/adding insn.
1446 class PredictiveStackPointerTracking
1447 : public StackPointerTrackingBase
<PredictiveStackPointerTracking
> {
1448 friend class DataflowAnalysis
<PredictiveStackPointerTracking
,
1449 std::pair
<int, int>>;
1450 decltype(ShrinkWrapping::Todo
) &TodoMap
;
1451 DataflowInfoManager
&Info
;
1453 std::optional
<unsigned> AnnotationIndex
;
1456 void compNextAux(const MCInst
&Point
,
1457 const std::vector
<ShrinkWrapping::WorklistItem
> &TodoItems
,
1458 std::pair
<int, int> &Res
) {
1459 for (const ShrinkWrapping::WorklistItem
&Item
: TodoItems
) {
1460 if (Item
.Action
== ShrinkWrapping::WorklistItem::Erase
&&
1461 BC
.MIB
->isPush(Point
)) {
1462 Res
.first
+= BC
.MIB
->getPushSize(Point
);
1465 if (Item
.Action
== ShrinkWrapping::WorklistItem::Erase
&&
1466 BC
.MIB
->isPop(Point
)) {
1467 Res
.first
-= BC
.MIB
->getPopSize(Point
);
1470 if (Item
.Action
== ShrinkWrapping::WorklistItem::InsertPushOrPop
&&
1471 Item
.FIEToInsert
.IsStore
) {
1472 Res
.first
-= Item
.FIEToInsert
.Size
;
1475 if (Item
.Action
== ShrinkWrapping::WorklistItem::InsertPushOrPop
&&
1476 Item
.FIEToInsert
.IsLoad
) {
1477 Res
.first
+= Item
.FIEToInsert
.Size
;
1483 std::pair
<int, int> computeNext(const MCInst
&Point
,
1484 const std::pair
<int, int> &Cur
) {
1485 std::pair
<int, int> Res
=
1486 StackPointerTrackingBase
<PredictiveStackPointerTracking
>::computeNext(
1488 if (Res
.first
== StackPointerTracking::SUPERPOSITION
||
1489 Res
.first
== StackPointerTracking::EMPTY
)
1492 BC
.MIB
->tryGetAnnotationAs
<std::vector
<ShrinkWrapping::WorklistItem
>>(
1493 Point
, ShrinkWrapping::getAnnotationName());
1495 compNextAux(Point
, *TodoItems
, Res
);
1496 auto &InsnToBBMap
= Info
.getInsnToBBMap();
1497 if (&*InsnToBBMap
[&Point
]->rbegin() != &Point
)
1499 auto WRI
= TodoMap
.find(InsnToBBMap
[&Point
]);
1500 if (WRI
== TodoMap
.end())
1502 compNextAux(Point
, WRI
->second
, Res
);
1506 StringRef
getAnnotationName() const {
1507 return StringRef("PredictiveStackPointerTracking");
1511 PredictiveStackPointerTracking(BinaryFunction
&BF
,
1512 decltype(ShrinkWrapping::Todo
) &TodoMap
,
1513 DataflowInfoManager
&Info
,
1514 MCPlusBuilder::AllocatorIdTy AllocatorId
= 0)
1515 : StackPointerTrackingBase
<PredictiveStackPointerTracking
>(BF
,
1517 TodoMap(TodoMap
), Info(Info
) {}
1520 StackPointerTrackingBase
<PredictiveStackPointerTracking
>::run();
1524 } // end anonymous namespace
1526 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR
, int SPValPush
,
1528 MCInst
*SavePoint
= nullptr;
1529 for (BinaryBasicBlock
&BB
: BF
) {
1530 for (MCInst
&Inst
: llvm::reverse(BB
)) {
1533 MCPhysReg StackPtrReg
= 0;
1534 int64_t StackOffset
= 0;
1535 bool IsIndexed
= false;
1536 bool IsLoad
= false;
1537 bool IsStore
= false;
1538 bool IsSimple
= false;
1539 bool IsStoreFromReg
= false;
1541 if (!BC
.MIB
->isStackAccess(Inst
, IsLoad
, IsStore
, IsStoreFromReg
, Reg
,
1542 SrcImm
, StackPtrReg
, StackOffset
, Size
,
1543 IsSimple
, IsIndexed
))
1545 if (Reg
!= CSR
|| !IsStore
|| !IsSimple
)
1555 dbgs() << "Now using as save point for reg " << CSR
<< " :";
1558 bool PrevAffectedZone
= false;
1559 BinaryBasicBlock
*PrevBB
= nullptr;
1560 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
1561 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1562 if (BB
->size() == 0)
1564 const bool InAffectedZoneAtEnd
= DA
.count(*BB
->rbegin(), *SavePoint
);
1565 const bool InAffectedZoneAtBegin
=
1566 (*DA
.getStateBefore(*BB
->begin()))[DA
.ExprToIdx
[SavePoint
]];
1567 bool InAffectedZone
= InAffectedZoneAtBegin
;
1568 for (auto InstIter
= BB
->begin(); InstIter
!= BB
->end(); ++InstIter
) {
1569 const bool CurZone
= DA
.count(*InstIter
, *SavePoint
);
1570 if (InAffectedZone
!= CurZone
) {
1571 auto InsertionIter
= InstIter
;
1573 InAffectedZone
= CurZone
;
1575 InstIter
= insertCFIsForPushOrPop(*BB
, InsertionIter
, CSR
, true, 0,
1578 InstIter
= insertCFIsForPushOrPop(*BB
, InsertionIter
, CSR
, false, 0,
1583 // Are we at the first basic block or hot-cold split point?
1584 if (!PrevBB
|| (BF
.isSplit() && BB
->isCold() != PrevBB
->isCold())) {
1585 if (InAffectedZoneAtBegin
)
1586 insertCFIsForPushOrPop(*BB
, BB
->begin(), CSR
, true, 0, SPValPush
);
1587 } else if (InAffectedZoneAtBegin
!= PrevAffectedZone
) {
1588 if (InAffectedZoneAtBegin
)
1589 insertCFIsForPushOrPop(*PrevBB
, PrevBB
->end(), CSR
, true, 0, SPValPush
);
1591 insertCFIsForPushOrPop(*PrevBB
, PrevBB
->end(), CSR
, false, 0, SPValPop
);
1593 PrevAffectedZone
= InAffectedZoneAtEnd
;
1598 void ShrinkWrapping::rebuildCFIForSP() {
1599 for (BinaryBasicBlock
&BB
: BF
) {
1600 for (MCInst
&Inst
: BB
) {
1601 if (!BC
.MIB
->isCFI(Inst
))
1603 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
1604 if (CFI
->getOperation() == MCCFIInstruction::OpDefCfaOffset
)
1605 BC
.MIB
->addAnnotation(Inst
, "DeleteMe", 0U, AllocatorId
);
1610 BinaryBasicBlock
*PrevBB
= nullptr;
1611 StackPointerTracking
&SPT
= Info
.getStackPointerTracking();
1612 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1613 if (BB
->size() == 0)
1615 const int SPValAtEnd
= SPT
.getStateAt(*BB
->rbegin())->first
;
1616 const int SPValAtBegin
= SPT
.getStateBefore(*BB
->begin())->first
;
1617 int SPVal
= SPValAtBegin
;
1618 for (auto Iter
= BB
->begin(); Iter
!= BB
->end(); ++Iter
) {
1619 const int CurVal
= SPT
.getStateAt(*Iter
)->first
;
1620 if (SPVal
!= CurVal
) {
1621 auto InsertionIter
= Iter
;
1623 Iter
= BF
.addCFIInstruction(
1625 MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal
));
1629 if (BF
.isSplit() && PrevBB
&& BB
->isCold() != PrevBB
->isCold())
1630 BF
.addCFIInstruction(
1632 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin
));
1633 else if (SPValAtBegin
!= PrevSPVal
)
1634 BF
.addCFIInstruction(
1635 PrevBB
, PrevBB
->end(),
1636 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin
));
1637 PrevSPVal
= SPValAtEnd
;
1641 for (BinaryBasicBlock
&BB
: BF
)
1642 for (auto I
= BB
.begin(); I
!= BB
.end();)
1643 if (BC
.MIB
->hasAnnotation(*I
, "DeleteMe"))
1644 I
= BB
.eraseInstruction(I
);
1649 MCInst
ShrinkWrapping::createStackAccess(int SPVal
, int FPVal
,
1650 const FrameIndexEntry
&FIE
,
1651 bool CreatePushOrPop
) {
1653 if (SPVal
!= StackPointerTracking::SUPERPOSITION
&&
1654 SPVal
!= StackPointerTracking::EMPTY
) {
1656 if (!BC
.MIB
->createRestoreFromStack(NewInst
, BC
.MIB
->getStackPointer(),
1657 FIE
.StackOffset
- SPVal
, FIE
.RegOrImm
,
1659 errs() << "createRestoreFromStack: not supported on this platform\n";
1663 if (!BC
.MIB
->createSaveToStack(NewInst
, BC
.MIB
->getStackPointer(),
1664 FIE
.StackOffset
- SPVal
, FIE
.RegOrImm
,
1666 errs() << "createSaveToStack: not supported on this platform\n";
1670 if (CreatePushOrPop
)
1671 BC
.MIB
->changeToPushOrPop(NewInst
);
1674 assert(FPVal
!= StackPointerTracking::SUPERPOSITION
&&
1675 FPVal
!= StackPointerTracking::EMPTY
);
1678 if (!BC
.MIB
->createRestoreFromStack(NewInst
, BC
.MIB
->getFramePointer(),
1679 FIE
.StackOffset
- FPVal
, FIE
.RegOrImm
,
1681 errs() << "createRestoreFromStack: not supported on this platform\n";
1685 if (!BC
.MIB
->createSaveToStack(NewInst
, BC
.MIB
->getFramePointer(),
1686 FIE
.StackOffset
- FPVal
, FIE
.RegOrImm
,
1688 errs() << "createSaveToStack: not supported on this platform\n";
1695 void ShrinkWrapping::updateCFIInstOffset(MCInst
&Inst
, int64_t NewOffset
) {
1696 const MCCFIInstruction
*CFI
= BF
.getCFIFor(Inst
);
1697 if (UpdatedCFIs
.count(CFI
))
1700 switch (CFI
->getOperation()) {
1701 case MCCFIInstruction::OpDefCfa
:
1702 case MCCFIInstruction::OpDefCfaRegister
:
1703 case MCCFIInstruction::OpDefCfaOffset
:
1704 CFI
= BF
.mutateCFIOffsetFor(Inst
, -NewOffset
);
1706 case MCCFIInstruction::OpOffset
:
1711 UpdatedCFIs
.insert(CFI
);
1714 BBIterTy
ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock
&BB
,
1715 BBIterTy Pos
, unsigned Reg
,
1716 bool isPush
, int Sz
,
1717 int64_t NewOffset
) {
1719 for (uint32_t Idx
: DeletedPushCFIs
[Reg
]) {
1720 Pos
= BF
.addCFIPseudo(&BB
, Pos
, Idx
);
1721 updateCFIInstOffset(*Pos
++, NewOffset
);
1723 if (HasDeletedOffsetCFIs
[Reg
]) {
1724 Pos
= BF
.addCFIInstruction(
1726 MCCFIInstruction::createOffset(
1727 nullptr, BC
.MRI
->getDwarfRegNum(Reg
, false), NewOffset
));
1731 for (uint32_t Idx
: DeletedPopCFIs
[Reg
]) {
1732 Pos
= BF
.addCFIPseudo(&BB
, Pos
, Idx
);
1733 updateCFIInstOffset(*Pos
++, NewOffset
);
1735 if (HasDeletedOffsetCFIs
[Reg
]) {
1736 Pos
= BF
.addCFIInstruction(
1738 MCCFIInstruction::createSameValue(
1739 nullptr, BC
.MRI
->getDwarfRegNum(Reg
, false)));
1746 BBIterTy
ShrinkWrapping::processInsertion(BBIterTy InsertionPoint
,
1747 BinaryBasicBlock
*CurBB
,
1748 const WorklistItem
&Item
,
1749 int64_t SPVal
, int64_t FPVal
) {
1750 // Trigger CFI reconstruction for this CSR if necessary - writing to
1751 // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
1752 if ((Item
.FIEToInsert
.IsStore
&&
1753 !DeletedPushCFIs
[Item
.AffectedReg
].empty()) ||
1754 (Item
.FIEToInsert
.IsLoad
&& !DeletedPopCFIs
[Item
.AffectedReg
].empty()) ||
1755 HasDeletedOffsetCFIs
[Item
.AffectedReg
]) {
1756 if (Item
.Action
== WorklistItem::InsertPushOrPop
) {
1757 if (Item
.FIEToInsert
.IsStore
)
1758 PushOffsetByReg
[Item
.AffectedReg
] = SPVal
- Item
.FIEToInsert
.Size
;
1760 PopOffsetByReg
[Item
.AffectedReg
] = SPVal
;
1762 if (Item
.FIEToInsert
.IsStore
)
1763 PushOffsetByReg
[Item
.AffectedReg
] = Item
.FIEToInsert
.StackOffset
;
1765 PopOffsetByReg
[Item
.AffectedReg
] = Item
.FIEToInsert
.StackOffset
;
1770 dbgs() << "Creating stack access with SPVal = " << SPVal
1771 << "; stack offset = " << Item
.FIEToInsert
.StackOffset
1772 << " Is push = " << (Item
.Action
== WorklistItem::InsertPushOrPop
)
1776 createStackAccess(SPVal
, FPVal
, Item
.FIEToInsert
,
1777 Item
.Action
== WorklistItem::InsertPushOrPop
);
1778 if (InsertionPoint
!= CurBB
->end()) {
1780 dbgs() << "Adding before Inst: ";
1781 InsertionPoint
->dump();
1782 dbgs() << "the following inst: ";
1786 CurBB
->insertInstruction(InsertionPoint
, std::move(NewInst
));
1789 CurBB
->addInstruction(std::move(NewInst
));
1790 LLVM_DEBUG(dbgs() << "Adding to BB!\n");
1791 return CurBB
->end();
1794 BBIterTy
ShrinkWrapping::processInsertionsList(
1795 BBIterTy InsertionPoint
, BinaryBasicBlock
*CurBB
,
1796 std::vector
<WorklistItem
> &TodoList
, int64_t SPVal
, int64_t FPVal
) {
1797 bool HasInsertions
= llvm::any_of(TodoList
, [&](WorklistItem
&Item
) {
1798 return Item
.Action
== WorklistItem::InsertLoadOrStore
||
1799 Item
.Action
== WorklistItem::InsertPushOrPop
;
1803 return InsertionPoint
;
1805 assert(((SPVal
!= StackPointerTracking::SUPERPOSITION
&&
1806 SPVal
!= StackPointerTracking::EMPTY
) ||
1807 (FPVal
!= StackPointerTracking::SUPERPOSITION
&&
1808 FPVal
!= StackPointerTracking::EMPTY
)) &&
1809 "Cannot insert if we have no idea of the stack state here");
1811 // Revert the effect of PSPT for this location, we want SP Value before
1813 if (InsertionPoint
== CurBB
->end()) {
1814 for (WorklistItem
&Item
: TodoList
) {
1815 if (Item
.Action
!= WorklistItem::InsertPushOrPop
)
1817 if (Item
.FIEToInsert
.IsStore
)
1818 SPVal
+= Item
.FIEToInsert
.Size
;
1819 if (Item
.FIEToInsert
.IsLoad
)
1820 SPVal
-= Item
.FIEToInsert
.Size
;
1824 // Reorder POPs to obey the correct dominance relation between them
1825 llvm::stable_sort(TodoList
, [&](const WorklistItem
&A
,
1826 const WorklistItem
&B
) {
1827 if ((A
.Action
!= WorklistItem::InsertPushOrPop
|| !A
.FIEToInsert
.IsLoad
) &&
1828 (B
.Action
!= WorklistItem::InsertPushOrPop
|| !B
.FIEToInsert
.IsLoad
))
1830 if ((A
.Action
!= WorklistItem::InsertPushOrPop
|| !A
.FIEToInsert
.IsLoad
))
1832 if ((B
.Action
!= WorklistItem::InsertPushOrPop
|| !B
.FIEToInsert
.IsLoad
))
1834 return DomOrder
[B
.AffectedReg
] < DomOrder
[A
.AffectedReg
];
1837 // Process insertions
1838 for (WorklistItem
&Item
: TodoList
) {
1839 if (Item
.Action
== WorklistItem::Erase
||
1840 Item
.Action
== WorklistItem::ChangeToAdjustment
)
1844 processInsertion(InsertionPoint
, CurBB
, Item
, SPVal
, FPVal
);
1845 if (Item
.Action
== WorklistItem::InsertPushOrPop
&&
1846 Item
.FIEToInsert
.IsStore
)
1847 SPVal
-= Item
.FIEToInsert
.Size
;
1848 if (Item
.Action
== WorklistItem::InsertPushOrPop
&&
1849 Item
.FIEToInsert
.IsLoad
)
1850 SPVal
+= Item
.FIEToInsert
.Size
;
1852 return InsertionPoint
;
1855 bool ShrinkWrapping::processInsertions() {
1856 PredictiveStackPointerTracking
PSPT(BF
, Todo
, Info
, AllocatorId
);
1859 bool Changes
= false;
1860 for (BinaryBasicBlock
&BB
: BF
) {
1861 // Process insertions before some inst.
1862 for (auto I
= BB
.begin(); I
!= BB
.end(); ++I
) {
1864 auto TodoList
= BC
.MIB
->tryGetAnnotationAs
<std::vector
<WorklistItem
>>(
1865 Inst
, getAnnotationIndex());
1869 std::vector
<WorklistItem
> List
= *TodoList
;
1871 dbgs() << "Now processing insertions in " << BB
.getName()
1872 << " before inst: ";
1876 std::pair
<int, int> SPTState
=
1877 *PSPT
.getStateAt(Iter
== BB
.begin() ? (ProgramPoint
)&BB
: &*(--Iter
));
1878 I
= processInsertionsList(I
, &BB
, List
, SPTState
.first
, SPTState
.second
);
1880 // Process insertions at the end of bb
1881 auto WRI
= Todo
.find(&BB
);
1882 if (WRI
!= Todo
.end()) {
1883 std::pair
<int, int> SPTState
= *PSPT
.getStateAt(*BB
.rbegin());
1884 processInsertionsList(BB
.end(), &BB
, WRI
->second
, SPTState
.first
,
1892 void ShrinkWrapping::processDeletions() {
1893 LivenessAnalysis
&LA
= Info
.getLivenessAnalysis();
1894 for (BinaryBasicBlock
&BB
: BF
) {
1895 for (auto II
= BB
.begin(); II
!= BB
.end(); ++II
) {
1897 auto TodoList
= BC
.MIB
->tryGetAnnotationAs
<std::vector
<WorklistItem
>>(
1898 Inst
, getAnnotationIndex());
1901 // Process all deletions
1902 for (WorklistItem
&Item
: *TodoList
) {
1903 if (Item
.Action
!= WorklistItem::Erase
&&
1904 Item
.Action
!= WorklistItem::ChangeToAdjustment
)
1907 if (Item
.Action
== WorklistItem::ChangeToAdjustment
) {
1908 // Is flag reg alive across this func?
1909 bool DontClobberFlags
= LA
.isAlive(&Inst
, BC
.MIB
->getFlagsReg());
1910 if (int Sz
= BC
.MIB
->getPushSize(Inst
)) {
1911 BC
.MIB
->createStackPointerIncrement(Inst
, Sz
, DontClobberFlags
);
1914 if (int Sz
= BC
.MIB
->getPopSize(Inst
)) {
1915 BC
.MIB
->createStackPointerDecrement(Inst
, Sz
, DontClobberFlags
);
1921 dbgs() << "Erasing: ";
1922 BC
.printInstruction(dbgs(), Inst
);
1924 II
= std::prev(BB
.eraseInstruction(II
));
1931 void ShrinkWrapping::rebuildCFI() {
1932 const bool FP
= Info
.getStackPointerTracking().HasFramePointer
;
1933 Info
.invalidateAll();
1936 Info
.invalidateAll();
1938 for (unsigned I
= 0, E
= BC
.MRI
->getNumRegs(); I
!= E
; ++I
) {
1939 if (PushOffsetByReg
[I
] == 0 || PopOffsetByReg
[I
] == 0)
1941 const int64_t SPValPush
= PushOffsetByReg
[I
];
1942 const int64_t SPValPop
= PopOffsetByReg
[I
];
1943 insertUpdatedCFI(I
, SPValPush
, SPValPop
);
1944 Info
.invalidateAll();
1948 bool ShrinkWrapping::perform(bool HotOnly
) {
1949 HasDeletedOffsetCFIs
= BitVector(BC
.MRI
->getNumRegs(), false);
1950 PushOffsetByReg
= std::vector
<int64_t>(BC
.MRI
->getNumRegs(), 0LL);
1951 PopOffsetByReg
= std::vector
<int64_t>(BC
.MRI
->getNumRegs(), 0LL);
1953 // Update pass statistics
1954 uint64_t TotalInstrs
= 0ULL;
1955 uint64_t TotalStoreInstrs
= 0ULL;
1956 for (BinaryBasicBlock
*BB
: BF
.getLayout().blocks()) {
1957 uint64_t BBExecCount
= BB
->getExecutionCount();
1958 if (!BBExecCount
|| BBExecCount
== BinaryBasicBlock::COUNT_NO_PROFILE
)
1960 for (const auto &Instr
: *BB
) {
1961 if (BC
.MIB
->isPseudo(Instr
))
1963 if (BC
.MIB
->mayStore(Instr
))
1964 TotalStoreInstrs
+= BBExecCount
;
1965 TotalInstrs
+= BBExecCount
;
1968 InstrDynamicCount
+= TotalInstrs
;
1969 StoreDynamicCount
+= TotalStoreInstrs
;
1971 if (!FA
.hasFrameInfo(BF
))
1974 if (HotOnly
&& (BF
.getKnownExecutionCount() < BC
.getHotThreshold()))
1977 if (opts::EqualizeBBCounts
)
1978 equalizeBBCounts(Info
, BF
);
1980 if (BF
.checkForAmbiguousJumpTables()) {
1981 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF
.getPrintName()
1983 // We could call disambiguateJumpTables here, but it is probably not worth
1984 // the cost (of duplicating potentially large jump tables that could regress
1985 // dcache misses). Moreover, ambiguous JTs are rare and coming from code
1986 // written in assembly language. Just bail.
1992 pruneUnwantedCSRs();
1993 computeSaveLocations();
1996 dbgs() << "Func before shrink-wrapping: \n";
1999 SLM
.performChanges();
2000 // Early exit if processInsertions doesn't detect any todo items
2001 if (!processInsertions())
2004 if (foldIdenticalSplitEdges()) {
2005 const std::pair
<unsigned, uint64_t> Stats
= BF
.eraseInvalidBBs();
2007 LLVM_DEBUG(dbgs() << "Deleted " << Stats
.first
2008 << " redundant split edge BBs (" << Stats
.second
2009 << " bytes) for " << BF
.getPrintName() << "\n");
2012 // We may have split edges, creating BBs that need correct branching
2015 dbgs() << "Func after shrink-wrapping: \n";
2021 void ShrinkWrapping::printStats() {
2022 outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
2023 << " spills inserting load/stores and " << SpillsMovedPushPopMode
2024 << " spills inserting push/pops\n";
2025 if (!InstrDynamicCount
|| !StoreDynamicCount
)
2027 outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
2028 << " store executions ("
2029 << format("%.1lf%%",
2030 (100.0 * SpillsMovedDynamicCount
/ InstrDynamicCount
))
2031 << " total instructions executed, "
2032 << format("%.1lf%%",
2033 (100.0 * SpillsMovedDynamicCount
/ StoreDynamicCount
))
2034 << " store instructions)\n";
2035 outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
2036 << SpillsFailedDynamicCount
<< " store executions ("
2037 << format("%.1lf%%",
2038 (100.0 * SpillsFailedDynamicCount
/ InstrDynamicCount
))
2039 << " total instructions executed, "
2040 << format("%.1lf%%",
2041 (100.0 * SpillsFailedDynamicCount
/ StoreDynamicCount
))
2042 << " store instructions)\n";
2045 // Operators necessary as a result of using MCAnnotation
2046 raw_ostream
&operator<<(raw_ostream
&OS
,
2047 const std::vector
<ShrinkWrapping::WorklistItem
> &Vec
) {
2049 const char *Sep
= "";
2050 for (const ShrinkWrapping::WorklistItem
&Item
: Vec
) {
2052 switch (Item
.Action
) {
2053 case ShrinkWrapping::WorklistItem::Erase
:
2056 case ShrinkWrapping::WorklistItem::ChangeToAdjustment
:
2057 OS
<< "ChangeToAdjustment";
2059 case ShrinkWrapping::WorklistItem::InsertLoadOrStore
:
2060 OS
<< "InsertLoadOrStore";
2062 case ShrinkWrapping::WorklistItem::InsertPushOrPop
:
2063 OS
<< "InsertPushOrPop";
2073 operator<<(raw_ostream
&OS
,
2074 const std::vector
<StackLayoutModifier::WorklistItem
> &Vec
) {
2076 const char *Sep
= "";
2077 for (const StackLayoutModifier::WorklistItem
&Item
: Vec
) {
2079 switch (Item
.Action
) {
2080 case StackLayoutModifier::WorklistItem::None
:
2083 case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset
:
2084 OS
<< "AdjustLoadStoreOffset";
2086 case StackLayoutModifier::WorklistItem::AdjustCFI
:
2096 bool operator==(const ShrinkWrapping::WorklistItem
&A
,
2097 const ShrinkWrapping::WorklistItem
&B
) {
2098 return (A
.Action
== B
.Action
&& A
.AffectedReg
== B
.AffectedReg
&&
2099 A
.Adjustment
== B
.Adjustment
&&
2100 A
.FIEToInsert
.IsLoad
== B
.FIEToInsert
.IsLoad
&&
2101 A
.FIEToInsert
.IsStore
== B
.FIEToInsert
.IsStore
&&
2102 A
.FIEToInsert
.RegOrImm
== B
.FIEToInsert
.RegOrImm
&&
2103 A
.FIEToInsert
.Size
== B
.FIEToInsert
.Size
&&
2104 A
.FIEToInsert
.IsSimple
== B
.FIEToInsert
.IsSimple
&&
2105 A
.FIEToInsert
.StackOffset
== B
.FIEToInsert
.StackOffset
);
2108 bool operator==(const StackLayoutModifier::WorklistItem
&A
,
2109 const StackLayoutModifier::WorklistItem
&B
) {
2110 return (A
.Action
== B
.Action
&& A
.OffsetUpdate
== B
.OffsetUpdate
);
2113 } // end namespace bolt
2114 } // end namespace llvm