//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//
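
// Shrink wrapping moves callee-saved register saves and restores out of the
// function's hot prologue and epilogue and into colder basic blocks, so that
// executions which never need a given register do not pay for its spill.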

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <numeric>
#include <optional>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts
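
// With the default threshold of 30, for example, a save whose original
// location executes 1000 times may only move to a candidate block that
// executes fewer than 300 times (see ShrinkWrapping::isBestSavePosCold).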

namespace llvm {
namespace bolt {

void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed in a stack -- do not mess
        // with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}

void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If stack offsets between save and restore don't agree with each
        // other, we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}

std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}

void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}

void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}

void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) {
    return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer();
  });
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same as FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}

void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed in a stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}

void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  // Initial CFA state at function entry: DWARF reg 7 is the stack pointer on
  // x86-64, and the entry CFA of SP+8 is tracked as -8 because this pass
  // negates CFI offsets (see the OpDefCfaOffset handling below).
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        [[fallthrough]];
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}

void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}

bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

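// Collapsing a region models deletion of the push that created it: every
// surviving access affected by the deleted allocation gets a WorklistItem
// scheduled to adjust its load/store offset or its CFI by the region size.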
bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}

bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

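// insertRegion is the dual of collapseRegion: it models a new RegionSz-byte
// allocation at program point P and schedules -RegionSz adjustments for the
// dominated accesses that sit below the new region.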
bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in collapseRegion
  // This is the value of SP before the allocation the user wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
                                       StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
                                  Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
    }
  }
}

void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}

std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

using BBIterTy = BinaryBasicBlock::iterator;

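// For each callee-saved register R, UsesByReg[R] is a bitvector over the
// dominator analysis' instruction numbering marking every use of R that is
// not R's own save or restore; it drives both save and restore placement.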
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}

void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
          << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores:"
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}

void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
                     [&](const MCInst *A, const MCInst *B) {
                       const BinaryBasicBlock *BBA = InsnToBB[A];
                       const BinaryBasicBlock *BBB = InsnToBB[B];
                       const uint64_t CountA = BBA->getKnownExecutionCount();
                       const uint64_t CountB = BBB->getKnownExecutionCount();
                       return CountB < CountA;
                     });

    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}

void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    Order.push_back(I);
  }

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}

bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
         "save position vectors out of sync");
  if (BestSaveCount[CSR].empty())
    return false;

  const uint64_t BestCount = BestSaveCount[CSR].back();
  BestPosSave = BestSavePos[CSR].back();
  if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
    return false;

  LLVM_DEBUG({
    auto &InsnToBB = Info.getInsnToBBMap();
    dbgs() << "Better position for saves found in func " << BF.getPrintName()
           << " count: " << BF.getKnownExecutionCount() << "\n";
    dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
           << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
  });

  TotalEstimatedWin = CurSavingCost - BestCount;
  return true;
}

/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating from blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBBs
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates
      // instructions and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier in place
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}

SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // into edges transitioning to the dominance frontier, otherwise we pull
  // these restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which
    // indicates the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}

bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  if (FA.hasStackArithmetic(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}

SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (SPT.getStateBefore(Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}

void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}

bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}

void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}

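// Per-function driver: for each callee-saved register, pick a colder save
// position, compute restore points on its dominance frontier, prefer
// push/pop mode when it validates, and revert the whole function to
// load/store mode if any register cannot use push/pop safely.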
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (MCInst &Inst : llvm::reverse(BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}

namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace

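// Splitting critical edges for several registers can leave multiple
// .LSplitEdge blocks with identical contents and the same single successor;
// fold such duplicates into one block.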
bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().starts_with(".LSplitEdge"))
      continue;
    for (BinaryBasicBlock &RBB : llvm::reverse(BF)) {
      if (&RBB == &BB)
        break;
      if (!RBB.getName().starts_with(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}

namespace {

// A special StackPointerTracking that compensates for our future plans
// to remove/add instructions.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace

void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}

void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}

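// Materialize a scheduled save/restore. Prefer an SP-relative access when
// the SP value at this point is known; otherwise fall back to an FP-relative
// access and assert that FP is known. The emitted displacement is
// FIE.StackOffset minus the base register's known value, converting the
// frame-relative offset stored in the FrameIndexEntry.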
Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                                   const FrameIndexEntry &FIE,
                                                   bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size);
    } else {
      BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                FIE.StackOffset - SPVal, FIE.RegOrImm,
                                FIE.Size);
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size);
  } else {
    BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                              FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
  }
  return NewInst;
}

void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}

BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}

Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                                    BinaryBasicBlock *CurBB,
                                                    const WorklistItem &Item,
                                                    int64_t SPVal,
                                                    int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary: writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger the CFI update.
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });

  Expected<MCInst> NewInstOrErr =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (auto E = NewInstOrErr.takeError())
    return Error(std::move(E));
  MCInst &NewInst = *NewInstOrErr;
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

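// Process all insertions scheduled at one program point, keeping SPVal in
// sync by hand as pushes and pops are emitted. A sketch with made-up numbers:
// at SPVal == -24, inserting an 8-byte push leaves SPVal == -32 for the next
// item, so a following access is still addressed against the correct SP.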
Expected<BBIterTy> ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location; we want the SP value before
  // the insertions are made.
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if (A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad)
      return true;
    if (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad)
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

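  // Illustrative effect of the ordering above (registers are arbitrary): if
  // the save of %rbx dominates the save of %r12, the restores come out as
  // "pop %r12" followed by "pop %rbx", i.e. in reverse dominance order, so
  // the restore sequence mirrors the nesting of the saves.
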
  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    auto InsertionPointOrErr =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (auto E = InsertionPointOrErr.takeError())
      return Error(std::move(E));
    InsertionPoint = *InsertionPointOrErr;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

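// Walk the function and perform every scheduled insertion.
// PredictiveStackPointerTracking simulates the SP/FP values as if the pending
// insertions were already in place, which is why processInsertionsList has to
// revert its effect at block ends.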
Expected<bool> ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions scheduled before some instruction.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      auto IterOrErr =
          processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
      if (auto E = IterOrErr.takeError())
        return Error(std::move(E));
      I = *IterOrErr;
    }
    // Process insertions scheduled at the end of the BB.
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      if (auto E = processInsertionsList(BB.end(), &BB, WRI->second,
                                         SPTState.first, SPTState.second)
                       .takeError())
        return Error(std::move(E));
      Changes = true;
    }
  }
  return Changes;
}

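// Erase or downgrade the original spill instructions. For ChangeToAdjustment,
// the SP effect of a moved push/pop must be preserved: on x86_64, for
// example, a relocated "push %rbx" leaves behind an 8-byte SP adjustment in
// its place (emitted so it does not clobber the flags register if that is
// live), and a relocated pop leaves the inverse adjustment.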
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is the flags reg alive at this point? If so, the SP adjustment
          // must not clobber it.
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

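// Rebuild the CFI invalidated by moving saves and restores: first the
// def_cfa_offset chain when the function has no frame pointer, then the
// per-register CFIs for every CSR whose push/pop was relocated. Analyses are
// invalidated after each step because inserting CFI changes the instruction
// stream they were computed on.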
void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

Expected<bool> ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics: weight each instruction by the execution count of
  // its block to obtain dynamic counts.
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      if (BC.MIB->mayStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could
    // regress dcache misses). Moreover, ambiguous JTs are rare and typically
    // come from code written in assembly language. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items.
  auto ModifiedOrErr = processInsertions();
  if (auto E = ModifiedOrErr.takeError())
    return Error(std::move(E));
  const bool Modified = *ModifiedOrErr;
  if (!Modified)
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching.
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

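// Sample of the first summary line printed below (counts are made up):
//   BOLT-INFO: Shrink wrapping moved 5 spills inserting load/stores and 2
//   spills inserting push/pops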
void ShrinkWrapping::printStats(BinaryContext &BC) {
  BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
            << " spills inserting load/stores and " << SpillsMovedPushPopMode
            << " spills inserting push/pops\n";
  if (!InstrDynamicCount || !StoreDynamicCount)
    return;
  BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
            << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
  BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
            << SpillsFailedDynamicCount << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
}

// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm