//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Core/MCPlus.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <numeric>
#include <optional>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;
namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
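// A minimal sketch of the pattern analyzeSaves() below looks for
// (illustrative, assuming x86-64; not taken from any particular binary):
//
//   push %r12              ; simple store of a callee-saved reg, offset < 0
//   ...                    ; no local definition of %r12 reaches the store
//   pop  %r12              ; matching restore, found by analyzeRestores()
//
// Stores that are not simple, hit non-negative (caller-owned) offsets, are
// reached by a local definition, are read by other functions, are reloaded
// into a different register, or save the same register twice blacklist the
// register instead of marking it callee-saved.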
void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed on the stack -- do not
        // mess with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}
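// Restores are matched by walking each block backwards: a simple load from
// the same stack offset recorded by analyzeSaves(), with no later local use
// of the register reaching it, is annotated as the restore for that CSR.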
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If stack offsets between saves/restores don't agree with each
        // other, we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}
std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}
CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}
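// BlacklistedRegions maps a stack offset to the size of the forbidden region
// starting there; blacklistRegion(0, 0) acts as a conservative sentinel, and
// in practice any entry disables layout changes because canCollapseRegion()
// and canInsertRegion() require the blacklist to be empty. For example, a
// blacklisted region at offset -16 of size 8 conflicts with any access whose
// [Offset, Offset + Size) range overlaps [-16, -8).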
void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}
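// A rough sketch of the case handled below (illustrative, assuming x86-64):
// an epilogue that rewinds SP using the frame pointer, e.g.
//
//   lea -0x8(%rbp), %rsp
//
// If the computed value matches the tracked FP, no fixup is needed; a result
// at or below the current SP looks like an FP-based allocation and poisons
// the layout; anything else is tagged with the slot annotation so its offset
// can be patched when the stack layout changes.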
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) {
    return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer();
  });
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same as FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed on the stack -- do not
      // mess with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}
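// Note: the initial values below encode the x86-64 SysV entry state. DWARF
// register 7 is %rsp, and the initial CFA of %rsp + 8 appears here as
// offset -8 because this pass negates CFA offsets to line them up with its
// SP-relative convention.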
void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        [[fallthrough]];
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}
void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}
bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}
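// Sketch of what collapsing does (illustrative, assuming x86-64): when a
// save such as "push %rbx" is deleted, every annotated access affected by
// that push (per StackAllocationAnalysis) must be re-pointed. Accesses on
// the safe side of the deleted slot (SP-based below it, FP-based above it)
// keep their offsets; the rest get a pending +RegionSz adjustment, and
// accesses to the deleted slot itself are tagged "AccessesDeletedPos" to be
// re-addressed later via setOffsetForCollapsedAccesses().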
bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}
void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}
bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}
bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in
  // collapseRegion: it is the value of SP before the allocation the user
  // wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}
void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        Success = BC.MIB->createRestoreFromStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        Success = BC.MIB->createSaveToStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
      assert(Success);
    }
  }
}
void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

using BBIterTy = BinaryBasicBlock::iterator;
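// UsesByReg[R] is a bitvector over all instructions (indexed by the
// dominator analysis) marking every use of callee-saved reg R other than its
// own save and restore. When the function uses a frame pointer, calls
// conservatively count as uses of any CSR aliasing FP.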
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}
void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
          << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores:"
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}
void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as a valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know the stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
                     [&](const MCInst *A, const MCInst *B) {
                       const BinaryBasicBlock *BBA = InsnToBB[A];
                       const BinaryBasicBlock *BBB = InsnToBB[B];
                       const uint64_t CountA = BBA->getKnownExecutionCount();
                       const uint64_t CountB = BBB->getKnownExecutionCount();
                       return CountB < CountA;
                     });

    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}
void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    Order.push_back(I);
  }

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}
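// Worked example of the profitability test below (using the default
// -shrink-wrapping-threshold=30): if the current save executes 1000 times,
// a candidate block qualifies only when it executes fewer than 300 times;
// the estimated win is the difference between the two counts.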
bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
         "save position vectors out of sync");
  if (BestSaveCount[CSR].empty())
    return false;

  const uint64_t BestCount = BestSaveCount[CSR].back();
  BestPosSave = BestSavePos[CSR].back();
  if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
    return false;

  LLVM_DEBUG({
    auto &InsnToBB = Info.getInsnToBBMap();
    dbgs() << "Better position for saves found in func " << BF.getPrintName()
           << " count: " << BF.getKnownExecutionCount() << "\n";
    dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
           << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
  });

  TotalEstimatedWin = CurSavingCost - BestCount;
  return true;
}
/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing the destination BBs. It's important to memorize
  // the original size of the Frontier, as we may append to it while splitting
  // critical edges originating in blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split the edge for every DestinationBB
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert a dummy instruction so this BB is never empty (we need this
      // for PredictiveStackPointerTracking to work, since it annotates
      // instructions and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier in place
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // on edges transitioning to the dominance frontier; otherwise we would pull
  // these restores into the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which indicate
    // the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect empty BBs now and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}
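// Push/pop mode rewrites saves as pushes and restores as pops, which changes
// the stack layout; it is only valid when the function needs no special
// alignment, takes no stack addresses, every affected region can be
// collapsed, every restore is already a pop, and we are not pushing into an
// entry block (SP offset -8) of a function that sets up a frame pointer.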
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  if (FA.hasStackArithmetic(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (SPT.getStateBefore(Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}
void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}
bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}
void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know the stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/stores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (MCInst &Inst : llvm::reverse(BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}
namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace
bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().startswith(".LSplitEdge"))
      continue;
    for (BinaryBasicBlock &RBB : llvm::reverse(BF)) {
      if (&RBB == &BB)
        break;
      if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}
namespace {

// A special StackPointerTracking that compensates for our future plans
// in removing/adding insns.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}
void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}
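// Materializes a scheduled save/restore. The access is rebased onto
// whichever register currently has a known tracked value: SP-relative when
// SPVal is known, FP-relative otherwise. FIE.StackOffset and the tracked
// SPVal/FPVal share the same frame-entry-relative convention, so their
// difference is the immediate for the new memory operand; in push/pop mode
// the plain load/store is then converted in place.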
MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                         const FrameIndexEntry &FIE,
                                         bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                          FIE.StackOffset - SPVal, FIE.RegOrImm,
                                          FIE.Size)) {
        errs() << "createRestoreFromStack: not supported on this platform\n";
        abort();
      }
    } else {
      if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size)) {
        errs() << "createSaveToStack: not supported on this platform\n";
        abort();
      }
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                        FIE.StackOffset - FPVal, FIE.RegOrImm,
                                        FIE.Size)) {
      errs() << "createRestoreFromStack: not supported on this platform\n";
      abort();
    }
  } else {
    if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size)) {
      errs() << "createSaveToStack: not supported on this platform\n";
      abort();
    }
  }
  return NewInst;
}
void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}
BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}
BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                          BinaryBasicBlock *CurBB,
                                          const WorklistItem &Item,
                                          int64_t SPVal, int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  MCInst NewInst =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

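// Apply every insertion scheduled at one program point. SPVal/FPVal describe
// the stack state at that point and are kept up to date as pushes and pops
// are materialized, since each one shifts the SP seen by the next insertion.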
BBIterTy ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location; we want the SP value as it
  // is before the pending insertions take effect.
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them: pops
  // inserted at the same point are emitted in decreasing DomOrder so that
  // restores follow the stack's LIFO discipline, while all other items keep
  // their relative (stable) order.
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    InsertionPoint =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

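// Walk the function and apply all scheduled insertions. Note that
// PredictiveStackPointerTracking anticipates the effect of the pending
// insertions, so the state it reports already reflects the stack layout the
// new instructions will execute under.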
bool ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
    }
    // Process insertions at the end of BB
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
                            SPTState.second);
      Changes = true;
    }
  }
  return Changes;
}

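// Apply Erase and ChangeToAdjustment work items. A push or pop whose
// save/restore was moved elsewhere still has to adjust the SP, so it is
// rewritten as an explicit stack-pointer adjustment; liveness of the flags
// register decides whether a flags-clobbering form may be used.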
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is the flags reg alive at this point?
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          continue;
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

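// Rebuild CFI to match the new save/restore placement: first repair the CFA
// offsets along the new SP trajectory (only needed when the function has no
// frame pointer), then re-emit the deleted register-save CFIs for every CSR
// whose push and pop were both moved.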
void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

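// Top-level driver for shrink wrapping on one function. Dynamic-count
// statistics are gathered before any early exit so that skipped functions
// still contribute to the totals reported by printStats().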
bool ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      if (BC.MIB->mayStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could
    // regress dcache misses). Moreover, ambiguous JTs are rare and come from
    // code written in assembly language. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  if (!processInsertions())
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

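// Report aggregate results. Percentages are relative to the dynamic counts
// accumulated in perform(), so they cover every function the pass inspected.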
void ShrinkWrapping::printStats() {
  outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
         << " spills inserting load/stores and " << SpillsMovedPushPopMode
         << " spills inserting push/pops\n";
  if (!InstrDynamicCount || !StoreDynamicCount)
    return;
  outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
         << " store executions ("
         << format("%.1lf%%",
                   (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
         << " of total instructions executed, "
         << format("%.1lf%%",
                   (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
         << " of store instructions)\n";
  outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
         << SpillsFailedDynamicCount << " store executions ("
         << format("%.1lf%%",
                   (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
         << " of total instructions executed, "
         << format("%.1lf%%",
                   (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
         << " of store instructions)\n";
}

// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm