Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / TailDuplication.cpp
blob12d33713c0ff22c52bb39d042cba1436c19eb82d
1 //===- bolt/Passes/TailDuplication.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the TailDuplication class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/TailDuplication.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/MC/MCRegisterInfo.h"
16 #include <queue>
18 #include <numeric>
20 #define DEBUG_TYPE "taildup"
22 using namespace llvm;
24 namespace opts {
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<bool> NoThreads;
29 cl::opt<bolt::TailDuplication::DuplicationMode> TailDuplicationMode(
30 "tail-duplication",
31 cl::desc("duplicate unconditional branches that cross a cache line"),
32 cl::init(bolt::TailDuplication::TD_NONE),
33 cl::values(clEnumValN(bolt::TailDuplication::TD_NONE, "none",
34 "do not apply"),
35 clEnumValN(bolt::TailDuplication::TD_AGGRESSIVE, "aggressive",
36 "aggressive strategy"),
37 clEnumValN(bolt::TailDuplication::TD_MODERATE, "moderate",
38 "moderate strategy"),
39 clEnumValN(bolt::TailDuplication::TD_CACHE, "cache",
40 "cache-aware duplication strategy")),
41 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
43 static cl::opt<unsigned>
44 TailDuplicationMinimumOffset("tail-duplication-minimum-offset",
45 cl::desc("minimum offset needed between block "
46 "and successor to allow duplication"),
47 cl::ReallyHidden, cl::init(64),
48 cl::cat(BoltOptCategory));
50 static cl::opt<unsigned> TailDuplicationMaximumDuplication(
51 "tail-duplication-maximum-duplication",
52 cl::desc("tail blocks whose size (in bytes) exceeds the value are never "
53 "duplicated"),
54 cl::ZeroOrMore, cl::ReallyHidden, cl::init(24), cl::cat(BoltOptCategory));
56 static cl::opt<unsigned> TailDuplicationMinimumDuplication(
57 "tail-duplication-minimum-duplication",
58 cl::desc("tail blocks with size (in bytes) not exceeding the value are "
59 "always duplicated"),
60 cl::ReallyHidden, cl::init(2), cl::cat(BoltOptCategory));
62 static cl::opt<bool> TailDuplicationConstCopyPropagation(
63 "tail-duplication-const-copy-propagation",
64 cl::desc("enable const and copy propagation after tail duplication"),
65 cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
67 static cl::opt<unsigned> TailDuplicationMaxCacheDistance(
68 "tail-duplication-max-cache-distance",
69 cl::desc("The weight of backward jumps for ExtTSP value"), cl::init(256),
70 cl::ReallyHidden, cl::cat(BoltOptCategory));
72 static cl::opt<double> TailDuplicationCacheBackwardWeight(
73 "tail-duplication-cache-backward-weight",
74 cl::desc(
75 "The maximum distance (in bytes) of backward jumps for ExtTSP value"),
76 cl::init(0.5), cl::ReallyHidden, cl::cat(BoltOptCategory));
78 } // namespace opts
80 namespace llvm {
81 namespace bolt {
83 void TailDuplication::getCallerSavedRegs(const MCInst &Inst, BitVector &Regs,
84 BinaryContext &BC) const {
85 if (!BC.MIB->isCall(Inst))
86 return;
87 BitVector CallRegs = BitVector(BC.MRI->getNumRegs(), false);
88 BC.MIB->getCalleeSavedRegs(CallRegs);
89 CallRegs.flip();
90 Regs |= CallRegs;
93 bool TailDuplication::regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg,
94 BinaryContext &BC) const {
95 BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
96 BC.MIB->getWrittenRegs(Inst, WrittenRegs);
97 getCallerSavedRegs(Inst, WrittenRegs, BC);
98 if (BC.MIB->isRep(Inst))
99 BC.MIB->getRepRegs(WrittenRegs);
100 WrittenRegs &= BC.MIB->getAliases(Reg, false);
101 return WrittenRegs.any();
104 bool TailDuplication::regIsDefinitelyOverwritten(const MCInst &Inst,
105 unsigned Reg,
106 BinaryContext &BC) const {
107 BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
108 BC.MIB->getWrittenRegs(Inst, WrittenRegs);
109 getCallerSavedRegs(Inst, WrittenRegs, BC);
110 if (BC.MIB->isRep(Inst))
111 BC.MIB->getRepRegs(WrittenRegs);
112 return (!regIsUsed(Inst, Reg, BC) && WrittenRegs.test(Reg) &&
113 !BC.MIB->isConditionalMove(Inst));
116 bool TailDuplication::regIsUsed(const MCInst &Inst, unsigned Reg,
117 BinaryContext &BC) const {
118 BitVector SrcRegs = BitVector(BC.MRI->getNumRegs(), false);
119 BC.MIB->getSrcRegs(Inst, SrcRegs);
120 SrcRegs &= BC.MIB->getAliases(Reg, true);
121 return SrcRegs.any();
124 bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB,
125 unsigned Reg) const {
126 BinaryFunction *BF = StartBB.getFunction();
127 BinaryContext &BC = BF->getBinaryContext();
128 std::queue<BinaryBasicBlock *> Q;
129 for (auto Itr = StartBB.succ_begin(); Itr != StartBB.succ_end(); ++Itr) {
130 BinaryBasicBlock *NextBB = *Itr;
131 Q.push(NextBB);
133 std::set<BinaryBasicBlock *> Visited;
134 // Breadth first search through successive blocks and see if Reg is ever used
135 // before its overwritten
136 while (Q.size() > 0) {
137 BinaryBasicBlock *CurrBB = Q.front();
138 Q.pop();
139 if (Visited.count(CurrBB))
140 continue;
141 Visited.insert(CurrBB);
142 bool Overwritten = false;
143 for (auto Itr = CurrBB->begin(); Itr != CurrBB->end(); ++Itr) {
144 MCInst &Inst = *Itr;
145 if (regIsUsed(Inst, Reg, BC))
146 return false;
147 if (regIsDefinitelyOverwritten(Inst, Reg, BC)) {
148 Overwritten = true;
149 break;
152 if (Overwritten)
153 continue;
154 for (auto Itr = CurrBB->succ_begin(); Itr != CurrBB->succ_end(); ++Itr) {
155 BinaryBasicBlock *NextBB = *Itr;
156 Q.push(NextBB);
159 return true;
162 void TailDuplication::constantAndCopyPropagate(
163 BinaryBasicBlock &OriginalBB,
164 std::vector<BinaryBasicBlock *> &BlocksToPropagate) {
165 BinaryFunction *BF = OriginalBB.getFunction();
166 BinaryContext &BC = BF->getBinaryContext();
168 BlocksToPropagate.insert(BlocksToPropagate.begin(), &OriginalBB);
169 // Iterate through the original instructions to find one to propagate
170 for (auto Itr = OriginalBB.begin(); Itr != OriginalBB.end(); ++Itr) {
171 MCInst &OriginalInst = *Itr;
172 // It must be a non conditional
173 if (BC.MIB->isConditionalMove(OriginalInst))
174 continue;
176 // Move immediate or move register
177 if ((!BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate() ||
178 !OriginalInst.getOperand(1).isImm()) &&
179 (!BC.MII->get(OriginalInst.getOpcode()).isMoveReg() ||
180 !OriginalInst.getOperand(1).isReg()))
181 continue;
183 // True if this is constant propagation and not copy propagation
184 bool ConstantProp = BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate();
185 // The Register to replaced
186 unsigned Reg = OriginalInst.getOperand(0).getReg();
187 // True if the register to replace was replaced everywhere it was used
188 bool ReplacedEverywhere = true;
189 // True if the register was definitely overwritten
190 bool Overwritten = false;
191 // True if the register to replace and the register to replace with (for
192 // copy propagation) has not been overwritten and is still usable
193 bool RegsActive = true;
195 // Iterate through successor blocks and through their instructions
196 for (BinaryBasicBlock *NextBB : BlocksToPropagate) {
197 for (auto PropagateItr =
198 ((NextBB == &OriginalBB) ? Itr + 1 : NextBB->begin());
199 PropagateItr < NextBB->end(); ++PropagateItr) {
200 MCInst &PropagateInst = *PropagateItr;
201 if (regIsUsed(PropagateInst, Reg, BC)) {
202 bool Replaced = false;
203 // If both registers are active for copy propagation or the register
204 // to replace is active for constant propagation
205 if (RegsActive) {
206 // Set Replaced and so ReplacedEverwhere to false if it cannot be
207 // replaced (no replacing that opcode, Register is src and dest)
208 if (ConstantProp)
209 Replaced = BC.MIB->replaceRegWithImm(
210 PropagateInst, Reg, OriginalInst.getOperand(1).getImm());
211 else
212 Replaced = BC.MIB->replaceRegWithReg(
213 PropagateInst, Reg, OriginalInst.getOperand(1).getReg());
215 ReplacedEverywhere = ReplacedEverywhere && Replaced;
217 // For copy propagation, make sure no propagation happens after the
218 // register to replace with is overwritten
219 if (!ConstantProp &&
220 regIsPossiblyOverwritten(PropagateInst,
221 OriginalInst.getOperand(1).getReg(), BC))
222 RegsActive = false;
224 // Make sure no propagation happens after the register to replace is
225 // overwritten
226 if (regIsPossiblyOverwritten(PropagateInst, Reg, BC))
227 RegsActive = false;
229 // Record if the register to replace is overwritten
230 if (regIsDefinitelyOverwritten(PropagateInst, Reg, BC)) {
231 Overwritten = true;
232 break;
235 if (Overwritten)
236 break;
239 // If the register was replaced everwhere and it was overwritten in either
240 // one of the iterated through blocks or one of the successor blocks, delete
241 // the original move instruction
242 if (ReplacedEverywhere &&
243 (Overwritten ||
244 isOverwrittenBeforeUsed(
245 *BlocksToPropagate[BlocksToPropagate.size() - 1], Reg))) {
246 // If both registers are active for copy propagation or the register
247 // to replace is active for constant propagation
248 StaticInstructionDeletionCount++;
249 DynamicInstructionDeletionCount += OriginalBB.getExecutionCount();
250 Itr = std::prev(OriginalBB.eraseInstruction(Itr));
255 bool TailDuplication::isInCacheLine(const BinaryBasicBlock &BB,
256 const BinaryBasicBlock &Succ) const {
257 if (&BB == &Succ)
258 return true;
260 uint64_t Distance = 0;
261 int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1;
263 for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex();
264 I += Direction) {
265 Distance += BB.getFunction()->getLayout().getBlock(I)->getOriginalSize();
266 if (Distance > opts::TailDuplicationMinimumOffset)
267 return false;
269 return true;
272 std::vector<BinaryBasicBlock *>
273 TailDuplication::moderateDuplicate(BinaryBasicBlock &BB,
274 BinaryBasicBlock &Tail) const {
275 std::vector<BinaryBasicBlock *> BlocksToDuplicate;
276 // The block must be hot
277 if (BB.getKnownExecutionCount() == 0)
278 return BlocksToDuplicate;
279 // and its sucessor is not already in the same cache line
280 if (isInCacheLine(BB, Tail))
281 return BlocksToDuplicate;
282 // and its size do not exceed the maximum allowed size
283 if (Tail.getOriginalSize() > opts::TailDuplicationMaximumDuplication)
284 return BlocksToDuplicate;
285 // If duplicating would introduce a new branch, don't duplicate
286 for (auto Itr = Tail.succ_begin(); Itr != Tail.succ_end(); ++Itr) {
287 if ((*Itr)->getLayoutIndex() == Tail.getLayoutIndex() + 1)
288 return BlocksToDuplicate;
291 BlocksToDuplicate.push_back(&Tail);
292 return BlocksToDuplicate;
295 std::vector<BinaryBasicBlock *>
296 TailDuplication::aggressiveDuplicate(BinaryBasicBlock &BB,
297 BinaryBasicBlock &Tail) const {
298 std::vector<BinaryBasicBlock *> BlocksToDuplicate;
299 // The block must be hot
300 if (BB.getKnownExecutionCount() == 0)
301 return BlocksToDuplicate;
302 // and its sucessor is not already in the same cache line
303 if (isInCacheLine(BB, Tail))
304 return BlocksToDuplicate;
306 BinaryBasicBlock *CurrBB = &Tail;
307 while (CurrBB) {
308 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
309 << CurrBB->getName() << " to duplication list\n";);
310 BlocksToDuplicate.push_back(CurrBB);
312 if (CurrBB->hasJumpTable()) {
313 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication "
314 "list due to a JT in "
315 << CurrBB->getName() << '\n';);
316 BlocksToDuplicate.clear();
317 break;
320 // With no successors, we've reached the end and should duplicate all of
321 // BlocksToDuplicate
322 if (CurrBB->succ_size() == 0)
323 break;
325 // With two successors, if they're both a jump, we should duplicate all
326 // blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream of
327 // blocks to copy
328 if (CurrBB->succ_size() >= 2) {
329 if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() ==
330 CurrBB->getLayoutIndex() + 1 ||
331 CurrBB->getConditionalSuccessor(true)->getLayoutIndex() ==
332 CurrBB->getLayoutIndex() + 1) {
333 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing "
334 "duplication list, can't find a simple stream at "
335 << CurrBB->getName() << '\n';);
336 BlocksToDuplicate.clear();
338 break;
341 // With one successor, if its a jump, we should duplicate all blocks in
342 // BlocksToDuplicate. Otherwise, we should keep going
343 BinaryBasicBlock *SuccBB = CurrBB->getSuccessor();
344 if (SuccBB->getLayoutIndex() != CurrBB->getLayoutIndex() + 1)
345 break;
346 CurrBB = SuccBB;
348 // Don't duplicate if its too much code
349 unsigned DuplicationByteCount = std::accumulate(
350 std::begin(BlocksToDuplicate), std::end(BlocksToDuplicate), 0,
351 [](int value, BinaryBasicBlock *p) {
352 return value + p->getOriginalSize();
354 if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) {
355 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count ("
356 << DuplicationByteCount << ") exceeds maximum "
357 << opts::TailDuplicationMaximumDuplication << '\n';);
358 BlocksToDuplicate.clear();
360 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found "
361 << BlocksToDuplicate.size() << " blocks to duplicate\n";);
362 return BlocksToDuplicate;
365 bool TailDuplication::shouldDuplicate(BinaryBasicBlock *Pred,
366 BinaryBasicBlock *Tail) const {
367 if (Pred == Tail)
368 return false;
369 // Cannot duplicate non-tail blocks
370 if (Tail->succ_size() != 0)
371 return false;
372 // The blocks are already in the order
373 if (Pred->getLayoutIndex() + 1 == Tail->getLayoutIndex())
374 return false;
375 // No tail duplication for blocks with jump tables
376 if (Pred->hasJumpTable())
377 return false;
378 if (Tail->hasJumpTable())
379 return false;
381 return true;
384 double TailDuplication::cacheScore(uint64_t SrcAddr, uint64_t SrcSize,
385 uint64_t DstAddr, uint64_t DstSize,
386 uint64_t Count) const {
387 assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE);
389 bool IsForwardJump = SrcAddr <= DstAddr;
390 uint64_t JumpDistance = 0;
391 // Computing the length of the jump so that it takes the sizes of the two
392 // blocks into consideration
393 if (IsForwardJump) {
394 JumpDistance = (DstAddr + DstSize) - (SrcAddr);
395 } else {
396 JumpDistance = (SrcAddr + SrcSize) - (DstAddr);
399 if (JumpDistance >= opts::TailDuplicationMaxCacheDistance)
400 return 0;
401 double Prob = 1.0 - static_cast<double>(JumpDistance) /
402 opts::TailDuplicationMaxCacheDistance;
403 return (IsForwardJump ? 1.0 : opts::TailDuplicationCacheBackwardWeight) *
404 Prob * Count;
407 bool TailDuplication::cacheScoreImproved(const MCCodeEmitter *Emitter,
408 BinaryFunction &BF,
409 BinaryBasicBlock *Pred,
410 BinaryBasicBlock *Tail) const {
411 // Collect (estimated) basic block sizes
412 DenseMap<const BinaryBasicBlock *, uint64_t> BBSize;
413 for (const BinaryBasicBlock &BB : BF) {
414 BBSize[&BB] = std::max<uint64_t>(BB.estimateSize(Emitter), 1);
417 // Build current addresses of basic blocks starting at the entry block
418 DenseMap<BinaryBasicBlock *, uint64_t> CurAddr;
419 uint64_t Addr = 0;
420 for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
421 CurAddr[SrcBB] = Addr;
422 Addr += BBSize[SrcBB];
425 // Build new addresses (after duplication) starting at the entry block
426 DenseMap<BinaryBasicBlock *, uint64_t> NewAddr;
427 Addr = 0;
428 for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
429 NewAddr[SrcBB] = Addr;
430 Addr += BBSize[SrcBB];
431 if (SrcBB == Pred)
432 Addr += BBSize[Tail];
435 // Compute the cache score for the existing layout of basic blocks
436 double CurScore = 0;
437 for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
438 auto BI = SrcBB->branch_info_begin();
439 for (BinaryBasicBlock *DstBB : SrcBB->successors()) {
440 if (SrcBB != DstBB) {
441 CurScore += cacheScore(CurAddr[SrcBB], BBSize[SrcBB], CurAddr[DstBB],
442 BBSize[DstBB], BI->Count);
444 ++BI;
448 // Compute the cache score for the layout of blocks after tail duplication
449 double NewScore = 0;
450 for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
451 auto BI = SrcBB->branch_info_begin();
452 for (BinaryBasicBlock *DstBB : SrcBB->successors()) {
453 if (SrcBB != DstBB) {
454 if (SrcBB == Pred && DstBB == Tail) {
455 NewScore += cacheScore(NewAddr[SrcBB], BBSize[SrcBB],
456 NewAddr[SrcBB] + BBSize[SrcBB], BBSize[DstBB],
457 BI->Count);
458 } else {
459 NewScore += cacheScore(NewAddr[SrcBB], BBSize[SrcBB], NewAddr[DstBB],
460 BBSize[DstBB], BI->Count);
463 ++BI;
467 return NewScore > CurScore;
470 std::vector<BinaryBasicBlock *>
471 TailDuplication::cacheDuplicate(const MCCodeEmitter *Emitter,
472 BinaryFunction &BF, BinaryBasicBlock *Pred,
473 BinaryBasicBlock *Tail) const {
474 std::vector<BinaryBasicBlock *> BlocksToDuplicate;
476 // No need to duplicate cold basic blocks
477 if (Pred->isCold() || Tail->isCold()) {
478 return BlocksToDuplicate;
480 // Always duplicate "small" tail basic blocks, which might be beneficial for
481 // code size, since a jump instruction is eliminated
482 if (Tail->estimateSize(Emitter) <= opts::TailDuplicationMinimumDuplication) {
483 BlocksToDuplicate.push_back(Tail);
484 return BlocksToDuplicate;
486 // Never duplicate "large" tail basic blocks
487 if (Tail->estimateSize(Emitter) > opts::TailDuplicationMaximumDuplication) {
488 return BlocksToDuplicate;
490 // Do not append basic blocks after the last hot block in the current layout
491 auto NextBlock = BF.getLayout().getBasicBlockAfter(Pred);
492 if (NextBlock == nullptr || (!Pred->isCold() && NextBlock->isCold())) {
493 return BlocksToDuplicate;
496 // Duplicate the tail only if it improves the cache score
497 if (cacheScoreImproved(Emitter, BF, Pred, Tail)) {
498 BlocksToDuplicate.push_back(Tail);
501 return BlocksToDuplicate;
504 std::vector<BinaryBasicBlock *> TailDuplication::duplicateBlocks(
505 BinaryBasicBlock &BB,
506 const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const {
507 BinaryFunction *BF = BB.getFunction();
508 BinaryContext &BC = BF->getBinaryContext();
510 // Ratio of this new branches execution count to the total size of the
511 // successor's execution count. Used to set this new branches execution count
512 // and lower the old successor's execution count
513 double ExecutionCountRatio =
514 BB.getExecutionCount() >= BB.getSuccessor()->getExecutionCount()
515 ? 1.0
516 : (double)BB.getExecutionCount() /
517 BB.getSuccessor()->getExecutionCount();
519 // Use the last branch info when adding a successor to LastBB
520 BinaryBasicBlock::BinaryBranchInfo &LastBI =
521 BB.getBranchInfo(*(BB.getSuccessor()));
523 BinaryBasicBlock *LastOriginalBB = &BB;
524 BinaryBasicBlock *LastDuplicatedBB = &BB;
525 assert(LastDuplicatedBB->succ_size() == 1 &&
526 "tail duplication cannot act on a block with more than 1 successor");
527 LastDuplicatedBB->removeSuccessor(LastDuplicatedBB->getSuccessor());
529 std::vector<std::unique_ptr<BinaryBasicBlock>> DuplicatedBlocks;
530 std::vector<BinaryBasicBlock *> DuplicatedBlocksToReturn;
532 for (BinaryBasicBlock *CurBB : BlocksToDuplicate) {
533 DuplicatedBlocks.emplace_back(
534 BF->createBasicBlock((BC.Ctx)->createNamedTempSymbol("tail-dup")));
535 BinaryBasicBlock *NewBB = DuplicatedBlocks.back().get();
537 NewBB->addInstructions(CurBB->begin(), CurBB->end());
538 // Set execution count as if it was just a copy of the original
539 NewBB->setExecutionCount(CurBB->getExecutionCount());
540 NewBB->setIsCold(CurBB->isCold());
541 LastDuplicatedBB->addSuccessor(NewBB, LastBI);
543 DuplicatedBlocksToReturn.push_back(NewBB);
545 // As long as its not the first block, adjust both original and duplicated
546 // to what they should be
547 if (LastDuplicatedBB != &BB) {
548 LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
549 LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
552 if (CurBB->succ_size() == 1)
553 LastBI = CurBB->getBranchInfo(*(CurBB->getSuccessor()));
555 LastOriginalBB = CurBB;
556 LastDuplicatedBB = NewBB;
559 LastDuplicatedBB->addSuccessors(
560 LastOriginalBB->succ_begin(), LastOriginalBB->succ_end(),
561 LastOriginalBB->branch_info_begin(), LastOriginalBB->branch_info_end());
563 LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
564 LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
566 BF->insertBasicBlocks(&BB, std::move(DuplicatedBlocks));
568 return DuplicatedBlocksToReturn;
571 void TailDuplication::runOnFunction(BinaryFunction &Function) {
572 // Create a separate MCCodeEmitter to allow lock-free execution
573 BinaryContext::IndependentCodeEmitter Emitter;
574 if (!opts::NoThreads) {
575 Emitter = Function.getBinaryContext().createIndependentMCCodeEmitter();
578 Function.getLayout().updateLayoutIndices();
580 // New blocks will be added and layout will change,
581 // so make a copy here to iterate over the original layout
582 BinaryFunction::BasicBlockOrderType BlockLayout(
583 Function.getLayout().block_begin(), Function.getLayout().block_end());
584 bool ModifiedFunction = false;
585 for (BinaryBasicBlock *BB : BlockLayout) {
586 AllDynamicCount += BB->getKnownExecutionCount();
588 // The block must be with one successor
589 if (BB->succ_size() != 1)
590 continue;
591 BinaryBasicBlock *Tail = BB->getSuccessor();
592 // Verify that the tail should be duplicated
593 if (!shouldDuplicate(BB, Tail))
594 continue;
596 std::vector<BinaryBasicBlock *> BlocksToDuplicate;
597 if (opts::TailDuplicationMode == TailDuplication::TD_AGGRESSIVE) {
598 BlocksToDuplicate = aggressiveDuplicate(*BB, *Tail);
599 } else if (opts::TailDuplicationMode == TailDuplication::TD_MODERATE) {
600 BlocksToDuplicate = moderateDuplicate(*BB, *Tail);
601 } else if (opts::TailDuplicationMode == TailDuplication::TD_CACHE) {
602 BlocksToDuplicate = cacheDuplicate(Emitter.MCE.get(), Function, BB, Tail);
603 } else {
604 llvm_unreachable("unknown tail duplication mode");
607 if (BlocksToDuplicate.empty())
608 continue;
610 // Apply the duplication
611 ModifiedFunction = true;
612 DuplicationsDynamicCount += BB->getExecutionCount();
613 auto DuplicatedBlocks = duplicateBlocks(*BB, BlocksToDuplicate);
614 for (BinaryBasicBlock *BB : DuplicatedBlocks) {
615 DuplicatedBlockCount++;
616 DuplicatedByteCount += BB->estimateSize(Emitter.MCE.get());
619 if (opts::TailDuplicationConstCopyPropagation) {
620 constantAndCopyPropagate(*BB, DuplicatedBlocks);
621 BinaryBasicBlock *FirstBB = BlocksToDuplicate[0];
622 if (FirstBB->pred_size() == 1) {
623 BinaryBasicBlock *PredBB = *FirstBB->pred_begin();
624 if (PredBB->succ_size() == 1)
625 constantAndCopyPropagate(*PredBB, BlocksToDuplicate);
629 // Layout indices might be stale after duplication
630 Function.getLayout().updateLayoutIndices();
632 if (ModifiedFunction)
633 ModifiedFunctions++;
636 void TailDuplication::runOnFunctions(BinaryContext &BC) {
637 if (opts::TailDuplicationMode == TailDuplication::TD_NONE)
638 return;
640 for (auto &It : BC.getBinaryFunctions()) {
641 BinaryFunction &Function = It.second;
642 if (!shouldOptimize(Function))
643 continue;
644 runOnFunction(Function);
647 outs() << "BOLT-INFO: tail duplication"
648 << format(" modified %zu (%.2f%%) functions;", ModifiedFunctions,
649 100.0 * ModifiedFunctions / BC.getBinaryFunctions().size())
650 << format(" duplicated %zu blocks (%zu bytes) responsible for",
651 DuplicatedBlockCount, DuplicatedByteCount)
652 << format(" %zu dynamic executions (%.2f%% of all block executions)",
653 DuplicationsDynamicCount,
654 100.0 * DuplicationsDynamicCount / AllDynamicCount)
655 << "\n";
657 if (opts::TailDuplicationConstCopyPropagation) {
658 outs() << "BOLT-INFO: tail duplication "
659 << format("applied %zu static and %zu dynamic propagation deletions",
660 StaticInstructionDeletionCount,
661 DynamicInstructionDeletionCount)
662 << "\n";
666 } // end namespace bolt
667 } // end namespace llvm