1 //===- bolt/Passes/TailDuplication.cpp ------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the TailDuplication class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/TailDuplication.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/MC/MCRegisterInfo.h"
20 #define DEBUG_TYPE "taildup"
26 extern cl::OptionCategory BoltOptCategory
;
27 extern cl::opt
<bool> NoThreads
;
29 cl::opt
<bolt::TailDuplication::DuplicationMode
> TailDuplicationMode(
31 cl::desc("duplicate unconditional branches that cross a cache line"),
32 cl::init(bolt::TailDuplication::TD_NONE
),
33 cl::values(clEnumValN(bolt::TailDuplication::TD_NONE
, "none",
35 clEnumValN(bolt::TailDuplication::TD_AGGRESSIVE
, "aggressive",
36 "aggressive strategy"),
37 clEnumValN(bolt::TailDuplication::TD_MODERATE
, "moderate",
39 clEnumValN(bolt::TailDuplication::TD_CACHE
, "cache",
40 "cache-aware duplication strategy")),
41 cl::ZeroOrMore
, cl::Hidden
, cl::cat(BoltOptCategory
));
43 static cl::opt
<unsigned>
44 TailDuplicationMinimumOffset("tail-duplication-minimum-offset",
45 cl::desc("minimum offset needed between block "
46 "and successor to allow duplication"),
47 cl::ReallyHidden
, cl::init(64),
48 cl::cat(BoltOptCategory
));
50 static cl::opt
<unsigned> TailDuplicationMaximumDuplication(
51 "tail-duplication-maximum-duplication",
52 cl::desc("tail blocks whose size (in bytes) exceeds the value are never "
54 cl::ZeroOrMore
, cl::ReallyHidden
, cl::init(24), cl::cat(BoltOptCategory
));
56 static cl::opt
<unsigned> TailDuplicationMinimumDuplication(
57 "tail-duplication-minimum-duplication",
58 cl::desc("tail blocks with size (in bytes) not exceeding the value are "
60 cl::ReallyHidden
, cl::init(2), cl::cat(BoltOptCategory
));
62 static cl::opt
<bool> TailDuplicationConstCopyPropagation(
63 "tail-duplication-const-copy-propagation",
64 cl::desc("enable const and copy propagation after tail duplication"),
65 cl::ReallyHidden
, cl::init(false), cl::cat(BoltOptCategory
));
67 static cl::opt
<unsigned> TailDuplicationMaxCacheDistance(
68 "tail-duplication-max-cache-distance",
69 cl::desc("The weight of backward jumps for ExtTSP value"), cl::init(256),
70 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
72 static cl::opt
<double> TailDuplicationCacheBackwardWeight(
73 "tail-duplication-cache-backward-weight",
75 "The maximum distance (in bytes) of backward jumps for ExtTSP value"),
76 cl::init(0.5), cl::ReallyHidden
, cl::cat(BoltOptCategory
));
83 void TailDuplication::getCallerSavedRegs(const MCInst
&Inst
, BitVector
&Regs
,
84 BinaryContext
&BC
) const {
85 if (!BC
.MIB
->isCall(Inst
))
87 BitVector CallRegs
= BitVector(BC
.MRI
->getNumRegs(), false);
88 BC
.MIB
->getCalleeSavedRegs(CallRegs
);
93 bool TailDuplication::regIsPossiblyOverwritten(const MCInst
&Inst
, unsigned Reg
,
94 BinaryContext
&BC
) const {
95 BitVector WrittenRegs
= BitVector(BC
.MRI
->getNumRegs(), false);
96 BC
.MIB
->getWrittenRegs(Inst
, WrittenRegs
);
97 getCallerSavedRegs(Inst
, WrittenRegs
, BC
);
98 if (BC
.MIB
->isRep(Inst
))
99 BC
.MIB
->getRepRegs(WrittenRegs
);
100 WrittenRegs
&= BC
.MIB
->getAliases(Reg
, false);
101 return WrittenRegs
.any();
104 bool TailDuplication::regIsDefinitelyOverwritten(const MCInst
&Inst
,
106 BinaryContext
&BC
) const {
107 BitVector WrittenRegs
= BitVector(BC
.MRI
->getNumRegs(), false);
108 BC
.MIB
->getWrittenRegs(Inst
, WrittenRegs
);
109 getCallerSavedRegs(Inst
, WrittenRegs
, BC
);
110 if (BC
.MIB
->isRep(Inst
))
111 BC
.MIB
->getRepRegs(WrittenRegs
);
112 return (!regIsUsed(Inst
, Reg
, BC
) && WrittenRegs
.test(Reg
) &&
113 !BC
.MIB
->isConditionalMove(Inst
));
116 bool TailDuplication::regIsUsed(const MCInst
&Inst
, unsigned Reg
,
117 BinaryContext
&BC
) const {
118 BitVector SrcRegs
= BitVector(BC
.MRI
->getNumRegs(), false);
119 BC
.MIB
->getSrcRegs(Inst
, SrcRegs
);
120 SrcRegs
&= BC
.MIB
->getAliases(Reg
, true);
121 return SrcRegs
.any();
124 bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock
&StartBB
,
125 unsigned Reg
) const {
126 BinaryFunction
*BF
= StartBB
.getFunction();
127 BinaryContext
&BC
= BF
->getBinaryContext();
128 std::queue
<BinaryBasicBlock
*> Q
;
129 for (auto Itr
= StartBB
.succ_begin(); Itr
!= StartBB
.succ_end(); ++Itr
) {
130 BinaryBasicBlock
*NextBB
= *Itr
;
133 std::set
<BinaryBasicBlock
*> Visited
;
134 // Breadth first search through successive blocks and see if Reg is ever used
135 // before its overwritten
136 while (Q
.size() > 0) {
137 BinaryBasicBlock
*CurrBB
= Q
.front();
139 if (Visited
.count(CurrBB
))
141 Visited
.insert(CurrBB
);
142 bool Overwritten
= false;
143 for (auto Itr
= CurrBB
->begin(); Itr
!= CurrBB
->end(); ++Itr
) {
145 if (regIsUsed(Inst
, Reg
, BC
))
147 if (regIsDefinitelyOverwritten(Inst
, Reg
, BC
)) {
154 for (auto Itr
= CurrBB
->succ_begin(); Itr
!= CurrBB
->succ_end(); ++Itr
) {
155 BinaryBasicBlock
*NextBB
= *Itr
;
162 void TailDuplication::constantAndCopyPropagate(
163 BinaryBasicBlock
&OriginalBB
,
164 std::vector
<BinaryBasicBlock
*> &BlocksToPropagate
) {
165 BinaryFunction
*BF
= OriginalBB
.getFunction();
166 BinaryContext
&BC
= BF
->getBinaryContext();
168 BlocksToPropagate
.insert(BlocksToPropagate
.begin(), &OriginalBB
);
169 // Iterate through the original instructions to find one to propagate
170 for (auto Itr
= OriginalBB
.begin(); Itr
!= OriginalBB
.end(); ++Itr
) {
171 MCInst
&OriginalInst
= *Itr
;
172 // It must be a non conditional
173 if (BC
.MIB
->isConditionalMove(OriginalInst
))
176 // Move immediate or move register
177 if ((!BC
.MII
->get(OriginalInst
.getOpcode()).isMoveImmediate() ||
178 !OriginalInst
.getOperand(1).isImm()) &&
179 (!BC
.MII
->get(OriginalInst
.getOpcode()).isMoveReg() ||
180 !OriginalInst
.getOperand(1).isReg()))
183 // True if this is constant propagation and not copy propagation
184 bool ConstantProp
= BC
.MII
->get(OriginalInst
.getOpcode()).isMoveImmediate();
185 // The Register to replaced
186 unsigned Reg
= OriginalInst
.getOperand(0).getReg();
187 // True if the register to replace was replaced everywhere it was used
188 bool ReplacedEverywhere
= true;
189 // True if the register was definitely overwritten
190 bool Overwritten
= false;
191 // True if the register to replace and the register to replace with (for
192 // copy propagation) has not been overwritten and is still usable
193 bool RegsActive
= true;
195 // Iterate through successor blocks and through their instructions
196 for (BinaryBasicBlock
*NextBB
: BlocksToPropagate
) {
197 for (auto PropagateItr
=
198 ((NextBB
== &OriginalBB
) ? Itr
+ 1 : NextBB
->begin());
199 PropagateItr
< NextBB
->end(); ++PropagateItr
) {
200 MCInst
&PropagateInst
= *PropagateItr
;
201 if (regIsUsed(PropagateInst
, Reg
, BC
)) {
202 bool Replaced
= false;
203 // If both registers are active for copy propagation or the register
204 // to replace is active for constant propagation
206 // Set Replaced and so ReplacedEverwhere to false if it cannot be
207 // replaced (no replacing that opcode, Register is src and dest)
209 Replaced
= BC
.MIB
->replaceRegWithImm(
210 PropagateInst
, Reg
, OriginalInst
.getOperand(1).getImm());
212 Replaced
= BC
.MIB
->replaceRegWithReg(
213 PropagateInst
, Reg
, OriginalInst
.getOperand(1).getReg());
215 ReplacedEverywhere
= ReplacedEverywhere
&& Replaced
;
217 // For copy propagation, make sure no propagation happens after the
218 // register to replace with is overwritten
220 regIsPossiblyOverwritten(PropagateInst
,
221 OriginalInst
.getOperand(1).getReg(), BC
))
224 // Make sure no propagation happens after the register to replace is
226 if (regIsPossiblyOverwritten(PropagateInst
, Reg
, BC
))
229 // Record if the register to replace is overwritten
230 if (regIsDefinitelyOverwritten(PropagateInst
, Reg
, BC
)) {
239 // If the register was replaced everwhere and it was overwritten in either
240 // one of the iterated through blocks or one of the successor blocks, delete
241 // the original move instruction
242 if (ReplacedEverywhere
&&
244 isOverwrittenBeforeUsed(
245 *BlocksToPropagate
[BlocksToPropagate
.size() - 1], Reg
))) {
246 // If both registers are active for copy propagation or the register
247 // to replace is active for constant propagation
248 StaticInstructionDeletionCount
++;
249 DynamicInstructionDeletionCount
+= OriginalBB
.getExecutionCount();
250 Itr
= std::prev(OriginalBB
.eraseInstruction(Itr
));
255 bool TailDuplication::isInCacheLine(const BinaryBasicBlock
&BB
,
256 const BinaryBasicBlock
&Succ
) const {
260 uint64_t Distance
= 0;
261 int Direction
= (Succ
.getLayoutIndex() > BB
.getLayoutIndex()) ? 1 : -1;
263 for (unsigned I
= BB
.getLayoutIndex() + Direction
; I
!= Succ
.getLayoutIndex();
265 Distance
+= BB
.getFunction()->getLayout().getBlock(I
)->getOriginalSize();
266 if (Distance
> opts::TailDuplicationMinimumOffset
)
272 std::vector
<BinaryBasicBlock
*>
273 TailDuplication::moderateDuplicate(BinaryBasicBlock
&BB
,
274 BinaryBasicBlock
&Tail
) const {
275 std::vector
<BinaryBasicBlock
*> BlocksToDuplicate
;
276 // The block must be hot
277 if (BB
.getKnownExecutionCount() == 0)
278 return BlocksToDuplicate
;
279 // and its sucessor is not already in the same cache line
280 if (isInCacheLine(BB
, Tail
))
281 return BlocksToDuplicate
;
282 // and its size do not exceed the maximum allowed size
283 if (Tail
.getOriginalSize() > opts::TailDuplicationMaximumDuplication
)
284 return BlocksToDuplicate
;
285 // If duplicating would introduce a new branch, don't duplicate
286 for (auto Itr
= Tail
.succ_begin(); Itr
!= Tail
.succ_end(); ++Itr
) {
287 if ((*Itr
)->getLayoutIndex() == Tail
.getLayoutIndex() + 1)
288 return BlocksToDuplicate
;
291 BlocksToDuplicate
.push_back(&Tail
);
292 return BlocksToDuplicate
;
295 std::vector
<BinaryBasicBlock
*>
296 TailDuplication::aggressiveDuplicate(BinaryBasicBlock
&BB
,
297 BinaryBasicBlock
&Tail
) const {
298 std::vector
<BinaryBasicBlock
*> BlocksToDuplicate
;
299 // The block must be hot
300 if (BB
.getKnownExecutionCount() == 0)
301 return BlocksToDuplicate
;
302 // and its sucessor is not already in the same cache line
303 if (isInCacheLine(BB
, Tail
))
304 return BlocksToDuplicate
;
306 BinaryBasicBlock
*CurrBB
= &Tail
;
308 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
309 << CurrBB
->getName() << " to duplication list\n";);
310 BlocksToDuplicate
.push_back(CurrBB
);
312 if (CurrBB
->hasJumpTable()) {
313 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication "
314 "list due to a JT in "
315 << CurrBB
->getName() << '\n';);
316 BlocksToDuplicate
.clear();
320 // With no successors, we've reached the end and should duplicate all of
322 if (CurrBB
->succ_size() == 0)
325 // With two successors, if they're both a jump, we should duplicate all
326 // blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream of
328 if (CurrBB
->succ_size() >= 2) {
329 if (CurrBB
->getConditionalSuccessor(false)->getLayoutIndex() ==
330 CurrBB
->getLayoutIndex() + 1 ||
331 CurrBB
->getConditionalSuccessor(true)->getLayoutIndex() ==
332 CurrBB
->getLayoutIndex() + 1) {
333 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing "
334 "duplication list, can't find a simple stream at "
335 << CurrBB
->getName() << '\n';);
336 BlocksToDuplicate
.clear();
341 // With one successor, if its a jump, we should duplicate all blocks in
342 // BlocksToDuplicate. Otherwise, we should keep going
343 BinaryBasicBlock
*SuccBB
= CurrBB
->getSuccessor();
344 if (SuccBB
->getLayoutIndex() != CurrBB
->getLayoutIndex() + 1)
348 // Don't duplicate if its too much code
349 unsigned DuplicationByteCount
= std::accumulate(
350 std::begin(BlocksToDuplicate
), std::end(BlocksToDuplicate
), 0,
351 [](int value
, BinaryBasicBlock
*p
) {
352 return value
+ p
->getOriginalSize();
354 if (DuplicationByteCount
> opts::TailDuplicationMaximumDuplication
) {
355 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count ("
356 << DuplicationByteCount
<< ") exceeds maximum "
357 << opts::TailDuplicationMaximumDuplication
<< '\n';);
358 BlocksToDuplicate
.clear();
360 LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found "
361 << BlocksToDuplicate
.size() << " blocks to duplicate\n";);
362 return BlocksToDuplicate
;
365 bool TailDuplication::shouldDuplicate(BinaryBasicBlock
*Pred
,
366 BinaryBasicBlock
*Tail
) const {
369 // Cannot duplicate non-tail blocks
370 if (Tail
->succ_size() != 0)
372 // The blocks are already in the order
373 if (Pred
->getLayoutIndex() + 1 == Tail
->getLayoutIndex())
375 // No tail duplication for blocks with jump tables
376 if (Pred
->hasJumpTable())
378 if (Tail
->hasJumpTable())
384 double TailDuplication::cacheScore(uint64_t SrcAddr
, uint64_t SrcSize
,
385 uint64_t DstAddr
, uint64_t DstSize
,
386 uint64_t Count
) const {
387 assert(Count
!= BinaryBasicBlock::COUNT_NO_PROFILE
);
389 bool IsForwardJump
= SrcAddr
<= DstAddr
;
390 uint64_t JumpDistance
= 0;
391 // Computing the length of the jump so that it takes the sizes of the two
392 // blocks into consideration
394 JumpDistance
= (DstAddr
+ DstSize
) - (SrcAddr
);
396 JumpDistance
= (SrcAddr
+ SrcSize
) - (DstAddr
);
399 if (JumpDistance
>= opts::TailDuplicationMaxCacheDistance
)
401 double Prob
= 1.0 - static_cast<double>(JumpDistance
) /
402 opts::TailDuplicationMaxCacheDistance
;
403 return (IsForwardJump
? 1.0 : opts::TailDuplicationCacheBackwardWeight
) *
407 bool TailDuplication::cacheScoreImproved(const MCCodeEmitter
*Emitter
,
409 BinaryBasicBlock
*Pred
,
410 BinaryBasicBlock
*Tail
) const {
411 // Collect (estimated) basic block sizes
412 DenseMap
<const BinaryBasicBlock
*, uint64_t> BBSize
;
413 for (const BinaryBasicBlock
&BB
: BF
) {
414 BBSize
[&BB
] = std::max
<uint64_t>(BB
.estimateSize(Emitter
), 1);
417 // Build current addresses of basic blocks starting at the entry block
418 DenseMap
<BinaryBasicBlock
*, uint64_t> CurAddr
;
420 for (BinaryBasicBlock
*SrcBB
: BF
.getLayout().blocks()) {
421 CurAddr
[SrcBB
] = Addr
;
422 Addr
+= BBSize
[SrcBB
];
425 // Build new addresses (after duplication) starting at the entry block
426 DenseMap
<BinaryBasicBlock
*, uint64_t> NewAddr
;
428 for (BinaryBasicBlock
*SrcBB
: BF
.getLayout().blocks()) {
429 NewAddr
[SrcBB
] = Addr
;
430 Addr
+= BBSize
[SrcBB
];
432 Addr
+= BBSize
[Tail
];
435 // Compute the cache score for the existing layout of basic blocks
437 for (BinaryBasicBlock
*SrcBB
: BF
.getLayout().blocks()) {
438 auto BI
= SrcBB
->branch_info_begin();
439 for (BinaryBasicBlock
*DstBB
: SrcBB
->successors()) {
440 if (SrcBB
!= DstBB
) {
441 CurScore
+= cacheScore(CurAddr
[SrcBB
], BBSize
[SrcBB
], CurAddr
[DstBB
],
442 BBSize
[DstBB
], BI
->Count
);
448 // Compute the cache score for the layout of blocks after tail duplication
450 for (BinaryBasicBlock
*SrcBB
: BF
.getLayout().blocks()) {
451 auto BI
= SrcBB
->branch_info_begin();
452 for (BinaryBasicBlock
*DstBB
: SrcBB
->successors()) {
453 if (SrcBB
!= DstBB
) {
454 if (SrcBB
== Pred
&& DstBB
== Tail
) {
455 NewScore
+= cacheScore(NewAddr
[SrcBB
], BBSize
[SrcBB
],
456 NewAddr
[SrcBB
] + BBSize
[SrcBB
], BBSize
[DstBB
],
459 NewScore
+= cacheScore(NewAddr
[SrcBB
], BBSize
[SrcBB
], NewAddr
[DstBB
],
460 BBSize
[DstBB
], BI
->Count
);
467 return NewScore
> CurScore
;
470 std::vector
<BinaryBasicBlock
*>
471 TailDuplication::cacheDuplicate(const MCCodeEmitter
*Emitter
,
472 BinaryFunction
&BF
, BinaryBasicBlock
*Pred
,
473 BinaryBasicBlock
*Tail
) const {
474 std::vector
<BinaryBasicBlock
*> BlocksToDuplicate
;
476 // No need to duplicate cold basic blocks
477 if (Pred
->isCold() || Tail
->isCold()) {
478 return BlocksToDuplicate
;
480 // Always duplicate "small" tail basic blocks, which might be beneficial for
481 // code size, since a jump instruction is eliminated
482 if (Tail
->estimateSize(Emitter
) <= opts::TailDuplicationMinimumDuplication
) {
483 BlocksToDuplicate
.push_back(Tail
);
484 return BlocksToDuplicate
;
486 // Never duplicate "large" tail basic blocks
487 if (Tail
->estimateSize(Emitter
) > opts::TailDuplicationMaximumDuplication
) {
488 return BlocksToDuplicate
;
490 // Do not append basic blocks after the last hot block in the current layout
491 auto NextBlock
= BF
.getLayout().getBasicBlockAfter(Pred
);
492 if (NextBlock
== nullptr || (!Pred
->isCold() && NextBlock
->isCold())) {
493 return BlocksToDuplicate
;
496 // Duplicate the tail only if it improves the cache score
497 if (cacheScoreImproved(Emitter
, BF
, Pred
, Tail
)) {
498 BlocksToDuplicate
.push_back(Tail
);
501 return BlocksToDuplicate
;
504 std::vector
<BinaryBasicBlock
*> TailDuplication::duplicateBlocks(
505 BinaryBasicBlock
&BB
,
506 const std::vector
<BinaryBasicBlock
*> &BlocksToDuplicate
) const {
507 BinaryFunction
*BF
= BB
.getFunction();
508 BinaryContext
&BC
= BF
->getBinaryContext();
510 // Ratio of this new branches execution count to the total size of the
511 // successor's execution count. Used to set this new branches execution count
512 // and lower the old successor's execution count
513 double ExecutionCountRatio
=
514 BB
.getExecutionCount() >= BB
.getSuccessor()->getExecutionCount()
516 : (double)BB
.getExecutionCount() /
517 BB
.getSuccessor()->getExecutionCount();
519 // Use the last branch info when adding a successor to LastBB
520 BinaryBasicBlock::BinaryBranchInfo
&LastBI
=
521 BB
.getBranchInfo(*(BB
.getSuccessor()));
523 BinaryBasicBlock
*LastOriginalBB
= &BB
;
524 BinaryBasicBlock
*LastDuplicatedBB
= &BB
;
525 assert(LastDuplicatedBB
->succ_size() == 1 &&
526 "tail duplication cannot act on a block with more than 1 successor");
527 LastDuplicatedBB
->removeSuccessor(LastDuplicatedBB
->getSuccessor());
529 std::vector
<std::unique_ptr
<BinaryBasicBlock
>> DuplicatedBlocks
;
530 std::vector
<BinaryBasicBlock
*> DuplicatedBlocksToReturn
;
532 for (BinaryBasicBlock
*CurBB
: BlocksToDuplicate
) {
533 DuplicatedBlocks
.emplace_back(
534 BF
->createBasicBlock((BC
.Ctx
)->createNamedTempSymbol("tail-dup")));
535 BinaryBasicBlock
*NewBB
= DuplicatedBlocks
.back().get();
537 NewBB
->addInstructions(CurBB
->begin(), CurBB
->end());
538 // Set execution count as if it was just a copy of the original
539 NewBB
->setExecutionCount(CurBB
->getExecutionCount());
540 NewBB
->setIsCold(CurBB
->isCold());
541 LastDuplicatedBB
->addSuccessor(NewBB
, LastBI
);
543 DuplicatedBlocksToReturn
.push_back(NewBB
);
545 // As long as its not the first block, adjust both original and duplicated
546 // to what they should be
547 if (LastDuplicatedBB
!= &BB
) {
548 LastOriginalBB
->adjustExecutionCount(1.0 - ExecutionCountRatio
);
549 LastDuplicatedBB
->adjustExecutionCount(ExecutionCountRatio
);
552 if (CurBB
->succ_size() == 1)
553 LastBI
= CurBB
->getBranchInfo(*(CurBB
->getSuccessor()));
555 LastOriginalBB
= CurBB
;
556 LastDuplicatedBB
= NewBB
;
559 LastDuplicatedBB
->addSuccessors(
560 LastOriginalBB
->succ_begin(), LastOriginalBB
->succ_end(),
561 LastOriginalBB
->branch_info_begin(), LastOriginalBB
->branch_info_end());
563 LastOriginalBB
->adjustExecutionCount(1.0 - ExecutionCountRatio
);
564 LastDuplicatedBB
->adjustExecutionCount(ExecutionCountRatio
);
566 BF
->insertBasicBlocks(&BB
, std::move(DuplicatedBlocks
));
568 return DuplicatedBlocksToReturn
;
571 void TailDuplication::runOnFunction(BinaryFunction
&Function
) {
572 // Create a separate MCCodeEmitter to allow lock-free execution
573 BinaryContext::IndependentCodeEmitter Emitter
;
574 if (!opts::NoThreads
) {
575 Emitter
= Function
.getBinaryContext().createIndependentMCCodeEmitter();
578 Function
.getLayout().updateLayoutIndices();
580 // New blocks will be added and layout will change,
581 // so make a copy here to iterate over the original layout
582 BinaryFunction::BasicBlockOrderType
BlockLayout(
583 Function
.getLayout().block_begin(), Function
.getLayout().block_end());
584 bool ModifiedFunction
= false;
585 for (BinaryBasicBlock
*BB
: BlockLayout
) {
586 AllDynamicCount
+= BB
->getKnownExecutionCount();
588 // The block must be with one successor
589 if (BB
->succ_size() != 1)
591 BinaryBasicBlock
*Tail
= BB
->getSuccessor();
592 // Verify that the tail should be duplicated
593 if (!shouldDuplicate(BB
, Tail
))
596 std::vector
<BinaryBasicBlock
*> BlocksToDuplicate
;
597 if (opts::TailDuplicationMode
== TailDuplication::TD_AGGRESSIVE
) {
598 BlocksToDuplicate
= aggressiveDuplicate(*BB
, *Tail
);
599 } else if (opts::TailDuplicationMode
== TailDuplication::TD_MODERATE
) {
600 BlocksToDuplicate
= moderateDuplicate(*BB
, *Tail
);
601 } else if (opts::TailDuplicationMode
== TailDuplication::TD_CACHE
) {
602 BlocksToDuplicate
= cacheDuplicate(Emitter
.MCE
.get(), Function
, BB
, Tail
);
604 llvm_unreachable("unknown tail duplication mode");
607 if (BlocksToDuplicate
.empty())
610 // Apply the duplication
611 ModifiedFunction
= true;
612 DuplicationsDynamicCount
+= BB
->getExecutionCount();
613 auto DuplicatedBlocks
= duplicateBlocks(*BB
, BlocksToDuplicate
);
614 for (BinaryBasicBlock
*BB
: DuplicatedBlocks
) {
615 DuplicatedBlockCount
++;
616 DuplicatedByteCount
+= BB
->estimateSize(Emitter
.MCE
.get());
619 if (opts::TailDuplicationConstCopyPropagation
) {
620 constantAndCopyPropagate(*BB
, DuplicatedBlocks
);
621 BinaryBasicBlock
*FirstBB
= BlocksToDuplicate
[0];
622 if (FirstBB
->pred_size() == 1) {
623 BinaryBasicBlock
*PredBB
= *FirstBB
->pred_begin();
624 if (PredBB
->succ_size() == 1)
625 constantAndCopyPropagate(*PredBB
, BlocksToDuplicate
);
629 // Layout indices might be stale after duplication
630 Function
.getLayout().updateLayoutIndices();
632 if (ModifiedFunction
)
636 void TailDuplication::runOnFunctions(BinaryContext
&BC
) {
637 if (opts::TailDuplicationMode
== TailDuplication::TD_NONE
)
640 for (auto &It
: BC
.getBinaryFunctions()) {
641 BinaryFunction
&Function
= It
.second
;
642 if (!shouldOptimize(Function
))
644 runOnFunction(Function
);
647 outs() << "BOLT-INFO: tail duplication"
648 << format(" modified %zu (%.2f%%) functions;", ModifiedFunctions
,
649 100.0 * ModifiedFunctions
/ BC
.getBinaryFunctions().size())
650 << format(" duplicated %zu blocks (%zu bytes) responsible for",
651 DuplicatedBlockCount
, DuplicatedByteCount
)
652 << format(" %zu dynamic executions (%.2f%% of all block executions)",
653 DuplicationsDynamicCount
,
654 100.0 * DuplicationsDynamicCount
/ AllDynamicCount
)
657 if (opts::TailDuplicationConstCopyPropagation
) {
658 outs() << "BOLT-INFO: tail duplication "
659 << format("applied %zu static and %zu dynamic propagation deletions",
660 StaticInstructionDeletionCount
,
661 DynamicInstructionDeletionCount
)
666 } // end namespace bolt
667 } // end namespace llvm