1 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This utility class duplicates basic blocks ending in unconditional branches
10 // into the tails of their predecessors.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/TailDuplicator.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineSSAUpdater.h"
30 #include "llvm/CodeGen/MachineSizeOpts.h"
31 #include "llvm/CodeGen/TargetInstrInfo.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetMachine.h"
48 #define DEBUG_TYPE "tailduplication"
50 STATISTIC(NumTails
, "Number of tails duplicated");
51 STATISTIC(NumTailDups
, "Number of tail duplicated blocks");
52 STATISTIC(NumTailDupAdded
,
53 "Number of instructions added due to tail duplication");
54 STATISTIC(NumTailDupRemoved
,
55 "Number of instructions removed due to tail duplication");
56 STATISTIC(NumDeadBlocks
, "Number of dead blocks removed");
57 STATISTIC(NumAddedPHIs
, "Number of phis added");
59 // Heuristic for tail duplication.
60 static cl::opt
<unsigned> TailDuplicateSize(
62 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
65 static cl::opt
<unsigned> TailDupIndirectBranchSize(
66 "tail-dup-indirect-size",
67 cl::desc("Maximum instructions to consider tail duplicating blocks that "
68 "end with indirect branches."), cl::init(20),
72 TailDupVerify("tail-dup-verify",
73 cl::desc("Verify sanity of PHI instructions during taildup"),
74 cl::init(false), cl::Hidden
);
76 static cl::opt
<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
79 void TailDuplicator::initMF(MachineFunction
&MFin
, bool PreRegAlloc
,
80 const MachineBranchProbabilityInfo
*MBPIin
,
82 ProfileSummaryInfo
*PSIin
,
83 bool LayoutModeIn
, unsigned TailDupSizeIn
) {
85 TII
= MF
->getSubtarget().getInstrInfo();
86 TRI
= MF
->getSubtarget().getRegisterInfo();
87 MRI
= &MF
->getRegInfo();
92 TailDupSize
= TailDupSizeIn
;
94 assert(MBPI
!= nullptr && "Machine Branch Probability Info required");
96 LayoutMode
= LayoutModeIn
;
97 this->PreRegAlloc
= PreRegAlloc
;
100 static void VerifyPHIs(MachineFunction
&MF
, bool CheckExtra
) {
101 for (MachineBasicBlock
&MBB
: llvm::drop_begin(MF
)) {
102 SmallSetVector
<MachineBasicBlock
*, 8> Preds(MBB
.pred_begin(),
104 MachineBasicBlock::iterator MI
= MBB
.begin();
105 while (MI
!= MBB
.end()) {
108 for (MachineBasicBlock
*PredBB
: Preds
) {
110 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
111 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
112 if (PHIBB
== PredBB
) {
118 dbgs() << "Malformed PHI in " << printMBBReference(MBB
) << ": "
120 dbgs() << " missing input from predecessor "
121 << printMBBReference(*PredBB
) << '\n';
122 llvm_unreachable(nullptr);
126 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
127 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
128 if (CheckExtra
&& !Preds
.count(PHIBB
)) {
129 dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB
)
131 dbgs() << " extra input from predecessor "
132 << printMBBReference(*PHIBB
) << '\n';
133 llvm_unreachable(nullptr);
135 if (PHIBB
->getNumber() < 0) {
136 dbgs() << "Malformed PHI in " << printMBBReference(MBB
) << ": "
138 dbgs() << " non-existing " << printMBBReference(*PHIBB
) << '\n';
139 llvm_unreachable(nullptr);
147 /// Tail duplicate the block and cleanup.
148 /// \p IsSimple - return value of isSimpleBB
149 /// \p MBB - block to be duplicated
150 /// \p ForcedLayoutPred - If non-null, treat this block as the layout
151 /// predecessor, instead of using the ordering in MF
152 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
153 /// all Preds that received a copy of \p MBB.
154 /// \p RemovalCallback - if non-null, called just before MBB is deleted.
155 bool TailDuplicator::tailDuplicateAndUpdate(
156 bool IsSimple
, MachineBasicBlock
*MBB
,
157 MachineBasicBlock
*ForcedLayoutPred
,
158 SmallVectorImpl
<MachineBasicBlock
*> *DuplicatedPreds
,
159 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
,
160 SmallVectorImpl
<MachineBasicBlock
*> *CandidatePtr
) {
161 // Save the successors list.
162 SmallSetVector
<MachineBasicBlock
*, 8> Succs(MBB
->succ_begin(),
165 SmallVector
<MachineBasicBlock
*, 8> TDBBs
;
166 SmallVector
<MachineInstr
*, 16> Copies
;
167 if (!tailDuplicate(IsSimple
, MBB
, ForcedLayoutPred
,
168 TDBBs
, Copies
, CandidatePtr
))
173 SmallVector
<MachineInstr
*, 8> NewPHIs
;
174 MachineSSAUpdater
SSAUpdate(*MF
, &NewPHIs
);
176 // TailBB's immediate successors are now successors of those predecessors
177 // which duplicated TailBB. Add the predecessors as sources to the PHI
179 bool isDead
= MBB
->pred_empty() && !MBB
->hasAddressTaken();
181 updateSuccessorsPHIs(MBB
, isDead
, TDBBs
, Succs
);
183 // If it is dead, remove it.
185 NumTailDupRemoved
+= MBB
->size();
186 removeDeadBlock(MBB
, RemovalCallback
);
191 if (!SSAUpdateVRs
.empty()) {
192 for (unsigned i
= 0, e
= SSAUpdateVRs
.size(); i
!= e
; ++i
) {
193 unsigned VReg
= SSAUpdateVRs
[i
];
194 SSAUpdate
.Initialize(VReg
);
196 // If the original definition is still around, add it as an available
198 MachineInstr
*DefMI
= MRI
->getVRegDef(VReg
);
199 MachineBasicBlock
*DefBB
= nullptr;
201 DefBB
= DefMI
->getParent();
202 SSAUpdate
.AddAvailableValue(DefBB
, VReg
);
205 // Add the new vregs as available values.
206 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
207 SSAUpdateVals
.find(VReg
);
208 for (std::pair
<MachineBasicBlock
*, Register
> &J
: LI
->second
) {
209 MachineBasicBlock
*SrcBB
= J
.first
;
210 Register SrcReg
= J
.second
;
211 SSAUpdate
.AddAvailableValue(SrcBB
, SrcReg
);
214 SmallVector
<MachineOperand
*> DebugUses
;
215 // Rewrite uses that are outside of the original def's block.
216 for (MachineOperand
&UseMO
:
217 llvm::make_early_inc_range(MRI
->use_operands(VReg
))) {
218 MachineInstr
*UseMI
= UseMO
.getParent();
219 // Rewrite debug uses last so that they can take advantage of any
220 // register mappings introduced by other users in its BB, since we
221 // cannot create new register definitions specifically for the debug
222 // instruction (as debug instructions should not affect CodeGen).
223 if (UseMI
->isDebugValue()) {
224 DebugUses
.push_back(&UseMO
);
227 if (UseMI
->getParent() == DefBB
&& !UseMI
->isPHI())
229 SSAUpdate
.RewriteUse(UseMO
);
231 for (auto *UseMO
: DebugUses
) {
232 MachineInstr
*UseMI
= UseMO
->getParent();
234 SSAUpdate
.GetValueInMiddleOfBlock(UseMI
->getParent(), true));
238 SSAUpdateVRs
.clear();
239 SSAUpdateVals
.clear();
242 // Eliminate some of the copies inserted by tail duplication to maintain
244 for (unsigned i
= 0, e
= Copies
.size(); i
!= e
; ++i
) {
245 MachineInstr
*Copy
= Copies
[i
];
248 Register Dst
= Copy
->getOperand(0).getReg();
249 Register Src
= Copy
->getOperand(1).getReg();
250 if (MRI
->hasOneNonDBGUse(Src
) &&
251 MRI
->constrainRegClass(Src
, MRI
->getRegClass(Dst
))) {
252 // Copy is the only use. Do trivial copy propagation here.
253 MRI
->replaceRegWith(Dst
, Src
);
254 Copy
->eraseFromParent();
259 NumAddedPHIs
+= NewPHIs
.size();
262 *DuplicatedPreds
= std::move(TDBBs
);
267 /// Look for small blocks that are unconditionally branched to and do not fall
268 /// through. Tail-duplicate their instructions into their predecessors to
269 /// eliminate (dynamic) branches.
270 bool TailDuplicator::tailDuplicateBlocks() {
271 bool MadeChange
= false;
273 if (PreRegAlloc
&& TailDupVerify
) {
274 LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
275 VerifyPHIs(*MF
, true);
278 for (MachineBasicBlock
&MBB
:
279 llvm::make_early_inc_range(llvm::drop_begin(*MF
))) {
280 if (NumTails
== TailDupLimit
)
283 bool IsSimple
= isSimpleBB(&MBB
);
285 if (!shouldTailDuplicate(IsSimple
, MBB
))
288 MadeChange
|= tailDuplicateAndUpdate(IsSimple
, &MBB
, nullptr);
291 if (PreRegAlloc
&& TailDupVerify
)
292 VerifyPHIs(*MF
, false);
297 static bool isDefLiveOut(Register Reg
, MachineBasicBlock
*BB
,
298 const MachineRegisterInfo
*MRI
) {
299 for (MachineInstr
&UseMI
: MRI
->use_instructions(Reg
)) {
300 if (UseMI
.isDebugValue())
302 if (UseMI
.getParent() != BB
)
308 static unsigned getPHISrcRegOpIdx(MachineInstr
*MI
, MachineBasicBlock
*SrcBB
) {
309 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2)
310 if (MI
->getOperand(i
+ 1).getMBB() == SrcBB
)
315 // Remember which registers are used by phis in this block. This is
316 // used to determine which registers are liveout while modifying the
317 // block (which is why we need to copy the information).
318 static void getRegsUsedByPHIs(const MachineBasicBlock
&BB
,
319 DenseSet
<Register
> *UsedByPhi
) {
320 for (const auto &MI
: BB
) {
323 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
324 Register SrcReg
= MI
.getOperand(i
).getReg();
325 UsedByPhi
->insert(SrcReg
);
330 /// Add a definition and source virtual registers pair for SSA update.
331 void TailDuplicator::addSSAUpdateEntry(Register OrigReg
, Register NewReg
,
332 MachineBasicBlock
*BB
) {
333 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
334 SSAUpdateVals
.find(OrigReg
);
335 if (LI
!= SSAUpdateVals
.end())
336 LI
->second
.push_back(std::make_pair(BB
, NewReg
));
338 AvailableValsTy Vals
;
339 Vals
.push_back(std::make_pair(BB
, NewReg
));
340 SSAUpdateVals
.insert(std::make_pair(OrigReg
, Vals
));
341 SSAUpdateVRs
.push_back(OrigReg
);
345 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
346 /// source register that's contributed by PredBB and update SSA update map.
347 void TailDuplicator::processPHI(
348 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
349 DenseMap
<Register
, RegSubRegPair
> &LocalVRMap
,
350 SmallVectorImpl
<std::pair
<Register
, RegSubRegPair
>> &Copies
,
351 const DenseSet
<Register
> &RegsUsedByPhi
, bool Remove
) {
352 Register DefReg
= MI
->getOperand(0).getReg();
353 unsigned SrcOpIdx
= getPHISrcRegOpIdx(MI
, PredBB
);
354 assert(SrcOpIdx
&& "Unable to find matching PHI source?");
355 Register SrcReg
= MI
->getOperand(SrcOpIdx
).getReg();
356 unsigned SrcSubReg
= MI
->getOperand(SrcOpIdx
).getSubReg();
357 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefReg
);
358 LocalVRMap
.insert(std::make_pair(DefReg
, RegSubRegPair(SrcReg
, SrcSubReg
)));
360 // Insert a copy from source to the end of the block. The def register is the
361 // available value liveout of the block.
362 Register NewDef
= MRI
->createVirtualRegister(RC
);
363 Copies
.push_back(std::make_pair(NewDef
, RegSubRegPair(SrcReg
, SrcSubReg
)));
364 if (isDefLiveOut(DefReg
, TailBB
, MRI
) || RegsUsedByPhi
.count(DefReg
))
365 addSSAUpdateEntry(DefReg
, NewDef
, PredBB
);
370 // Remove PredBB from the PHI node.
371 MI
->removeOperand(SrcOpIdx
+ 1);
372 MI
->removeOperand(SrcOpIdx
);
373 if (MI
->getNumOperands() == 1)
374 MI
->eraseFromParent();
377 /// Duplicate a TailBB instruction to PredBB and update
378 /// the source operands due to earlier PHI translation.
379 void TailDuplicator::duplicateInstruction(
380 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
381 DenseMap
<Register
, RegSubRegPair
> &LocalVRMap
,
382 const DenseSet
<Register
> &UsedByPhi
) {
383 // Allow duplication of CFI instructions.
384 if (MI
->isCFIInstruction()) {
385 BuildMI(*PredBB
, PredBB
->end(), PredBB
->findDebugLoc(PredBB
->begin()),
386 TII
->get(TargetOpcode::CFI_INSTRUCTION
))
387 .addCFIIndex(MI
->getOperand(0).getCFIIndex())
388 .setMIFlags(MI
->getFlags());
391 MachineInstr
&NewMI
= TII
->duplicate(*PredBB
, PredBB
->end(), *MI
);
393 for (unsigned i
= 0, e
= NewMI
.getNumOperands(); i
!= e
; ++i
) {
394 MachineOperand
&MO
= NewMI
.getOperand(i
);
397 Register Reg
= MO
.getReg();
398 if (!Register::isVirtualRegister(Reg
))
401 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
402 Register NewReg
= MRI
->createVirtualRegister(RC
);
404 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
405 if (isDefLiveOut(Reg
, TailBB
, MRI
) || UsedByPhi
.count(Reg
))
406 addSSAUpdateEntry(Reg
, NewReg
, PredBB
);
408 auto VI
= LocalVRMap
.find(Reg
);
409 if (VI
!= LocalVRMap
.end()) {
410 // Need to make sure that the register class of the mapped register
411 // will satisfy the constraints of the class of the register being
413 auto *OrigRC
= MRI
->getRegClass(Reg
);
414 auto *MappedRC
= MRI
->getRegClass(VI
->second
.Reg
);
415 const TargetRegisterClass
*ConstrRC
;
416 if (VI
->second
.SubReg
!= 0) {
417 ConstrRC
= TRI
->getMatchingSuperRegClass(MappedRC
, OrigRC
,
420 // The actual constraining (as in "find appropriate new class")
421 // is done by getMatchingSuperRegClass, so now we only need to
422 // change the class of the mapped register.
423 MRI
->setRegClass(VI
->second
.Reg
, ConstrRC
);
426 // For mapped registers that do not have sub-registers, simply
427 // restrict their class to match the original one.
428 ConstrRC
= MRI
->constrainRegClass(VI
->second
.Reg
, OrigRC
);
432 // If the class constraining succeeded, we can simply replace
433 // the old register with the mapped one.
434 MO
.setReg(VI
->second
.Reg
);
435 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
436 // sub-register, we need to compose the sub-register indices.
437 MO
.setSubReg(TRI
->composeSubRegIndices(MO
.getSubReg(),
440 // The direct replacement is not possible, due to failing register
441 // class constraints. An explicit COPY is necessary. Create one
442 // that can be reused
443 auto *NewRC
= MI
->getRegClassConstraint(i
, TII
, TRI
);
444 if (NewRC
== nullptr)
446 Register NewReg
= MRI
->createVirtualRegister(NewRC
);
447 BuildMI(*PredBB
, NewMI
, NewMI
.getDebugLoc(),
448 TII
->get(TargetOpcode::COPY
), NewReg
)
449 .addReg(VI
->second
.Reg
, 0, VI
->second
.SubReg
);
450 LocalVRMap
.erase(VI
);
451 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
453 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
454 // is equivalent to the whole register Reg. Hence, Reg:subreg
455 // is same as NewReg:subreg, so keep the sub-register index
458 // Clear any kill flags from this operand. The new register could
459 // have uses after this one, so kills are not valid here.
467 /// After FromBB is tail duplicated into its predecessor blocks, the successors
468 /// have gained new predecessors. Update the PHI instructions in them
470 void TailDuplicator::updateSuccessorsPHIs(
471 MachineBasicBlock
*FromBB
, bool isDead
,
472 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
473 SmallSetVector
<MachineBasicBlock
*, 8> &Succs
) {
474 for (MachineBasicBlock
*SuccBB
: Succs
) {
475 for (MachineInstr
&MI
: *SuccBB
) {
478 MachineInstrBuilder
MIB(*FromBB
->getParent(), MI
);
480 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
481 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
482 if (MO
.getMBB() == FromBB
) {
489 MachineOperand
&MO0
= MI
.getOperand(Idx
);
490 Register Reg
= MO0
.getReg();
492 // Folded into the previous BB.
493 // There could be duplicate phi source entries. FIXME: Should sdisel
494 // or earlier pass fixed this?
495 for (unsigned i
= MI
.getNumOperands() - 2; i
!= Idx
; i
-= 2) {
496 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
497 if (MO
.getMBB() == FromBB
) {
498 MI
.removeOperand(i
+ 1);
505 // If Idx is set, the operands at Idx and Idx+1 must be removed.
506 // We reuse the location to avoid expensive removeOperand calls.
508 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
509 SSAUpdateVals
.find(Reg
);
510 if (LI
!= SSAUpdateVals
.end()) {
511 // This register is defined in the tail block.
512 for (const std::pair
<MachineBasicBlock
*, Register
> &J
: LI
->second
) {
513 MachineBasicBlock
*SrcBB
= J
.first
;
514 // If we didn't duplicate a bb into a particular predecessor, we
515 // might still have added an entry to SSAUpdateVals to correcly
516 // recompute SSA. If that case, avoid adding a dummy extra argument
518 if (!SrcBB
->isSuccessor(SuccBB
))
521 Register SrcReg
= J
.second
;
523 MI
.getOperand(Idx
).setReg(SrcReg
);
524 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
527 MIB
.addReg(SrcReg
).addMBB(SrcBB
);
531 // Live in tail block, must also be live in predecessors.
532 for (MachineBasicBlock
*SrcBB
: TDBBs
) {
534 MI
.getOperand(Idx
).setReg(Reg
);
535 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
538 MIB
.addReg(Reg
).addMBB(SrcBB
);
543 MI
.removeOperand(Idx
+ 1);
544 MI
.removeOperand(Idx
);
550 /// Determine if it is profitable to duplicate this block.
551 bool TailDuplicator::shouldTailDuplicate(bool IsSimple
,
552 MachineBasicBlock
&TailBB
) {
553 // When doing tail-duplication during layout, the block ordering is in flux,
554 // so canFallThrough returns a result based on incorrect information and
555 // should just be ignored.
556 if (!LayoutMode
&& TailBB
.canFallThrough())
559 // Don't try to tail-duplicate single-block loops.
560 if (TailBB
.isSuccessor(&TailBB
))
563 // Set the limit on the cost to duplicate. When optimizing for size,
564 // duplicate only one, because one branch instruction can be eliminated to
565 // compensate for the duplication.
566 unsigned MaxDuplicateCount
;
567 bool OptForSize
= MF
->getFunction().hasOptSize() ||
568 llvm::shouldOptimizeForSize(&TailBB
, PSI
, MBFI
);
569 if (TailDupSize
== 0)
570 MaxDuplicateCount
= TailDuplicateSize
;
572 MaxDuplicateCount
= TailDupSize
;
574 MaxDuplicateCount
= 1;
576 // If the block to be duplicated ends in an unanalyzable fallthrough, don't
578 // A similar check is necessary in MachineBlockPlacement to make sure pairs of
579 // blocks with unanalyzable fallthrough get layed out contiguously.
580 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
581 SmallVector
<MachineOperand
, 4> PredCond
;
582 if (TII
->analyzeBranch(TailBB
, PredTBB
, PredFBB
, PredCond
) &&
583 TailBB
.canFallThrough())
586 // If the target has hardware branch prediction that can handle indirect
587 // branches, duplicating them can often make them predictable when there
588 // are common paths through the code. The limit needs to be high enough
589 // to allow undoing the effects of tail merging and other optimizations
590 // that rearrange the predecessors of the indirect branch.
592 bool HasIndirectbr
= false;
594 HasIndirectbr
= TailBB
.back().isIndirectBranch();
596 if (HasIndirectbr
&& PreRegAlloc
)
597 MaxDuplicateCount
= TailDupIndirectBranchSize
;
599 // Check the instructions in the block to determine whether tail-duplication
600 // is invalid or unlikely to be profitable.
601 unsigned InstrCount
= 0;
602 for (MachineInstr
&MI
: TailBB
) {
603 // Non-duplicable things shouldn't be tail-duplicated.
604 // CFI instructions are marked as non-duplicable, because Darwin compact
605 // unwind info emission can't handle multiple prologue setups. In case of
606 // DWARF, allow them be duplicated, so that their existence doesn't prevent
607 // tail duplication of some basic blocks, that would be duplicated otherwise.
608 if (MI
.isNotDuplicable() &&
609 (TailBB
.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
610 !MI
.isCFIInstruction()))
613 // Convergent instructions can be duplicated only if doing so doesn't add
614 // new control dependencies, which is what we're going to do here.
615 if (MI
.isConvergent())
618 // Do not duplicate 'return' instructions if this is a pre-regalloc run.
619 // A return may expand into a lot more instructions (e.g. reload of callee
620 // saved registers) after PEI.
621 if (PreRegAlloc
&& MI
.isReturn())
624 // Avoid duplicating calls before register allocation. Calls presents a
625 // barrier to register allocation so duplicating them may end up increasing
627 if (PreRegAlloc
&& MI
.isCall())
630 // TailDuplicator::appendCopies will erroneously place COPYs after
631 // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same
632 // bug that was fixed in f7a53d82c090.
633 // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point
634 // for the COPY when replacing PHIs.
635 if (MI
.getOpcode() == TargetOpcode::INLINEASM_BR
)
639 InstrCount
+= MI
.getBundleSize();
640 else if (!MI
.isPHI() && !MI
.isMetaInstruction())
643 if (InstrCount
> MaxDuplicateCount
)
647 // Check if any of the successors of TailBB has a PHI node in which the
648 // value corresponding to TailBB uses a subregister.
649 // If a phi node uses a register paired with a subregister, the actual
650 // "value type" of the phi may differ from the type of the register without
651 // any subregisters. Due to a bug, tail duplication may add a new operand
652 // without a necessary subregister, producing an invalid code. This is
653 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
654 // Disable tail duplication for this case for now, until the problem is
656 for (auto *SB
: TailBB
.successors()) {
657 for (auto &I
: *SB
) {
660 unsigned Idx
= getPHISrcRegOpIdx(&I
, &TailBB
);
662 MachineOperand
&PU
= I
.getOperand(Idx
);
663 if (PU
.getSubReg() != 0)
668 if (HasIndirectbr
&& PreRegAlloc
)
677 return canCompletelyDuplicateBB(TailBB
);
680 /// True if this BB has only one unconditional jump.
681 bool TailDuplicator::isSimpleBB(MachineBasicBlock
*TailBB
) {
682 if (TailBB
->succ_size() != 1)
684 if (TailBB
->pred_empty())
686 MachineBasicBlock::iterator I
= TailBB
->getFirstNonDebugInstr(true);
687 if (I
== TailBB
->end())
689 return I
->isUnconditionalBranch();
692 static bool bothUsedInPHI(const MachineBasicBlock
&A
,
693 const SmallPtrSet
<MachineBasicBlock
*, 8> &SuccsB
) {
694 for (MachineBasicBlock
*BB
: A
.successors())
695 if (SuccsB
.count(BB
) && !BB
->empty() && BB
->begin()->isPHI())
701 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock
&BB
) {
702 for (MachineBasicBlock
*PredBB
: BB
.predecessors()) {
703 if (PredBB
->succ_size() > 1)
706 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
707 SmallVector
<MachineOperand
, 4> PredCond
;
708 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
711 if (!PredCond
.empty())
717 bool TailDuplicator::duplicateSimpleBB(
718 MachineBasicBlock
*TailBB
, SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
719 const DenseSet
<Register
> &UsedByPhi
) {
720 SmallPtrSet
<MachineBasicBlock
*, 8> Succs(TailBB
->succ_begin(),
722 SmallVector
<MachineBasicBlock
*, 8> Preds(TailBB
->predecessors());
723 bool Changed
= false;
724 for (MachineBasicBlock
*PredBB
: Preds
) {
725 if (PredBB
->hasEHPadSuccessor() || PredBB
->mayHaveInlineAsmBr())
728 if (bothUsedInPHI(*PredBB
, Succs
))
731 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
732 SmallVector
<MachineOperand
, 4> PredCond
;
733 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
737 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
738 << "From simple Succ: " << *TailBB
);
740 MachineBasicBlock
*NewTarget
= *TailBB
->succ_begin();
741 MachineBasicBlock
*NextBB
= PredBB
->getNextNode();
743 // Make PredFBB explicit.
744 if (PredCond
.empty())
747 // Make fall through explicit.
754 if (PredFBB
== TailBB
)
756 if (PredTBB
== TailBB
)
759 // Make the branch unconditional if possible
760 if (PredTBB
== PredFBB
) {
765 // Avoid adding fall through branches.
766 if (PredFBB
== NextBB
)
768 if (PredTBB
== NextBB
&& PredFBB
== nullptr)
771 auto DL
= PredBB
->findBranchDebugLoc();
772 TII
->removeBranch(*PredBB
);
774 if (!PredBB
->isSuccessor(NewTarget
))
775 PredBB
->replaceSuccessor(TailBB
, NewTarget
);
777 PredBB
->removeSuccessor(TailBB
, true);
778 assert(PredBB
->succ_size() <= 1);
782 TII
->insertBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, DL
);
784 TDBBs
.push_back(PredBB
);
789 bool TailDuplicator::canTailDuplicate(MachineBasicBlock
*TailBB
,
790 MachineBasicBlock
*PredBB
) {
791 // EH edges are ignored by analyzeBranch.
792 if (PredBB
->succ_size() > 1)
795 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
796 SmallVector
<MachineOperand
, 4> PredCond
;
797 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
799 if (!PredCond
.empty())
804 /// If it is profitable, duplicate TailBB's contents in each
805 /// of its predecessors.
806 /// \p IsSimple result of isSimpleBB
807 /// \p TailBB Block to be duplicated.
808 /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
809 /// instead of the previous block in MF's order.
810 /// \p TDBBs A vector to keep track of all blocks tail-duplicated
812 /// \p Copies A vector of copy instructions inserted. Used later to
813 /// walk all the inserted copies and remove redundant ones.
814 bool TailDuplicator::tailDuplicate(bool IsSimple
, MachineBasicBlock
*TailBB
,
815 MachineBasicBlock
*ForcedLayoutPred
,
816 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
817 SmallVectorImpl
<MachineInstr
*> &Copies
,
818 SmallVectorImpl
<MachineBasicBlock
*> *CandidatePtr
) {
819 LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB
)
822 bool ShouldUpdateTerminators
= TailBB
->canFallThrough();
824 DenseSet
<Register
> UsedByPhi
;
825 getRegsUsedByPHIs(*TailBB
, &UsedByPhi
);
828 return duplicateSimpleBB(TailBB
, TDBBs
, UsedByPhi
);
830 // Iterate through all the unique predecessors and tail-duplicate this
831 // block into them, if possible. Copying the list ahead of time also
832 // avoids trouble with the predecessor list reallocating.
833 bool Changed
= false;
834 SmallSetVector
<MachineBasicBlock
*, 8> Preds
;
836 Preds
.insert(CandidatePtr
->begin(), CandidatePtr
->end());
838 Preds
.insert(TailBB
->pred_begin(), TailBB
->pred_end());
840 for (MachineBasicBlock
*PredBB
: Preds
) {
841 assert(TailBB
!= PredBB
&&
842 "Single-block loop should have been rejected earlier!");
844 if (!canTailDuplicate(TailBB
, PredBB
))
847 // Don't duplicate into a fall-through predecessor (at least for now).
848 // If profile is available, findDuplicateCandidates can choose better
849 // fall-through predecessor.
850 if (!(MF
->getFunction().hasProfileData() && LayoutMode
)) {
851 bool IsLayoutSuccessor
= false;
852 if (ForcedLayoutPred
)
853 IsLayoutSuccessor
= (ForcedLayoutPred
== PredBB
);
854 else if (PredBB
->isLayoutSuccessor(TailBB
) && PredBB
->canFallThrough())
855 IsLayoutSuccessor
= true;
856 if (IsLayoutSuccessor
)
860 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
861 << "From Succ: " << *TailBB
);
863 TDBBs
.push_back(PredBB
);
865 // Remove PredBB's unconditional branch.
866 TII
->removeBranch(*PredBB
);
868 // Clone the contents of TailBB into PredBB.
869 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
870 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
871 for (MachineInstr
&MI
: llvm::make_early_inc_range(*TailBB
)) {
873 // Replace the uses of the def of the PHI with the register coming
875 processPHI(&MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
877 // Replace def of virtual registers with new registers, and update
878 // uses with PHI source register or the new registers.
879 duplicateInstruction(&MI
, TailBB
, PredBB
, LocalVRMap
, UsedByPhi
);
882 appendCopies(PredBB
, CopyInfos
, Copies
);
884 NumTailDupAdded
+= TailBB
->size() - 1; // subtract one for removed branch
887 PredBB
->removeSuccessor(PredBB
->succ_begin());
888 assert(PredBB
->succ_empty() &&
889 "TailDuplicate called on block with multiple successors!");
890 for (MachineBasicBlock
*Succ
: TailBB
->successors())
891 PredBB
->addSuccessor(Succ
, MBPI
->getEdgeProbability(TailBB
, Succ
));
893 // Update branches in pred to jump to tail's layout successor if needed.
894 if (ShouldUpdateTerminators
)
895 PredBB
->updateTerminator(TailBB
->getNextNode());
901 // If TailBB was duplicated into all its predecessors except for the prior
902 // block, which falls through unconditionally, move the contents of this
903 // block into the prior block.
904 MachineBasicBlock
*PrevBB
= ForcedLayoutPred
;
906 PrevBB
= &*std::prev(TailBB
->getIterator());
907 MachineBasicBlock
*PriorTBB
= nullptr, *PriorFBB
= nullptr;
908 SmallVector
<MachineOperand
, 4> PriorCond
;
909 // This has to check PrevBB->succ_size() because EH edges are ignored by
911 if (PrevBB
->succ_size() == 1 &&
912 // Layout preds are not always CFG preds. Check.
913 *PrevBB
->succ_begin() == TailBB
&&
914 !TII
->analyzeBranch(*PrevBB
, PriorTBB
, PriorFBB
, PriorCond
) &&
916 (!PriorTBB
|| PriorTBB
== TailBB
) &&
917 TailBB
->pred_size() == 1 &&
918 !TailBB
->hasAddressTaken()) {
919 LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
920 << "From MBB: " << *TailBB
);
921 // There may be a branch to the layout successor. This is unlikely but it
922 // happens. The correct thing to do is to remove the branch before
923 // duplicating the instructions in all cases.
924 bool RemovedBranches
= TII
->removeBranch(*PrevBB
) != 0;
926 // If there are still tail instructions, abort the merge
927 if (PrevBB
->getFirstTerminator() == PrevBB
->end()) {
929 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
930 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
931 MachineBasicBlock::iterator I
= TailBB
->begin();
932 // Process PHI instructions first.
933 while (I
!= TailBB
->end() && I
->isPHI()) {
934 // Replace the uses of the def of the PHI with the register coming
936 MachineInstr
*MI
= &*I
++;
937 processPHI(MI
, TailBB
, PrevBB
, LocalVRMap
, CopyInfos
, UsedByPhi
,
941 // Now copy the non-PHI instructions.
942 while (I
!= TailBB
->end()) {
943 // Replace def of virtual registers with new registers, and update
944 // uses with PHI source register or the new registers.
945 MachineInstr
*MI
= &*I
++;
946 assert(!MI
->isBundle() && "Not expecting bundles before regalloc!");
947 duplicateInstruction(MI
, TailBB
, PrevBB
, LocalVRMap
, UsedByPhi
);
948 MI
->eraseFromParent();
950 appendCopies(PrevBB
, CopyInfos
, Copies
);
952 TII
->removeBranch(*PrevBB
);
953 // No PHIs to worry about, just splice the instructions over.
954 PrevBB
->splice(PrevBB
->end(), TailBB
, TailBB
->begin(), TailBB
->end());
956 PrevBB
->removeSuccessor(PrevBB
->succ_begin());
957 assert(PrevBB
->succ_empty());
958 PrevBB
->transferSuccessors(TailBB
);
960 // Update branches in PrevBB based on Tail's layout successor.
961 if (ShouldUpdateTerminators
)
962 PrevBB
->updateTerminator(TailBB
->getNextNode());
964 TDBBs
.push_back(PrevBB
);
967 LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still "
968 "contains terminator instructions");
969 // Return early if no changes were made
971 return RemovedBranches
;
973 Changed
|= RemovedBranches
;
976 // If this is after register allocation, there are no phis to fix.
980 // If we made no changes so far, we are safe.
984 // Handle the nasty case in that we duplicated a block that is part of a loop
985 // into some but not all of its predecessors. For example:
989 // if we duplicate 2 into 1 but not into 3, we end up with
990 // 12 -> 3 <-> 2 -> rest |
993 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
994 // with a phi in 3 (which now dominates 2).
995 // What we do here is introduce a copy in 3 of the register defined by the
996 // phi, just like when we are duplicating 2 into 3, but we don't copy any
997 // real instructions or remove the 3 -> 2 edge from the phi in 2.
998 for (MachineBasicBlock
*PredBB
: Preds
) {
999 if (is_contained(TDBBs
, PredBB
))
1003 if (PredBB
->succ_size() != 1)
1006 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
1007 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
1008 MachineBasicBlock::iterator I
= TailBB
->begin();
1009 // Process PHI instructions first.
1010 while (I
!= TailBB
->end() && I
->isPHI()) {
1011 // Replace the uses of the def of the PHI with the register coming
1013 MachineInstr
*MI
= &*I
++;
1014 processPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, false);
1016 appendCopies(PredBB
, CopyInfos
, Copies
);
1022 /// At the end of the block \p MBB generate COPY instructions between registers
1023 /// described by \p CopyInfos. Append resulting instructions to \p Copies.
1024 void TailDuplicator::appendCopies(MachineBasicBlock
*MBB
,
1025 SmallVectorImpl
<std::pair
<Register
, RegSubRegPair
>> &CopyInfos
,
1026 SmallVectorImpl
<MachineInstr
*> &Copies
) {
1027 MachineBasicBlock::iterator Loc
= MBB
->getFirstTerminator();
1028 const MCInstrDesc
&CopyD
= TII
->get(TargetOpcode::COPY
);
1029 for (auto &CI
: CopyInfos
) {
1030 auto C
= BuildMI(*MBB
, Loc
, DebugLoc(), CopyD
, CI
.first
)
1031 .addReg(CI
.second
.Reg
, 0, CI
.second
.SubReg
);
1032 Copies
.push_back(C
);
1036 /// Remove the specified dead machine basic block from the function, updating
1038 void TailDuplicator::removeDeadBlock(
1039 MachineBasicBlock
*MBB
,
1040 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
) {
1041 assert(MBB
->pred_empty() && "MBB must be dead!");
1042 LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB
);
1044 MachineFunction
*MF
= MBB
->getParent();
1045 // Update the call site info.
1046 for (const MachineInstr
&MI
: *MBB
)
1047 if (MI
.shouldUpdateCallSiteInfo())
1048 MF
->eraseCallSiteInfo(&MI
);
1050 if (RemovalCallback
)
1051 (*RemovalCallback
)(MBB
);
1053 // Remove all successors.
1054 while (!MBB
->succ_empty())
1055 MBB
->removeSuccessor(MBB
->succ_end() - 1);
1057 // Remove the block.
1058 MBB
->eraseFromParent();