1 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This utility class duplicates basic blocks ending in unconditional branches
10 // into the tails of their predecessors.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/TailDuplicator.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineSSAUpdater.h"
30 #include "llvm/CodeGen/TargetInstrInfo.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Target/TargetMachine.h"
47 #define DEBUG_TYPE "tailduplication"
49 STATISTIC(NumTails
, "Number of tails duplicated");
50 STATISTIC(NumTailDups
, "Number of tail duplicated blocks");
51 STATISTIC(NumTailDupAdded
,
52 "Number of instructions added due to tail duplication");
53 STATISTIC(NumTailDupRemoved
,
54 "Number of instructions removed due to tail duplication");
55 STATISTIC(NumDeadBlocks
, "Number of dead blocks removed");
56 STATISTIC(NumAddedPHIs
, "Number of phis added");
58 // Heuristic for tail duplication.
59 static cl::opt
<unsigned> TailDuplicateSize(
61 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
64 static cl::opt
<unsigned> TailDupIndirectBranchSize(
65 "tail-dup-indirect-size",
66 cl::desc("Maximum instructions to consider tail duplicating blocks that "
67 "end with indirect branches."), cl::init(20),
71 TailDupVerify("tail-dup-verify",
72 cl::desc("Verify sanity of PHI instructions during taildup"),
73 cl::init(false), cl::Hidden
);
75 static cl::opt
<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
78 void TailDuplicator::initMF(MachineFunction
&MFin
, bool PreRegAlloc
,
79 const MachineBranchProbabilityInfo
*MBPIin
,
80 bool LayoutModeIn
, unsigned TailDupSizeIn
) {
82 TII
= MF
->getSubtarget().getInstrInfo();
83 TRI
= MF
->getSubtarget().getRegisterInfo();
84 MRI
= &MF
->getRegInfo();
87 TailDupSize
= TailDupSizeIn
;
89 assert(MBPI
!= nullptr && "Machine Branch Probability Info required");
91 LayoutMode
= LayoutModeIn
;
92 this->PreRegAlloc
= PreRegAlloc
;
95 static void VerifyPHIs(MachineFunction
&MF
, bool CheckExtra
) {
96 for (MachineFunction::iterator I
= ++MF
.begin(), E
= MF
.end(); I
!= E
; ++I
) {
97 MachineBasicBlock
*MBB
= &*I
;
98 SmallSetVector
<MachineBasicBlock
*, 8> Preds(MBB
->pred_begin(),
100 MachineBasicBlock::iterator MI
= MBB
->begin();
101 while (MI
!= MBB
->end()) {
104 for (MachineBasicBlock
*PredBB
: Preds
) {
106 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
107 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
108 if (PHIBB
== PredBB
) {
114 dbgs() << "Malformed PHI in " << printMBBReference(*MBB
) << ": "
116 dbgs() << " missing input from predecessor "
117 << printMBBReference(*PredBB
) << '\n';
118 llvm_unreachable(nullptr);
122 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
123 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
124 if (CheckExtra
&& !Preds
.count(PHIBB
)) {
125 dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB
)
127 dbgs() << " extra input from predecessor "
128 << printMBBReference(*PHIBB
) << '\n';
129 llvm_unreachable(nullptr);
131 if (PHIBB
->getNumber() < 0) {
132 dbgs() << "Malformed PHI in " << printMBBReference(*MBB
) << ": "
134 dbgs() << " non-existing " << printMBBReference(*PHIBB
) << '\n';
135 llvm_unreachable(nullptr);
143 /// Tail duplicate the block and cleanup.
144 /// \p IsSimple - return value of isSimpleBB
145 /// \p MBB - block to be duplicated
146 /// \p ForcedLayoutPred - If non-null, treat this block as the layout
147 /// predecessor, instead of using the ordering in MF
148 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
149 /// all Preds that received a copy of \p MBB.
150 /// \p RemovalCallback - if non-null, called just before MBB is deleted.
151 bool TailDuplicator::tailDuplicateAndUpdate(
152 bool IsSimple
, MachineBasicBlock
*MBB
,
153 MachineBasicBlock
*ForcedLayoutPred
,
154 SmallVectorImpl
<MachineBasicBlock
*> *DuplicatedPreds
,
155 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
) {
156 // Save the successors list.
157 SmallSetVector
<MachineBasicBlock
*, 8> Succs(MBB
->succ_begin(),
160 SmallVector
<MachineBasicBlock
*, 8> TDBBs
;
161 SmallVector
<MachineInstr
*, 16> Copies
;
162 if (!tailDuplicate(IsSimple
, MBB
, ForcedLayoutPred
, TDBBs
, Copies
))
167 SmallVector
<MachineInstr
*, 8> NewPHIs
;
168 MachineSSAUpdater
SSAUpdate(*MF
, &NewPHIs
);
170 // TailBB's immediate successors are now successors of those predecessors
171 // which duplicated TailBB. Add the predecessors as sources to the PHI
173 bool isDead
= MBB
->pred_empty() && !MBB
->hasAddressTaken();
175 updateSuccessorsPHIs(MBB
, isDead
, TDBBs
, Succs
);
177 // If it is dead, remove it.
179 NumTailDupRemoved
+= MBB
->size();
180 removeDeadBlock(MBB
, RemovalCallback
);
185 if (!SSAUpdateVRs
.empty()) {
186 for (unsigned i
= 0, e
= SSAUpdateVRs
.size(); i
!= e
; ++i
) {
187 unsigned VReg
= SSAUpdateVRs
[i
];
188 SSAUpdate
.Initialize(VReg
);
190 // If the original definition is still around, add it as an available
192 MachineInstr
*DefMI
= MRI
->getVRegDef(VReg
);
193 MachineBasicBlock
*DefBB
= nullptr;
195 DefBB
= DefMI
->getParent();
196 SSAUpdate
.AddAvailableValue(DefBB
, VReg
);
199 // Add the new vregs as available values.
200 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
=
201 SSAUpdateVals
.find(VReg
);
202 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
203 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
204 unsigned SrcReg
= LI
->second
[j
].second
;
205 SSAUpdate
.AddAvailableValue(SrcBB
, SrcReg
);
208 // Rewrite uses that are outside of the original def's block.
209 MachineRegisterInfo::use_iterator UI
= MRI
->use_begin(VReg
);
210 while (UI
!= MRI
->use_end()) {
211 MachineOperand
&UseMO
= *UI
;
212 MachineInstr
*UseMI
= UseMO
.getParent();
214 if (UseMI
->isDebugValue()) {
215 // SSAUpdate can replace the use with an undef. That creates
216 // a debug instruction that is a kill.
217 // FIXME: Should it SSAUpdate job to delete debug instructions
218 // instead of replacing the use with undef?
219 UseMI
->eraseFromParent();
222 if (UseMI
->getParent() == DefBB
&& !UseMI
->isPHI())
224 SSAUpdate
.RewriteUse(UseMO
);
228 SSAUpdateVRs
.clear();
229 SSAUpdateVals
.clear();
232 // Eliminate some of the copies inserted by tail duplication to maintain
234 for (unsigned i
= 0, e
= Copies
.size(); i
!= e
; ++i
) {
235 MachineInstr
*Copy
= Copies
[i
];
238 Register Dst
= Copy
->getOperand(0).getReg();
239 Register Src
= Copy
->getOperand(1).getReg();
240 if (MRI
->hasOneNonDBGUse(Src
) &&
241 MRI
->constrainRegClass(Src
, MRI
->getRegClass(Dst
))) {
242 // Copy is the only use. Do trivial copy propagation here.
243 MRI
->replaceRegWith(Dst
, Src
);
244 Copy
->eraseFromParent();
249 NumAddedPHIs
+= NewPHIs
.size();
252 *DuplicatedPreds
= std::move(TDBBs
);
257 /// Look for small blocks that are unconditionally branched to and do not fall
258 /// through. Tail-duplicate their instructions into their predecessors to
259 /// eliminate (dynamic) branches.
260 bool TailDuplicator::tailDuplicateBlocks() {
261 bool MadeChange
= false;
263 if (PreRegAlloc
&& TailDupVerify
) {
264 LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
265 VerifyPHIs(*MF
, true);
268 for (MachineFunction::iterator I
= ++MF
->begin(), E
= MF
->end(); I
!= E
;) {
269 MachineBasicBlock
*MBB
= &*I
++;
271 if (NumTails
== TailDupLimit
)
274 bool IsSimple
= isSimpleBB(MBB
);
276 if (!shouldTailDuplicate(IsSimple
, *MBB
))
279 MadeChange
|= tailDuplicateAndUpdate(IsSimple
, MBB
, nullptr);
282 if (PreRegAlloc
&& TailDupVerify
)
283 VerifyPHIs(*MF
, false);
288 static bool isDefLiveOut(unsigned Reg
, MachineBasicBlock
*BB
,
289 const MachineRegisterInfo
*MRI
) {
290 for (MachineInstr
&UseMI
: MRI
->use_instructions(Reg
)) {
291 if (UseMI
.isDebugValue())
293 if (UseMI
.getParent() != BB
)
299 static unsigned getPHISrcRegOpIdx(MachineInstr
*MI
, MachineBasicBlock
*SrcBB
) {
300 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2)
301 if (MI
->getOperand(i
+ 1).getMBB() == SrcBB
)
306 // Remember which registers are used by phis in this block. This is
307 // used to determine which registers are liveout while modifying the
308 // block (which is why we need to copy the information).
309 static void getRegsUsedByPHIs(const MachineBasicBlock
&BB
,
310 DenseSet
<unsigned> *UsedByPhi
) {
311 for (const auto &MI
: BB
) {
314 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
315 Register SrcReg
= MI
.getOperand(i
).getReg();
316 UsedByPhi
->insert(SrcReg
);
321 /// Add a definition and source virtual registers pair for SSA update.
322 void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg
, unsigned NewReg
,
323 MachineBasicBlock
*BB
) {
324 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
=
325 SSAUpdateVals
.find(OrigReg
);
326 if (LI
!= SSAUpdateVals
.end())
327 LI
->second
.push_back(std::make_pair(BB
, NewReg
));
329 AvailableValsTy Vals
;
330 Vals
.push_back(std::make_pair(BB
, NewReg
));
331 SSAUpdateVals
.insert(std::make_pair(OrigReg
, Vals
));
332 SSAUpdateVRs
.push_back(OrigReg
);
336 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
337 /// source register that's contributed by PredBB and update SSA update map.
338 void TailDuplicator::processPHI(
339 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
340 DenseMap
<unsigned, RegSubRegPair
> &LocalVRMap
,
341 SmallVectorImpl
<std::pair
<unsigned, RegSubRegPair
>> &Copies
,
342 const DenseSet
<unsigned> &RegsUsedByPhi
, bool Remove
) {
343 Register DefReg
= MI
->getOperand(0).getReg();
344 unsigned SrcOpIdx
= getPHISrcRegOpIdx(MI
, PredBB
);
345 assert(SrcOpIdx
&& "Unable to find matching PHI source?");
346 Register SrcReg
= MI
->getOperand(SrcOpIdx
).getReg();
347 unsigned SrcSubReg
= MI
->getOperand(SrcOpIdx
).getSubReg();
348 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefReg
);
349 LocalVRMap
.insert(std::make_pair(DefReg
, RegSubRegPair(SrcReg
, SrcSubReg
)));
351 // Insert a copy from source to the end of the block. The def register is the
352 // available value liveout of the block.
353 Register NewDef
= MRI
->createVirtualRegister(RC
);
354 Copies
.push_back(std::make_pair(NewDef
, RegSubRegPair(SrcReg
, SrcSubReg
)));
355 if (isDefLiveOut(DefReg
, TailBB
, MRI
) || RegsUsedByPhi
.count(DefReg
))
356 addSSAUpdateEntry(DefReg
, NewDef
, PredBB
);
361 // Remove PredBB from the PHI node.
362 MI
->RemoveOperand(SrcOpIdx
+ 1);
363 MI
->RemoveOperand(SrcOpIdx
);
364 if (MI
->getNumOperands() == 1)
365 MI
->eraseFromParent();
368 /// Duplicate a TailBB instruction to PredBB and update
369 /// the source operands due to earlier PHI translation.
370 void TailDuplicator::duplicateInstruction(
371 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
372 DenseMap
<unsigned, RegSubRegPair
> &LocalVRMap
,
373 const DenseSet
<unsigned> &UsedByPhi
) {
374 // Allow duplication of CFI instructions.
375 if (MI
->isCFIInstruction()) {
376 BuildMI(*PredBB
, PredBB
->end(), PredBB
->findDebugLoc(PredBB
->begin()),
377 TII
->get(TargetOpcode::CFI_INSTRUCTION
)).addCFIIndex(
378 MI
->getOperand(0).getCFIIndex());
381 MachineInstr
&NewMI
= TII
->duplicate(*PredBB
, PredBB
->end(), *MI
);
383 for (unsigned i
= 0, e
= NewMI
.getNumOperands(); i
!= e
; ++i
) {
384 MachineOperand
&MO
= NewMI
.getOperand(i
);
387 Register Reg
= MO
.getReg();
388 if (!Register::isVirtualRegister(Reg
))
391 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
392 Register NewReg
= MRI
->createVirtualRegister(RC
);
394 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
395 if (isDefLiveOut(Reg
, TailBB
, MRI
) || UsedByPhi
.count(Reg
))
396 addSSAUpdateEntry(Reg
, NewReg
, PredBB
);
398 auto VI
= LocalVRMap
.find(Reg
);
399 if (VI
!= LocalVRMap
.end()) {
400 // Need to make sure that the register class of the mapped register
401 // will satisfy the constraints of the class of the register being
403 auto *OrigRC
= MRI
->getRegClass(Reg
);
404 auto *MappedRC
= MRI
->getRegClass(VI
->second
.Reg
);
405 const TargetRegisterClass
*ConstrRC
;
406 if (VI
->second
.SubReg
!= 0) {
407 ConstrRC
= TRI
->getMatchingSuperRegClass(MappedRC
, OrigRC
,
410 // The actual constraining (as in "find appropriate new class")
411 // is done by getMatchingSuperRegClass, so now we only need to
412 // change the class of the mapped register.
413 MRI
->setRegClass(VI
->second
.Reg
, ConstrRC
);
416 // For mapped registers that do not have sub-registers, simply
417 // restrict their class to match the original one.
418 ConstrRC
= MRI
->constrainRegClass(VI
->second
.Reg
, OrigRC
);
422 // If the class constraining succeeded, we can simply replace
423 // the old register with the mapped one.
424 MO
.setReg(VI
->second
.Reg
);
425 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
426 // sub-register, we need to compose the sub-register indices.
427 MO
.setSubReg(TRI
->composeSubRegIndices(MO
.getSubReg(),
430 // The direct replacement is not possible, due to failing register
431 // class constraints. An explicit COPY is necessary. Create one
432 // that can be reused
433 auto *NewRC
= MI
->getRegClassConstraint(i
, TII
, TRI
);
434 if (NewRC
== nullptr)
436 Register NewReg
= MRI
->createVirtualRegister(NewRC
);
437 BuildMI(*PredBB
, NewMI
, NewMI
.getDebugLoc(),
438 TII
->get(TargetOpcode::COPY
), NewReg
)
439 .addReg(VI
->second
.Reg
, 0, VI
->second
.SubReg
);
440 LocalVRMap
.erase(VI
);
441 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
443 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
444 // is equivalent to the whole register Reg. Hence, Reg:subreg
445 // is same as NewReg:subreg, so keep the sub-register index
448 // Clear any kill flags from this operand. The new register could
449 // have uses after this one, so kills are not valid here.
457 /// After FromBB is tail duplicated into its predecessor blocks, the successors
458 /// have gained new predecessors. Update the PHI instructions in them
460 void TailDuplicator::updateSuccessorsPHIs(
461 MachineBasicBlock
*FromBB
, bool isDead
,
462 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
463 SmallSetVector
<MachineBasicBlock
*, 8> &Succs
) {
464 for (MachineBasicBlock
*SuccBB
: Succs
) {
465 for (MachineInstr
&MI
: *SuccBB
) {
468 MachineInstrBuilder
MIB(*FromBB
->getParent(), MI
);
470 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
471 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
472 if (MO
.getMBB() == FromBB
) {
479 MachineOperand
&MO0
= MI
.getOperand(Idx
);
480 Register Reg
= MO0
.getReg();
482 // Folded into the previous BB.
483 // There could be duplicate phi source entries. FIXME: Should sdisel
484 // or earlier pass fixed this?
485 for (unsigned i
= MI
.getNumOperands() - 2; i
!= Idx
; i
-= 2) {
486 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
487 if (MO
.getMBB() == FromBB
) {
488 MI
.RemoveOperand(i
+ 1);
495 // If Idx is set, the operands at Idx and Idx+1 must be removed.
496 // We reuse the location to avoid expensive RemoveOperand calls.
498 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
=
499 SSAUpdateVals
.find(Reg
);
500 if (LI
!= SSAUpdateVals
.end()) {
501 // This register is defined in the tail block.
502 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
503 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
504 // If we didn't duplicate a bb into a particular predecessor, we
505 // might still have added an entry to SSAUpdateVals to correcly
506 // recompute SSA. If that case, avoid adding a dummy extra argument
508 if (!SrcBB
->isSuccessor(SuccBB
))
511 unsigned SrcReg
= LI
->second
[j
].second
;
513 MI
.getOperand(Idx
).setReg(SrcReg
);
514 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
517 MIB
.addReg(SrcReg
).addMBB(SrcBB
);
521 // Live in tail block, must also be live in predecessors.
522 for (unsigned j
= 0, ee
= TDBBs
.size(); j
!= ee
; ++j
) {
523 MachineBasicBlock
*SrcBB
= TDBBs
[j
];
525 MI
.getOperand(Idx
).setReg(Reg
);
526 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
529 MIB
.addReg(Reg
).addMBB(SrcBB
);
534 MI
.RemoveOperand(Idx
+ 1);
535 MI
.RemoveOperand(Idx
);
541 /// Determine if it is profitable to duplicate this block.
542 bool TailDuplicator::shouldTailDuplicate(bool IsSimple
,
543 MachineBasicBlock
&TailBB
) {
544 // When doing tail-duplication during layout, the block ordering is in flux,
545 // so canFallThrough returns a result based on incorrect information and
546 // should just be ignored.
547 if (!LayoutMode
&& TailBB
.canFallThrough())
550 // Don't try to tail-duplicate single-block loops.
551 if (TailBB
.isSuccessor(&TailBB
))
554 // Set the limit on the cost to duplicate. When optimizing for size,
555 // duplicate only one, because one branch instruction can be eliminated to
556 // compensate for the duplication.
557 unsigned MaxDuplicateCount
;
558 if (TailDupSize
== 0 &&
559 TailDuplicateSize
.getNumOccurrences() == 0 &&
560 MF
->getFunction().hasOptSize())
561 MaxDuplicateCount
= 1;
562 else if (TailDupSize
== 0)
563 MaxDuplicateCount
= TailDuplicateSize
;
565 MaxDuplicateCount
= TailDupSize
;
567 // If the block to be duplicated ends in an unanalyzable fallthrough, don't
569 // A similar check is necessary in MachineBlockPlacement to make sure pairs of
570 // blocks with unanalyzable fallthrough get layed out contiguously.
571 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
572 SmallVector
<MachineOperand
, 4> PredCond
;
573 if (TII
->analyzeBranch(TailBB
, PredTBB
, PredFBB
, PredCond
) &&
574 TailBB
.canFallThrough())
577 // If the target has hardware branch prediction that can handle indirect
578 // branches, duplicating them can often make them predictable when there
579 // are common paths through the code. The limit needs to be high enough
580 // to allow undoing the effects of tail merging and other optimizations
581 // that rearrange the predecessors of the indirect branch.
583 bool HasIndirectbr
= false;
585 HasIndirectbr
= TailBB
.back().isIndirectBranch();
587 if (HasIndirectbr
&& PreRegAlloc
)
588 MaxDuplicateCount
= TailDupIndirectBranchSize
;
590 // Check the instructions in the block to determine whether tail-duplication
591 // is invalid or unlikely to be profitable.
592 unsigned InstrCount
= 0;
593 for (MachineInstr
&MI
: TailBB
) {
594 // Non-duplicable things shouldn't be tail-duplicated.
595 // CFI instructions are marked as non-duplicable, because Darwin compact
596 // unwind info emission can't handle multiple prologue setups. In case of
597 // DWARF, allow them be duplicated, so that their existence doesn't prevent
598 // tail duplication of some basic blocks, that would be duplicated otherwise.
599 if (MI
.isNotDuplicable() &&
600 (TailBB
.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
601 !MI
.isCFIInstruction()))
604 // Convergent instructions can be duplicated only if doing so doesn't add
605 // new control dependencies, which is what we're going to do here.
606 if (MI
.isConvergent())
609 // Do not duplicate 'return' instructions if this is a pre-regalloc run.
610 // A return may expand into a lot more instructions (e.g. reload of callee
611 // saved registers) after PEI.
612 if (PreRegAlloc
&& MI
.isReturn())
615 // Avoid duplicating calls before register allocation. Calls presents a
616 // barrier to register allocation so duplicating them may end up increasing
618 if (PreRegAlloc
&& MI
.isCall())
621 if (!MI
.isPHI() && !MI
.isMetaInstruction())
624 if (InstrCount
> MaxDuplicateCount
)
628 // Check if any of the successors of TailBB has a PHI node in which the
629 // value corresponding to TailBB uses a subregister.
630 // If a phi node uses a register paired with a subregister, the actual
631 // "value type" of the phi may differ from the type of the register without
632 // any subregisters. Due to a bug, tail duplication may add a new operand
633 // without a necessary subregister, producing an invalid code. This is
634 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
635 // Disable tail duplication for this case for now, until the problem is
637 for (auto SB
: TailBB
.successors()) {
638 for (auto &I
: *SB
) {
641 unsigned Idx
= getPHISrcRegOpIdx(&I
, &TailBB
);
643 MachineOperand
&PU
= I
.getOperand(Idx
);
644 if (PU
.getSubReg() != 0)
649 if (HasIndirectbr
&& PreRegAlloc
)
658 return canCompletelyDuplicateBB(TailBB
);
661 /// True if this BB has only one unconditional jump.
662 bool TailDuplicator::isSimpleBB(MachineBasicBlock
*TailBB
) {
663 if (TailBB
->succ_size() != 1)
665 if (TailBB
->pred_empty())
667 MachineBasicBlock::iterator I
= TailBB
->getFirstNonDebugInstr();
668 if (I
== TailBB
->end())
670 return I
->isUnconditionalBranch();
673 static bool bothUsedInPHI(const MachineBasicBlock
&A
,
674 const SmallPtrSet
<MachineBasicBlock
*, 8> &SuccsB
) {
675 for (MachineBasicBlock
*BB
: A
.successors())
676 if (SuccsB
.count(BB
) && !BB
->empty() && BB
->begin()->isPHI())
682 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock
&BB
) {
683 for (MachineBasicBlock
*PredBB
: BB
.predecessors()) {
684 if (PredBB
->succ_size() > 1)
687 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
688 SmallVector
<MachineOperand
, 4> PredCond
;
689 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
692 if (!PredCond
.empty())
698 bool TailDuplicator::duplicateSimpleBB(
699 MachineBasicBlock
*TailBB
, SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
700 const DenseSet
<unsigned> &UsedByPhi
,
701 SmallVectorImpl
<MachineInstr
*> &Copies
) {
702 SmallPtrSet
<MachineBasicBlock
*, 8> Succs(TailBB
->succ_begin(),
704 SmallVector
<MachineBasicBlock
*, 8> Preds(TailBB
->pred_begin(),
706 bool Changed
= false;
707 for (MachineBasicBlock
*PredBB
: Preds
) {
708 if (PredBB
->hasEHPadSuccessor())
711 if (bothUsedInPHI(*PredBB
, Succs
))
714 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
715 SmallVector
<MachineOperand
, 4> PredCond
;
716 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
720 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
721 << "From simple Succ: " << *TailBB
);
723 MachineBasicBlock
*NewTarget
= *TailBB
->succ_begin();
724 MachineBasicBlock
*NextBB
= PredBB
->getNextNode();
726 // Make PredFBB explicit.
727 if (PredCond
.empty())
730 // Make fall through explicit.
737 if (PredFBB
== TailBB
)
739 if (PredTBB
== TailBB
)
742 // Make the branch unconditional if possible
743 if (PredTBB
== PredFBB
) {
748 // Avoid adding fall through branches.
749 if (PredFBB
== NextBB
)
751 if (PredTBB
== NextBB
&& PredFBB
== nullptr)
754 auto DL
= PredBB
->findBranchDebugLoc();
755 TII
->removeBranch(*PredBB
);
757 if (!PredBB
->isSuccessor(NewTarget
))
758 PredBB
->replaceSuccessor(TailBB
, NewTarget
);
760 PredBB
->removeSuccessor(TailBB
, true);
761 assert(PredBB
->succ_size() <= 1);
765 TII
->insertBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, DL
);
767 TDBBs
.push_back(PredBB
);
772 bool TailDuplicator::canTailDuplicate(MachineBasicBlock
*TailBB
,
773 MachineBasicBlock
*PredBB
) {
774 // EH edges are ignored by analyzeBranch.
775 if (PredBB
->succ_size() > 1)
778 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
779 SmallVector
<MachineOperand
, 4> PredCond
;
780 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
782 if (!PredCond
.empty())
787 /// If it is profitable, duplicate TailBB's contents in each
788 /// of its predecessors.
789 /// \p IsSimple result of isSimpleBB
790 /// \p TailBB Block to be duplicated.
791 /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
792 /// instead of the previous block in MF's order.
793 /// \p TDBBs A vector to keep track of all blocks tail-duplicated
795 /// \p Copies A vector of copy instructions inserted. Used later to
796 /// walk all the inserted copies and remove redundant ones.
797 bool TailDuplicator::tailDuplicate(bool IsSimple
, MachineBasicBlock
*TailBB
,
798 MachineBasicBlock
*ForcedLayoutPred
,
799 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
800 SmallVectorImpl
<MachineInstr
*> &Copies
) {
801 LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB
)
804 DenseSet
<unsigned> UsedByPhi
;
805 getRegsUsedByPHIs(*TailBB
, &UsedByPhi
);
808 return duplicateSimpleBB(TailBB
, TDBBs
, UsedByPhi
, Copies
);
810 // Iterate through all the unique predecessors and tail-duplicate this
811 // block into them, if possible. Copying the list ahead of time also
812 // avoids trouble with the predecessor list reallocating.
813 bool Changed
= false;
814 SmallSetVector
<MachineBasicBlock
*, 8> Preds(TailBB
->pred_begin(),
816 for (MachineBasicBlock
*PredBB
: Preds
) {
817 assert(TailBB
!= PredBB
&&
818 "Single-block loop should have been rejected earlier!");
820 if (!canTailDuplicate(TailBB
, PredBB
))
823 // Don't duplicate into a fall-through predecessor (at least for now).
824 bool IsLayoutSuccessor
= false;
825 if (ForcedLayoutPred
)
826 IsLayoutSuccessor
= (ForcedLayoutPred
== PredBB
);
827 else if (PredBB
->isLayoutSuccessor(TailBB
) && PredBB
->canFallThrough())
828 IsLayoutSuccessor
= true;
829 if (IsLayoutSuccessor
)
832 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
833 << "From Succ: " << *TailBB
);
835 TDBBs
.push_back(PredBB
);
837 // Remove PredBB's unconditional branch.
838 TII
->removeBranch(*PredBB
);
840 // Clone the contents of TailBB into PredBB.
841 DenseMap
<unsigned, RegSubRegPair
> LocalVRMap
;
842 SmallVector
<std::pair
<unsigned, RegSubRegPair
>, 4> CopyInfos
;
843 for (MachineBasicBlock::iterator I
= TailBB
->begin(), E
= TailBB
->end();
844 I
!= E
; /* empty */) {
845 MachineInstr
*MI
= &*I
;
848 // Replace the uses of the def of the PHI with the register coming
850 processPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
852 // Replace def of virtual registers with new registers, and update
853 // uses with PHI source register or the new registers.
854 duplicateInstruction(MI
, TailBB
, PredBB
, LocalVRMap
, UsedByPhi
);
857 appendCopies(PredBB
, CopyInfos
, Copies
);
859 NumTailDupAdded
+= TailBB
->size() - 1; // subtract one for removed branch
862 PredBB
->removeSuccessor(PredBB
->succ_begin());
863 assert(PredBB
->succ_empty() &&
864 "TailDuplicate called on block with multiple successors!");
865 for (MachineBasicBlock
*Succ
: TailBB
->successors())
866 PredBB
->addSuccessor(Succ
, MBPI
->getEdgeProbability(TailBB
, Succ
));
872 // If TailBB was duplicated into all its predecessors except for the prior
873 // block, which falls through unconditionally, move the contents of this
874 // block into the prior block.
875 MachineBasicBlock
*PrevBB
= ForcedLayoutPred
;
877 PrevBB
= &*std::prev(TailBB
->getIterator());
878 MachineBasicBlock
*PriorTBB
= nullptr, *PriorFBB
= nullptr;
879 SmallVector
<MachineOperand
, 4> PriorCond
;
880 // This has to check PrevBB->succ_size() because EH edges are ignored by
882 if (PrevBB
->succ_size() == 1 &&
883 // Layout preds are not always CFG preds. Check.
884 *PrevBB
->succ_begin() == TailBB
&&
885 !TII
->analyzeBranch(*PrevBB
, PriorTBB
, PriorFBB
, PriorCond
) &&
887 (!PriorTBB
|| PriorTBB
== TailBB
) &&
888 TailBB
->pred_size() == 1 &&
889 !TailBB
->hasAddressTaken()) {
890 LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
891 << "From MBB: " << *TailBB
);
892 // There may be a branch to the layout successor. This is unlikely but it
893 // happens. The correct thing to do is to remove the branch before
894 // duplicating the instructions in all cases.
895 TII
->removeBranch(*PrevBB
);
897 DenseMap
<unsigned, RegSubRegPair
> LocalVRMap
;
898 SmallVector
<std::pair
<unsigned, RegSubRegPair
>, 4> CopyInfos
;
899 MachineBasicBlock::iterator I
= TailBB
->begin();
900 // Process PHI instructions first.
901 while (I
!= TailBB
->end() && I
->isPHI()) {
902 // Replace the uses of the def of the PHI with the register coming
904 MachineInstr
*MI
= &*I
++;
905 processPHI(MI
, TailBB
, PrevBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
908 // Now copy the non-PHI instructions.
909 while (I
!= TailBB
->end()) {
910 // Replace def of virtual registers with new registers, and update
911 // uses with PHI source register or the new registers.
912 MachineInstr
*MI
= &*I
++;
913 assert(!MI
->isBundle() && "Not expecting bundles before regalloc!");
914 duplicateInstruction(MI
, TailBB
, PrevBB
, LocalVRMap
, UsedByPhi
);
915 MI
->eraseFromParent();
917 appendCopies(PrevBB
, CopyInfos
, Copies
);
919 TII
->removeBranch(*PrevBB
);
920 // No PHIs to worry about, just splice the instructions over.
921 PrevBB
->splice(PrevBB
->end(), TailBB
, TailBB
->begin(), TailBB
->end());
923 PrevBB
->removeSuccessor(PrevBB
->succ_begin());
924 assert(PrevBB
->succ_empty());
925 PrevBB
->transferSuccessors(TailBB
);
926 TDBBs
.push_back(PrevBB
);
930 // If this is after register allocation, there are no phis to fix.
934 // If we made no changes so far, we are safe.
938 // Handle the nasty case in that we duplicated a block that is part of a loop
939 // into some but not all of its predecessors. For example:
943 // if we duplicate 2 into 1 but not into 3, we end up with
944 // 12 -> 3 <-> 2 -> rest |
947 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
948 // with a phi in 3 (which now dominates 2).
949 // What we do here is introduce a copy in 3 of the register defined by the
950 // phi, just like when we are duplicating 2 into 3, but we don't copy any
951 // real instructions or remove the 3 -> 2 edge from the phi in 2.
952 for (MachineBasicBlock
*PredBB
: Preds
) {
953 if (is_contained(TDBBs
, PredBB
))
957 if (PredBB
->succ_size() != 1)
960 DenseMap
<unsigned, RegSubRegPair
> LocalVRMap
;
961 SmallVector
<std::pair
<unsigned, RegSubRegPair
>, 4> CopyInfos
;
962 MachineBasicBlock::iterator I
= TailBB
->begin();
963 // Process PHI instructions first.
964 while (I
!= TailBB
->end() && I
->isPHI()) {
965 // Replace the uses of the def of the PHI with the register coming
967 MachineInstr
*MI
= &*I
++;
968 processPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, false);
970 appendCopies(PredBB
, CopyInfos
, Copies
);
976 /// At the end of the block \p MBB generate COPY instructions between registers
977 /// described by \p CopyInfos. Append resulting instructions to \p Copies.
978 void TailDuplicator::appendCopies(MachineBasicBlock
*MBB
,
979 SmallVectorImpl
<std::pair
<unsigned,RegSubRegPair
>> &CopyInfos
,
980 SmallVectorImpl
<MachineInstr
*> &Copies
) {
981 MachineBasicBlock::iterator Loc
= MBB
->getFirstTerminator();
982 const MCInstrDesc
&CopyD
= TII
->get(TargetOpcode::COPY
);
983 for (auto &CI
: CopyInfos
) {
984 auto C
= BuildMI(*MBB
, Loc
, DebugLoc(), CopyD
, CI
.first
)
985 .addReg(CI
.second
.Reg
, 0, CI
.second
.SubReg
);
990 /// Remove the specified dead machine basic block from the function, updating
992 void TailDuplicator::removeDeadBlock(
993 MachineBasicBlock
*MBB
,
994 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
) {
995 assert(MBB
->pred_empty() && "MBB must be dead!");
996 LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB
);
999 (*RemovalCallback
)(MBB
);
1001 // Remove all successors.
1002 while (!MBB
->succ_empty())
1003 MBB
->removeSuccessor(MBB
->succ_end() - 1);
1005 // Remove the block.
1006 MBB
->eraseFromParent();