1 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This utility class duplicates basic blocks ending in unconditional branches
11 // into the tails of their predecessors.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/TailDuplicator.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/MachineSSAUpdater.h"
31 #include "llvm/CodeGen/TargetInstrInfo.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetMachine.h"
48 #define DEBUG_TYPE "tailduplication"
50 STATISTIC(NumTails
, "Number of tails duplicated");
51 STATISTIC(NumTailDups
, "Number of tail duplicated blocks");
52 STATISTIC(NumTailDupAdded
,
53 "Number of instructions added due to tail duplication");
54 STATISTIC(NumTailDupRemoved
,
55 "Number of instructions removed due to tail duplication");
56 STATISTIC(NumDeadBlocks
, "Number of dead blocks removed");
57 STATISTIC(NumAddedPHIs
, "Number of phis added");
59 // Heuristic for tail duplication.
60 static cl::opt
<unsigned> TailDuplicateSize(
62 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
65 static cl::opt
<unsigned> TailDupIndirectBranchSize(
66 "tail-dup-indirect-size",
67 cl::desc("Maximum instructions to consider tail duplicating blocks that "
68 "end with indirect branches."), cl::init(20),
72 TailDupVerify("tail-dup-verify",
73 cl::desc("Verify sanity of PHI instructions during taildup"),
74 cl::init(false), cl::Hidden
);
76 static cl::opt
<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
79 void TailDuplicator::initMF(MachineFunction
&MFin
, bool PreRegAlloc
,
80 const MachineBranchProbabilityInfo
*MBPIin
,
81 bool LayoutModeIn
, unsigned TailDupSizeIn
) {
83 TII
= MF
->getSubtarget().getInstrInfo();
84 TRI
= MF
->getSubtarget().getRegisterInfo();
85 MRI
= &MF
->getRegInfo();
88 TailDupSize
= TailDupSizeIn
;
90 assert(MBPI
!= nullptr && "Machine Branch Probability Info required");
92 LayoutMode
= LayoutModeIn
;
93 this->PreRegAlloc
= PreRegAlloc
;
96 static void VerifyPHIs(MachineFunction
&MF
, bool CheckExtra
) {
97 for (MachineFunction::iterator I
= ++MF
.begin(), E
= MF
.end(); I
!= E
; ++I
) {
98 MachineBasicBlock
*MBB
= &*I
;
99 SmallSetVector
<MachineBasicBlock
*, 8> Preds(MBB
->pred_begin(),
101 MachineBasicBlock::iterator MI
= MBB
->begin();
102 while (MI
!= MBB
->end()) {
105 for (MachineBasicBlock
*PredBB
: Preds
) {
107 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
108 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
109 if (PHIBB
== PredBB
) {
115 dbgs() << "Malformed PHI in " << printMBBReference(*MBB
) << ": "
117 dbgs() << " missing input from predecessor "
118 << printMBBReference(*PredBB
) << '\n';
119 llvm_unreachable(nullptr);
123 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
124 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
125 if (CheckExtra
&& !Preds
.count(PHIBB
)) {
126 dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB
)
128 dbgs() << " extra input from predecessor "
129 << printMBBReference(*PHIBB
) << '\n';
130 llvm_unreachable(nullptr);
132 if (PHIBB
->getNumber() < 0) {
133 dbgs() << "Malformed PHI in " << printMBBReference(*MBB
) << ": "
135 dbgs() << " non-existing " << printMBBReference(*PHIBB
) << '\n';
136 llvm_unreachable(nullptr);
144 /// Tail duplicate the block and cleanup.
145 /// \p IsSimple - return value of isSimpleBB
146 /// \p MBB - block to be duplicated
147 /// \p ForcedLayoutPred - If non-null, treat this block as the layout
148 /// predecessor, instead of using the ordering in MF
149 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
150 /// all Preds that received a copy of \p MBB.
151 /// \p RemovalCallback - if non-null, called just before MBB is deleted.
152 bool TailDuplicator::tailDuplicateAndUpdate(
153 bool IsSimple
, MachineBasicBlock
*MBB
,
154 MachineBasicBlock
*ForcedLayoutPred
,
155 SmallVectorImpl
<MachineBasicBlock
*> *DuplicatedPreds
,
156 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
) {
157 // Save the successors list.
158 SmallSetVector
<MachineBasicBlock
*, 8> Succs(MBB
->succ_begin(),
161 SmallVector
<MachineBasicBlock
*, 8> TDBBs
;
162 SmallVector
<MachineInstr
*, 16> Copies
;
163 if (!tailDuplicate(IsSimple
, MBB
, ForcedLayoutPred
, TDBBs
, Copies
))
168 SmallVector
<MachineInstr
*, 8> NewPHIs
;
169 MachineSSAUpdater
SSAUpdate(*MF
, &NewPHIs
);
171 // TailBB's immediate successors are now successors of those predecessors
172 // which duplicated TailBB. Add the predecessors as sources to the PHI
174 bool isDead
= MBB
->pred_empty() && !MBB
->hasAddressTaken();
176 updateSuccessorsPHIs(MBB
, isDead
, TDBBs
, Succs
);
178 // If it is dead, remove it.
180 NumTailDupRemoved
+= MBB
->size();
181 removeDeadBlock(MBB
, RemovalCallback
);
186 if (!SSAUpdateVRs
.empty()) {
187 for (unsigned i
= 0, e
= SSAUpdateVRs
.size(); i
!= e
; ++i
) {
188 unsigned VReg
= SSAUpdateVRs
[i
];
189 SSAUpdate
.Initialize(VReg
);
191 // If the original definition is still around, add it as an available
193 MachineInstr
*DefMI
= MRI
->getVRegDef(VReg
);
194 MachineBasicBlock
*DefBB
= nullptr;
196 DefBB
= DefMI
->getParent();
197 SSAUpdate
.AddAvailableValue(DefBB
, VReg
);
200 // Add the new vregs as available values.
201 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
=
202 SSAUpdateVals
.find(VReg
);
203 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
204 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
205 unsigned SrcReg
= LI
->second
[j
].second
;
206 SSAUpdate
.AddAvailableValue(SrcBB
, SrcReg
);
209 // Rewrite uses that are outside of the original def's block.
210 MachineRegisterInfo::use_iterator UI
= MRI
->use_begin(VReg
);
211 while (UI
!= MRI
->use_end()) {
212 MachineOperand
&UseMO
= *UI
;
213 MachineInstr
*UseMI
= UseMO
.getParent();
215 if (UseMI
->isDebugValue()) {
216 // SSAUpdate can replace the use with an undef. That creates
217 // a debug instruction that is a kill.
218 // FIXME: Should it SSAUpdate job to delete debug instructions
219 // instead of replacing the use with undef?
220 UseMI
->eraseFromParent();
223 if (UseMI
->getParent() == DefBB
&& !UseMI
->isPHI())
225 SSAUpdate
.RewriteUse(UseMO
);
229 SSAUpdateVRs
.clear();
230 SSAUpdateVals
.clear();
233 // Eliminate some of the copies inserted by tail duplication to maintain
235 for (unsigned i
= 0, e
= Copies
.size(); i
!= e
; ++i
) {
236 MachineInstr
*Copy
= Copies
[i
];
239 unsigned Dst
= Copy
->getOperand(0).getReg();
240 unsigned Src
= Copy
->getOperand(1).getReg();
241 if (MRI
->hasOneNonDBGUse(Src
) &&
242 MRI
->constrainRegClass(Src
, MRI
->getRegClass(Dst
))) {
243 // Copy is the only use. Do trivial copy propagation here.
244 MRI
->replaceRegWith(Dst
, Src
);
245 Copy
->eraseFromParent();
250 NumAddedPHIs
+= NewPHIs
.size();
253 *DuplicatedPreds
= std::move(TDBBs
);
258 /// Look for small blocks that are unconditionally branched to and do not fall
259 /// through. Tail-duplicate their instructions into their predecessors to
260 /// eliminate (dynamic) branches.
261 bool TailDuplicator::tailDuplicateBlocks() {
262 bool MadeChange
= false;
264 if (PreRegAlloc
&& TailDupVerify
) {
265 LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
266 VerifyPHIs(*MF
, true);
269 for (MachineFunction::iterator I
= ++MF
->begin(), E
= MF
->end(); I
!= E
;) {
270 MachineBasicBlock
*MBB
= &*I
++;
272 if (NumTails
== TailDupLimit
)
275 bool IsSimple
= isSimpleBB(MBB
);
277 if (!shouldTailDuplicate(IsSimple
, *MBB
))
280 MadeChange
|= tailDuplicateAndUpdate(IsSimple
, MBB
, nullptr);
283 if (PreRegAlloc
&& TailDupVerify
)
284 VerifyPHIs(*MF
, false);
289 static bool isDefLiveOut(unsigned Reg
, MachineBasicBlock
*BB
,
290 const MachineRegisterInfo
*MRI
) {
291 for (MachineInstr
&UseMI
: MRI
->use_instructions(Reg
)) {
292 if (UseMI
.isDebugValue())
294 if (UseMI
.getParent() != BB
)
300 static unsigned getPHISrcRegOpIdx(MachineInstr
*MI
, MachineBasicBlock
*SrcBB
) {
301 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2)
302 if (MI
->getOperand(i
+ 1).getMBB() == SrcBB
)
307 // Remember which registers are used by phis in this block. This is
308 // used to determine which registers are liveout while modifying the
309 // block (which is why we need to copy the information).
310 static void getRegsUsedByPHIs(const MachineBasicBlock
&BB
,
311 DenseSet
<unsigned> *UsedByPhi
) {
312 for (const auto &MI
: BB
) {
315 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
316 unsigned SrcReg
= MI
.getOperand(i
).getReg();
317 UsedByPhi
->insert(SrcReg
);
322 /// Add a definition and source virtual registers pair for SSA update.
323 void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg
, unsigned NewReg
,
324 MachineBasicBlock
*BB
) {
325 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
=
326 SSAUpdateVals
.find(OrigReg
);
327 if (LI
!= SSAUpdateVals
.end())
328 LI
->second
.push_back(std::make_pair(BB
, NewReg
));
330 AvailableValsTy Vals
;
331 Vals
.push_back(std::make_pair(BB
, NewReg
));
332 SSAUpdateVals
.insert(std::make_pair(OrigReg
, Vals
));
333 SSAUpdateVRs
.push_back(OrigReg
);
337 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
338 /// source register that's contributed by PredBB and update SSA update map.
339 void TailDuplicator::processPHI(
340 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
341 DenseMap
<unsigned, RegSubRegPair
> &LocalVRMap
,
342 SmallVectorImpl
<std::pair
<unsigned, RegSubRegPair
>> &Copies
,
343 const DenseSet
<unsigned> &RegsUsedByPhi
, bool Remove
) {
344 unsigned DefReg
= MI
->getOperand(0).getReg();
345 unsigned SrcOpIdx
= getPHISrcRegOpIdx(MI
, PredBB
);
346 assert(SrcOpIdx
&& "Unable to find matching PHI source?");
347 unsigned SrcReg
= MI
->getOperand(SrcOpIdx
).getReg();
348 unsigned SrcSubReg
= MI
->getOperand(SrcOpIdx
).getSubReg();
349 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefReg
);
350 LocalVRMap
.insert(std::make_pair(DefReg
, RegSubRegPair(SrcReg
, SrcSubReg
)));
352 // Insert a copy from source to the end of the block. The def register is the
353 // available value liveout of the block.
354 unsigned NewDef
= MRI
->createVirtualRegister(RC
);
355 Copies
.push_back(std::make_pair(NewDef
, RegSubRegPair(SrcReg
, SrcSubReg
)));
356 if (isDefLiveOut(DefReg
, TailBB
, MRI
) || RegsUsedByPhi
.count(DefReg
))
357 addSSAUpdateEntry(DefReg
, NewDef
, PredBB
);
362 // Remove PredBB from the PHI node.
363 MI
->RemoveOperand(SrcOpIdx
+ 1);
364 MI
->RemoveOperand(SrcOpIdx
);
365 if (MI
->getNumOperands() == 1)
366 MI
->eraseFromParent();
369 /// Duplicate a TailBB instruction to PredBB and update
370 /// the source operands due to earlier PHI translation.
371 void TailDuplicator::duplicateInstruction(
372 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
373 DenseMap
<unsigned, RegSubRegPair
> &LocalVRMap
,
374 const DenseSet
<unsigned> &UsedByPhi
) {
375 // Allow duplication of CFI instructions.
376 if (MI
->isCFIInstruction()) {
377 BuildMI(*PredBB
, PredBB
->end(), PredBB
->findDebugLoc(PredBB
->begin()),
378 TII
->get(TargetOpcode::CFI_INSTRUCTION
)).addCFIIndex(
379 MI
->getOperand(0).getCFIIndex());
382 MachineInstr
&NewMI
= TII
->duplicate(*PredBB
, PredBB
->end(), *MI
);
384 for (unsigned i
= 0, e
= NewMI
.getNumOperands(); i
!= e
; ++i
) {
385 MachineOperand
&MO
= NewMI
.getOperand(i
);
388 unsigned Reg
= MO
.getReg();
389 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
392 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
393 unsigned NewReg
= MRI
->createVirtualRegister(RC
);
395 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
396 if (isDefLiveOut(Reg
, TailBB
, MRI
) || UsedByPhi
.count(Reg
))
397 addSSAUpdateEntry(Reg
, NewReg
, PredBB
);
399 auto VI
= LocalVRMap
.find(Reg
);
400 if (VI
!= LocalVRMap
.end()) {
401 // Need to make sure that the register class of the mapped register
402 // will satisfy the constraints of the class of the register being
404 auto *OrigRC
= MRI
->getRegClass(Reg
);
405 auto *MappedRC
= MRI
->getRegClass(VI
->second
.Reg
);
406 const TargetRegisterClass
*ConstrRC
;
407 if (VI
->second
.SubReg
!= 0) {
408 ConstrRC
= TRI
->getMatchingSuperRegClass(MappedRC
, OrigRC
,
411 // The actual constraining (as in "find appropriate new class")
412 // is done by getMatchingSuperRegClass, so now we only need to
413 // change the class of the mapped register.
414 MRI
->setRegClass(VI
->second
.Reg
, ConstrRC
);
417 // For mapped registers that do not have sub-registers, simply
418 // restrict their class to match the original one.
419 ConstrRC
= MRI
->constrainRegClass(VI
->second
.Reg
, OrigRC
);
423 // If the class constraining succeeded, we can simply replace
424 // the old register with the mapped one.
425 MO
.setReg(VI
->second
.Reg
);
426 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
427 // sub-register, we need to compose the sub-register indices.
428 MO
.setSubReg(TRI
->composeSubRegIndices(MO
.getSubReg(),
431 // The direct replacement is not possible, due to failing register
432 // class constraints. An explicit COPY is necessary. Create one
433 // that can be reused
434 auto *NewRC
= MI
->getRegClassConstraint(i
, TII
, TRI
);
435 if (NewRC
== nullptr)
437 unsigned NewReg
= MRI
->createVirtualRegister(NewRC
);
438 BuildMI(*PredBB
, MI
, MI
->getDebugLoc(),
439 TII
->get(TargetOpcode::COPY
), NewReg
)
440 .addReg(VI
->second
.Reg
, 0, VI
->second
.SubReg
);
441 LocalVRMap
.erase(VI
);
442 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
444 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
445 // is equivalent to the whole register Reg. Hence, Reg:subreg
446 // is same as NewReg:subreg, so keep the sub-register index
449 // Clear any kill flags from this operand. The new register could
450 // have uses after this one, so kills are not valid here.
458 /// After FromBB is tail duplicated into its predecessor blocks, the successors
459 /// have gained new predecessors. Update the PHI instructions in them
461 void TailDuplicator::updateSuccessorsPHIs(
462 MachineBasicBlock
*FromBB
, bool isDead
,
463 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
464 SmallSetVector
<MachineBasicBlock
*, 8> &Succs
) {
465 for (MachineBasicBlock
*SuccBB
: Succs
) {
466 for (MachineInstr
&MI
: *SuccBB
) {
469 MachineInstrBuilder
MIB(*FromBB
->getParent(), MI
);
471 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
472 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
473 if (MO
.getMBB() == FromBB
) {
480 MachineOperand
&MO0
= MI
.getOperand(Idx
);
481 unsigned Reg
= MO0
.getReg();
483 // Folded into the previous BB.
484 // There could be duplicate phi source entries. FIXME: Should sdisel
485 // or earlier pass fixed this?
486 for (unsigned i
= MI
.getNumOperands() - 2; i
!= Idx
; i
-= 2) {
487 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
488 if (MO
.getMBB() == FromBB
) {
489 MI
.RemoveOperand(i
+ 1);
496 // If Idx is set, the operands at Idx and Idx+1 must be removed.
497 // We reuse the location to avoid expensive RemoveOperand calls.
499 DenseMap
<unsigned, AvailableValsTy
>::iterator LI
=
500 SSAUpdateVals
.find(Reg
);
501 if (LI
!= SSAUpdateVals
.end()) {
502 // This register is defined in the tail block.
503 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
504 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
505 // If we didn't duplicate a bb into a particular predecessor, we
506 // might still have added an entry to SSAUpdateVals to correcly
507 // recompute SSA. If that case, avoid adding a dummy extra argument
509 if (!SrcBB
->isSuccessor(SuccBB
))
512 unsigned SrcReg
= LI
->second
[j
].second
;
514 MI
.getOperand(Idx
).setReg(SrcReg
);
515 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
518 MIB
.addReg(SrcReg
).addMBB(SrcBB
);
522 // Live in tail block, must also be live in predecessors.
523 for (unsigned j
= 0, ee
= TDBBs
.size(); j
!= ee
; ++j
) {
524 MachineBasicBlock
*SrcBB
= TDBBs
[j
];
526 MI
.getOperand(Idx
).setReg(Reg
);
527 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
530 MIB
.addReg(Reg
).addMBB(SrcBB
);
535 MI
.RemoveOperand(Idx
+ 1);
536 MI
.RemoveOperand(Idx
);
542 /// Determine if it is profitable to duplicate this block.
543 bool TailDuplicator::shouldTailDuplicate(bool IsSimple
,
544 MachineBasicBlock
&TailBB
) {
545 // When doing tail-duplication during layout, the block ordering is in flux,
546 // so canFallThrough returns a result based on incorrect information and
547 // should just be ignored.
548 if (!LayoutMode
&& TailBB
.canFallThrough())
551 // Don't try to tail-duplicate single-block loops.
552 if (TailBB
.isSuccessor(&TailBB
))
555 // Set the limit on the cost to duplicate. When optimizing for size,
556 // duplicate only one, because one branch instruction can be eliminated to
557 // compensate for the duplication.
558 unsigned MaxDuplicateCount
;
559 if (TailDupSize
== 0 &&
560 TailDuplicateSize
.getNumOccurrences() == 0 &&
561 MF
->getFunction().optForSize())
562 MaxDuplicateCount
= 1;
563 else if (TailDupSize
== 0)
564 MaxDuplicateCount
= TailDuplicateSize
;
566 MaxDuplicateCount
= TailDupSize
;
568 // If the block to be duplicated ends in an unanalyzable fallthrough, don't
570 // A similar check is necessary in MachineBlockPlacement to make sure pairs of
571 // blocks with unanalyzable fallthrough get layed out contiguously.
572 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
573 SmallVector
<MachineOperand
, 4> PredCond
;
574 if (TII
->analyzeBranch(TailBB
, PredTBB
, PredFBB
, PredCond
) &&
575 TailBB
.canFallThrough())
578 // If the target has hardware branch prediction that can handle indirect
579 // branches, duplicating them can often make them predictable when there
580 // are common paths through the code. The limit needs to be high enough
581 // to allow undoing the effects of tail merging and other optimizations
582 // that rearrange the predecessors of the indirect branch.
584 bool HasIndirectbr
= false;
586 HasIndirectbr
= TailBB
.back().isIndirectBranch();
588 if (HasIndirectbr
&& PreRegAlloc
)
589 MaxDuplicateCount
= TailDupIndirectBranchSize
;
591 // Check the instructions in the block to determine whether tail-duplication
592 // is invalid or unlikely to be profitable.
593 unsigned InstrCount
= 0;
594 for (MachineInstr
&MI
: TailBB
) {
595 // Non-duplicable things shouldn't be tail-duplicated.
596 // CFI instructions are marked as non-duplicable, because Darwin compact
597 // unwind info emission can't handle multiple prologue setups. In case of
598 // DWARF, allow them be duplicated, so that their existence doesn't prevent
599 // tail duplication of some basic blocks, that would be duplicated otherwise.
600 if (MI
.isNotDuplicable() &&
601 (TailBB
.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
602 !MI
.isCFIInstruction()))
605 // Convergent instructions can be duplicated only if doing so doesn't add
606 // new control dependencies, which is what we're going to do here.
607 if (MI
.isConvergent())
610 // Do not duplicate 'return' instructions if this is a pre-regalloc run.
611 // A return may expand into a lot more instructions (e.g. reload of callee
612 // saved registers) after PEI.
613 if (PreRegAlloc
&& MI
.isReturn())
616 // Avoid duplicating calls before register allocation. Calls presents a
617 // barrier to register allocation so duplicating them may end up increasing
619 if (PreRegAlloc
&& MI
.isCall())
622 if (!MI
.isPHI() && !MI
.isMetaInstruction())
625 if (InstrCount
> MaxDuplicateCount
)
629 // Check if any of the successors of TailBB has a PHI node in which the
630 // value corresponding to TailBB uses a subregister.
631 // If a phi node uses a register paired with a subregister, the actual
632 // "value type" of the phi may differ from the type of the register without
633 // any subregisters. Due to a bug, tail duplication may add a new operand
634 // without a necessary subregister, producing an invalid code. This is
635 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
636 // Disable tail duplication for this case for now, until the problem is
638 for (auto SB
: TailBB
.successors()) {
639 for (auto &I
: *SB
) {
642 unsigned Idx
= getPHISrcRegOpIdx(&I
, &TailBB
);
644 MachineOperand
&PU
= I
.getOperand(Idx
);
645 if (PU
.getSubReg() != 0)
650 if (HasIndirectbr
&& PreRegAlloc
)
659 return canCompletelyDuplicateBB(TailBB
);
662 /// True if this BB has only one unconditional jump.
663 bool TailDuplicator::isSimpleBB(MachineBasicBlock
*TailBB
) {
664 if (TailBB
->succ_size() != 1)
666 if (TailBB
->pred_empty())
668 MachineBasicBlock::iterator I
= TailBB
->getFirstNonDebugInstr();
669 if (I
== TailBB
->end())
671 return I
->isUnconditionalBranch();
674 static bool bothUsedInPHI(const MachineBasicBlock
&A
,
675 const SmallPtrSet
<MachineBasicBlock
*, 8> &SuccsB
) {
676 for (MachineBasicBlock
*BB
: A
.successors())
677 if (SuccsB
.count(BB
) && !BB
->empty() && BB
->begin()->isPHI())
683 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock
&BB
) {
684 for (MachineBasicBlock
*PredBB
: BB
.predecessors()) {
685 if (PredBB
->succ_size() > 1)
688 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
689 SmallVector
<MachineOperand
, 4> PredCond
;
690 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
693 if (!PredCond
.empty())
699 bool TailDuplicator::duplicateSimpleBB(
700 MachineBasicBlock
*TailBB
, SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
701 const DenseSet
<unsigned> &UsedByPhi
,
702 SmallVectorImpl
<MachineInstr
*> &Copies
) {
703 SmallPtrSet
<MachineBasicBlock
*, 8> Succs(TailBB
->succ_begin(),
705 SmallVector
<MachineBasicBlock
*, 8> Preds(TailBB
->pred_begin(),
707 bool Changed
= false;
708 for (MachineBasicBlock
*PredBB
: Preds
) {
709 if (PredBB
->hasEHPadSuccessor())
712 if (bothUsedInPHI(*PredBB
, Succs
))
715 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
716 SmallVector
<MachineOperand
, 4> PredCond
;
717 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
721 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
722 << "From simple Succ: " << *TailBB
);
724 MachineBasicBlock
*NewTarget
= *TailBB
->succ_begin();
725 MachineBasicBlock
*NextBB
= PredBB
->getNextNode();
727 // Make PredFBB explicit.
728 if (PredCond
.empty())
731 // Make fall through explicit.
738 if (PredFBB
== TailBB
)
740 if (PredTBB
== TailBB
)
743 // Make the branch unconditional if possible
744 if (PredTBB
== PredFBB
) {
749 // Avoid adding fall through branches.
750 if (PredFBB
== NextBB
)
752 if (PredTBB
== NextBB
&& PredFBB
== nullptr)
755 auto DL
= PredBB
->findBranchDebugLoc();
756 TII
->removeBranch(*PredBB
);
758 if (!PredBB
->isSuccessor(NewTarget
))
759 PredBB
->replaceSuccessor(TailBB
, NewTarget
);
761 PredBB
->removeSuccessor(TailBB
, true);
762 assert(PredBB
->succ_size() <= 1);
766 TII
->insertBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, DL
);
768 TDBBs
.push_back(PredBB
);
773 bool TailDuplicator::canTailDuplicate(MachineBasicBlock
*TailBB
,
774 MachineBasicBlock
*PredBB
) {
775 // EH edges are ignored by analyzeBranch.
776 if (PredBB
->succ_size() > 1)
779 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
780 SmallVector
<MachineOperand
, 4> PredCond
;
781 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
783 if (!PredCond
.empty())
788 /// If it is profitable, duplicate TailBB's contents in each
789 /// of its predecessors.
790 /// \p IsSimple result of isSimpleBB
791 /// \p TailBB Block to be duplicated.
792 /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
793 /// instead of the previous block in MF's order.
794 /// \p TDBBs A vector to keep track of all blocks tail-duplicated
796 /// \p Copies A vector of copy instructions inserted. Used later to
797 /// walk all the inserted copies and remove redundant ones.
798 bool TailDuplicator::tailDuplicate(bool IsSimple
, MachineBasicBlock
*TailBB
,
799 MachineBasicBlock
*ForcedLayoutPred
,
800 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
801 SmallVectorImpl
<MachineInstr
*> &Copies
) {
802 LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB
)
805 DenseSet
<unsigned> UsedByPhi
;
806 getRegsUsedByPHIs(*TailBB
, &UsedByPhi
);
809 return duplicateSimpleBB(TailBB
, TDBBs
, UsedByPhi
, Copies
);
811 // Iterate through all the unique predecessors and tail-duplicate this
812 // block into them, if possible. Copying the list ahead of time also
813 // avoids trouble with the predecessor list reallocating.
814 bool Changed
= false;
815 SmallSetVector
<MachineBasicBlock
*, 8> Preds(TailBB
->pred_begin(),
817 for (MachineBasicBlock
*PredBB
: Preds
) {
818 assert(TailBB
!= PredBB
&&
819 "Single-block loop should have been rejected earlier!");
821 if (!canTailDuplicate(TailBB
, PredBB
))
824 // Don't duplicate into a fall-through predecessor (at least for now).
825 bool IsLayoutSuccessor
= false;
826 if (ForcedLayoutPred
)
827 IsLayoutSuccessor
= (ForcedLayoutPred
== PredBB
);
828 else if (PredBB
->isLayoutSuccessor(TailBB
) && PredBB
->canFallThrough())
829 IsLayoutSuccessor
= true;
830 if (IsLayoutSuccessor
)
833 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
834 << "From Succ: " << *TailBB
);
836 TDBBs
.push_back(PredBB
);
838 // Remove PredBB's unconditional branch.
839 TII
->removeBranch(*PredBB
);
841 // Clone the contents of TailBB into PredBB.
842 DenseMap
<unsigned, RegSubRegPair
> LocalVRMap
;
843 SmallVector
<std::pair
<unsigned, RegSubRegPair
>, 4> CopyInfos
;
844 for (MachineBasicBlock::iterator I
= TailBB
->begin(), E
= TailBB
->end();
845 I
!= E
; /* empty */) {
846 MachineInstr
*MI
= &*I
;
849 // Replace the uses of the def of the PHI with the register coming
851 processPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
853 // Replace def of virtual registers with new registers, and update
854 // uses with PHI source register or the new registers.
855 duplicateInstruction(MI
, TailBB
, PredBB
, LocalVRMap
, UsedByPhi
);
858 appendCopies(PredBB
, CopyInfos
, Copies
);
861 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
862 SmallVector
<MachineOperand
, 4> PredCond
;
863 TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
);
865 NumTailDupAdded
+= TailBB
->size() - 1; // subtract one for removed branch
868 PredBB
->removeSuccessor(PredBB
->succ_begin());
869 assert(PredBB
->succ_empty() &&
870 "TailDuplicate called on block with multiple successors!");
871 for (MachineBasicBlock
*Succ
: TailBB
->successors())
872 PredBB
->addSuccessor(Succ
, MBPI
->getEdgeProbability(TailBB
, Succ
));
878 // If TailBB was duplicated into all its predecessors except for the prior
879 // block, which falls through unconditionally, move the contents of this
880 // block into the prior block.
881 MachineBasicBlock
*PrevBB
= ForcedLayoutPred
;
883 PrevBB
= &*std::prev(TailBB
->getIterator());
884 MachineBasicBlock
*PriorTBB
= nullptr, *PriorFBB
= nullptr;
885 SmallVector
<MachineOperand
, 4> PriorCond
;
886 // This has to check PrevBB->succ_size() because EH edges are ignored by
888 if (PrevBB
->succ_size() == 1 &&
889 // Layout preds are not always CFG preds. Check.
890 *PrevBB
->succ_begin() == TailBB
&&
891 !TII
->analyzeBranch(*PrevBB
, PriorTBB
, PriorFBB
, PriorCond
) &&
893 (!PriorTBB
|| PriorTBB
== TailBB
) &&
894 TailBB
->pred_size() == 1 &&
895 !TailBB
->hasAddressTaken()) {
896 LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
897 << "From MBB: " << *TailBB
);
898 // There may be a branch to the layout successor. This is unlikely but it
899 // happens. The correct thing to do is to remove the branch before
900 // duplicating the instructions in all cases.
901 TII
->removeBranch(*PrevBB
);
903 DenseMap
<unsigned, RegSubRegPair
> LocalVRMap
;
904 SmallVector
<std::pair
<unsigned, RegSubRegPair
>, 4> CopyInfos
;
905 MachineBasicBlock::iterator I
= TailBB
->begin();
906 // Process PHI instructions first.
907 while (I
!= TailBB
->end() && I
->isPHI()) {
908 // Replace the uses of the def of the PHI with the register coming
910 MachineInstr
*MI
= &*I
++;
911 processPHI(MI
, TailBB
, PrevBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
914 // Now copy the non-PHI instructions.
915 while (I
!= TailBB
->end()) {
916 // Replace def of virtual registers with new registers, and update
917 // uses with PHI source register or the new registers.
918 MachineInstr
*MI
= &*I
++;
919 assert(!MI
->isBundle() && "Not expecting bundles before regalloc!");
920 duplicateInstruction(MI
, TailBB
, PrevBB
, LocalVRMap
, UsedByPhi
);
921 MI
->eraseFromParent();
923 appendCopies(PrevBB
, CopyInfos
, Copies
);
925 TII
->removeBranch(*PrevBB
);
926 // No PHIs to worry about, just splice the instructions over.
927 PrevBB
->splice(PrevBB
->end(), TailBB
, TailBB
->begin(), TailBB
->end());
929 PrevBB
->removeSuccessor(PrevBB
->succ_begin());
930 assert(PrevBB
->succ_empty());
931 PrevBB
->transferSuccessors(TailBB
);
932 TDBBs
.push_back(PrevBB
);
936 // If this is after register allocation, there are no phis to fix.
940 // If we made no changes so far, we are safe.
944 // Handle the nasty case in that we duplicated a block that is part of a loop
945 // into some but not all of its predecessors. For example:
949 // if we duplicate 2 into 1 but not into 3, we end up with
950 // 12 -> 3 <-> 2 -> rest |
953 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
954 // with a phi in 3 (which now dominates 2).
955 // What we do here is introduce a copy in 3 of the register defined by the
956 // phi, just like when we are duplicating 2 into 3, but we don't copy any
957 // real instructions or remove the 3 -> 2 edge from the phi in 2.
958 for (MachineBasicBlock
*PredBB
: Preds
) {
959 if (is_contained(TDBBs
, PredBB
))
963 if (PredBB
->succ_size() != 1)
966 DenseMap
<unsigned, RegSubRegPair
> LocalVRMap
;
967 SmallVector
<std::pair
<unsigned, RegSubRegPair
>, 4> CopyInfos
;
968 MachineBasicBlock::iterator I
= TailBB
->begin();
969 // Process PHI instructions first.
970 while (I
!= TailBB
->end() && I
->isPHI()) {
971 // Replace the uses of the def of the PHI with the register coming
973 MachineInstr
*MI
= &*I
++;
974 processPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, false);
976 appendCopies(PredBB
, CopyInfos
, Copies
);
982 /// At the end of the block \p MBB generate COPY instructions between registers
983 /// described by \p CopyInfos. Append resulting instructions to \p Copies.
984 void TailDuplicator::appendCopies(MachineBasicBlock
*MBB
,
985 SmallVectorImpl
<std::pair
<unsigned,RegSubRegPair
>> &CopyInfos
,
986 SmallVectorImpl
<MachineInstr
*> &Copies
) {
987 MachineBasicBlock::iterator Loc
= MBB
->getFirstTerminator();
988 const MCInstrDesc
&CopyD
= TII
->get(TargetOpcode::COPY
);
989 for (auto &CI
: CopyInfos
) {
990 auto C
= BuildMI(*MBB
, Loc
, DebugLoc(), CopyD
, CI
.first
)
991 .addReg(CI
.second
.Reg
, 0, CI
.second
.SubReg
);
996 /// Remove the specified dead machine basic block from the function, updating
998 void TailDuplicator::removeDeadBlock(
999 MachineBasicBlock
*MBB
,
1000 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
) {
1001 assert(MBB
->pred_empty() && "MBB must be dead!");
1002 LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB
);
1004 if (RemovalCallback
)
1005 (*RemovalCallback
)(MBB
);
1007 // Remove all successors.
1008 while (!MBB
->succ_empty())
1009 MBB
->removeSuccessor(MBB
->succ_end() - 1);
1011 // Remove the block.
1012 MBB
->eraseFromParent();