1 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This utility class duplicates basic blocks ending in unconditional branches
10 // into the tails of their predecessors.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/TailDuplicator.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineSSAUpdater.h"
30 #include "llvm/CodeGen/MachineSizeOpts.h"
31 #include "llvm/CodeGen/TargetInstrInfo.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetMachine.h"
48 #define DEBUG_TYPE "tailduplication"
50 STATISTIC(NumTails
, "Number of tails duplicated");
51 STATISTIC(NumTailDups
, "Number of tail duplicated blocks");
52 STATISTIC(NumTailDupAdded
,
53 "Number of instructions added due to tail duplication");
54 STATISTIC(NumTailDupRemoved
,
55 "Number of instructions removed due to tail duplication");
56 STATISTIC(NumDeadBlocks
, "Number of dead blocks removed");
57 STATISTIC(NumAddedPHIs
, "Number of phis added");
59 // Heuristic for tail duplication.
60 static cl::opt
<unsigned> TailDuplicateSize(
62 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
65 static cl::opt
<unsigned> TailDupIndirectBranchSize(
66 "tail-dup-indirect-size",
67 cl::desc("Maximum instructions to consider tail duplicating blocks that "
68 "end with indirect branches."), cl::init(20),
72 TailDupVerify("tail-dup-verify",
73 cl::desc("Verify sanity of PHI instructions during taildup"),
74 cl::init(false), cl::Hidden
);
76 static cl::opt
<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
79 void TailDuplicator::initMF(MachineFunction
&MFin
, bool PreRegAlloc
,
80 const MachineBranchProbabilityInfo
*MBPIin
,
82 ProfileSummaryInfo
*PSIin
,
83 bool LayoutModeIn
, unsigned TailDupSizeIn
) {
85 TII
= MF
->getSubtarget().getInstrInfo();
86 TRI
= MF
->getSubtarget().getRegisterInfo();
87 MRI
= &MF
->getRegInfo();
92 TailDupSize
= TailDupSizeIn
;
94 assert(MBPI
!= nullptr && "Machine Branch Probability Info required");
96 LayoutMode
= LayoutModeIn
;
97 this->PreRegAlloc
= PreRegAlloc
;
100 static void VerifyPHIs(MachineFunction
&MF
, bool CheckExtra
) {
101 for (MachineBasicBlock
&MBB
: llvm::drop_begin(MF
)) {
102 SmallSetVector
<MachineBasicBlock
*, 8> Preds(MBB
.pred_begin(),
104 MachineBasicBlock::iterator MI
= MBB
.begin();
105 while (MI
!= MBB
.end()) {
108 for (MachineBasicBlock
*PredBB
: Preds
) {
110 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
111 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
112 if (PHIBB
== PredBB
) {
118 dbgs() << "Malformed PHI in " << printMBBReference(MBB
) << ": "
120 dbgs() << " missing input from predecessor "
121 << printMBBReference(*PredBB
) << '\n';
122 llvm_unreachable(nullptr);
126 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
127 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
128 if (CheckExtra
&& !Preds
.count(PHIBB
)) {
129 dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB
)
131 dbgs() << " extra input from predecessor "
132 << printMBBReference(*PHIBB
) << '\n';
133 llvm_unreachable(nullptr);
135 if (PHIBB
->getNumber() < 0) {
136 dbgs() << "Malformed PHI in " << printMBBReference(MBB
) << ": "
138 dbgs() << " non-existing " << printMBBReference(*PHIBB
) << '\n';
139 llvm_unreachable(nullptr);
147 /// Tail duplicate the block and cleanup.
148 /// \p IsSimple - return value of isSimpleBB
149 /// \p MBB - block to be duplicated
150 /// \p ForcedLayoutPred - If non-null, treat this block as the layout
151 /// predecessor, instead of using the ordering in MF
152 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
153 /// all Preds that received a copy of \p MBB.
154 /// \p RemovalCallback - if non-null, called just before MBB is deleted.
155 bool TailDuplicator::tailDuplicateAndUpdate(
156 bool IsSimple
, MachineBasicBlock
*MBB
,
157 MachineBasicBlock
*ForcedLayoutPred
,
158 SmallVectorImpl
<MachineBasicBlock
*> *DuplicatedPreds
,
159 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
,
160 SmallVectorImpl
<MachineBasicBlock
*> *CandidatePtr
) {
161 // Save the successors list.
162 SmallSetVector
<MachineBasicBlock
*, 8> Succs(MBB
->succ_begin(),
165 SmallVector
<MachineBasicBlock
*, 8> TDBBs
;
166 SmallVector
<MachineInstr
*, 16> Copies
;
167 if (!tailDuplicate(IsSimple
, MBB
, ForcedLayoutPred
,
168 TDBBs
, Copies
, CandidatePtr
))
173 SmallVector
<MachineInstr
*, 8> NewPHIs
;
174 MachineSSAUpdater
SSAUpdate(*MF
, &NewPHIs
);
176 // TailBB's immediate successors are now successors of those predecessors
177 // which duplicated TailBB. Add the predecessors as sources to the PHI
179 bool isDead
= MBB
->pred_empty() && !MBB
->hasAddressTaken();
181 updateSuccessorsPHIs(MBB
, isDead
, TDBBs
, Succs
);
183 // If it is dead, remove it.
185 NumTailDupRemoved
+= MBB
->size();
186 removeDeadBlock(MBB
, RemovalCallback
);
191 if (!SSAUpdateVRs
.empty()) {
192 for (unsigned i
= 0, e
= SSAUpdateVRs
.size(); i
!= e
; ++i
) {
193 unsigned VReg
= SSAUpdateVRs
[i
];
194 SSAUpdate
.Initialize(VReg
);
196 // If the original definition is still around, add it as an available
198 MachineInstr
*DefMI
= MRI
->getVRegDef(VReg
);
199 MachineBasicBlock
*DefBB
= nullptr;
201 DefBB
= DefMI
->getParent();
202 SSAUpdate
.AddAvailableValue(DefBB
, VReg
);
205 // Add the new vregs as available values.
206 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
207 SSAUpdateVals
.find(VReg
);
208 for (std::pair
<MachineBasicBlock
*, Register
> &J
: LI
->second
) {
209 MachineBasicBlock
*SrcBB
= J
.first
;
210 Register SrcReg
= J
.second
;
211 SSAUpdate
.AddAvailableValue(SrcBB
, SrcReg
);
214 SmallVector
<MachineOperand
*> DebugUses
;
215 // Rewrite uses that are outside of the original def's block.
216 for (MachineOperand
&UseMO
:
217 llvm::make_early_inc_range(MRI
->use_operands(VReg
))) {
218 MachineInstr
*UseMI
= UseMO
.getParent();
219 // Rewrite debug uses last so that they can take advantage of any
220 // register mappings introduced by other users in its BB, since we
221 // cannot create new register definitions specifically for the debug
222 // instruction (as debug instructions should not affect CodeGen).
223 if (UseMI
->isDebugValue()) {
224 DebugUses
.push_back(&UseMO
);
227 if (UseMI
->getParent() == DefBB
&& !UseMI
->isPHI())
229 SSAUpdate
.RewriteUse(UseMO
);
231 for (auto *UseMO
: DebugUses
) {
232 MachineInstr
*UseMI
= UseMO
->getParent();
234 SSAUpdate
.GetValueInMiddleOfBlock(UseMI
->getParent(), true));
238 SSAUpdateVRs
.clear();
239 SSAUpdateVals
.clear();
242 // Eliminate some of the copies inserted by tail duplication to maintain
244 for (unsigned i
= 0, e
= Copies
.size(); i
!= e
; ++i
) {
245 MachineInstr
*Copy
= Copies
[i
];
248 Register Dst
= Copy
->getOperand(0).getReg();
249 Register Src
= Copy
->getOperand(1).getReg();
250 if (MRI
->hasOneNonDBGUse(Src
) &&
251 MRI
->constrainRegClass(Src
, MRI
->getRegClass(Dst
))) {
252 // Copy is the only use. Do trivial copy propagation here.
253 MRI
->replaceRegWith(Dst
, Src
);
254 Copy
->eraseFromParent();
259 NumAddedPHIs
+= NewPHIs
.size();
262 *DuplicatedPreds
= std::move(TDBBs
);
267 /// Look for small blocks that are unconditionally branched to and do not fall
268 /// through. Tail-duplicate their instructions into their predecessors to
269 /// eliminate (dynamic) branches.
270 bool TailDuplicator::tailDuplicateBlocks() {
271 bool MadeChange
= false;
273 if (PreRegAlloc
&& TailDupVerify
) {
274 LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
275 VerifyPHIs(*MF
, true);
278 for (MachineBasicBlock
&MBB
:
279 llvm::make_early_inc_range(llvm::drop_begin(*MF
))) {
280 if (NumTails
== TailDupLimit
)
283 bool IsSimple
= isSimpleBB(&MBB
);
285 if (!shouldTailDuplicate(IsSimple
, MBB
))
288 MadeChange
|= tailDuplicateAndUpdate(IsSimple
, &MBB
, nullptr);
291 if (PreRegAlloc
&& TailDupVerify
)
292 VerifyPHIs(*MF
, false);
297 static bool isDefLiveOut(Register Reg
, MachineBasicBlock
*BB
,
298 const MachineRegisterInfo
*MRI
) {
299 for (MachineInstr
&UseMI
: MRI
->use_instructions(Reg
)) {
300 if (UseMI
.isDebugValue())
302 if (UseMI
.getParent() != BB
)
308 static unsigned getPHISrcRegOpIdx(MachineInstr
*MI
, MachineBasicBlock
*SrcBB
) {
309 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2)
310 if (MI
->getOperand(i
+ 1).getMBB() == SrcBB
)
315 // Remember which registers are used by phis in this block. This is
316 // used to determine which registers are liveout while modifying the
317 // block (which is why we need to copy the information).
318 static void getRegsUsedByPHIs(const MachineBasicBlock
&BB
,
319 DenseSet
<Register
> *UsedByPhi
) {
320 for (const auto &MI
: BB
) {
323 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
324 Register SrcReg
= MI
.getOperand(i
).getReg();
325 UsedByPhi
->insert(SrcReg
);
330 /// Add a definition and source virtual registers pair for SSA update.
331 void TailDuplicator::addSSAUpdateEntry(Register OrigReg
, Register NewReg
,
332 MachineBasicBlock
*BB
) {
333 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
334 SSAUpdateVals
.find(OrigReg
);
335 if (LI
!= SSAUpdateVals
.end())
336 LI
->second
.push_back(std::make_pair(BB
, NewReg
));
338 AvailableValsTy Vals
;
339 Vals
.push_back(std::make_pair(BB
, NewReg
));
340 SSAUpdateVals
.insert(std::make_pair(OrigReg
, Vals
));
341 SSAUpdateVRs
.push_back(OrigReg
);
345 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
346 /// source register that's contributed by PredBB and update SSA update map.
347 void TailDuplicator::processPHI(
348 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
349 DenseMap
<Register
, RegSubRegPair
> &LocalVRMap
,
350 SmallVectorImpl
<std::pair
<Register
, RegSubRegPair
>> &Copies
,
351 const DenseSet
<Register
> &RegsUsedByPhi
, bool Remove
) {
352 Register DefReg
= MI
->getOperand(0).getReg();
353 unsigned SrcOpIdx
= getPHISrcRegOpIdx(MI
, PredBB
);
354 assert(SrcOpIdx
&& "Unable to find matching PHI source?");
355 Register SrcReg
= MI
->getOperand(SrcOpIdx
).getReg();
356 unsigned SrcSubReg
= MI
->getOperand(SrcOpIdx
).getSubReg();
357 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefReg
);
358 LocalVRMap
.insert(std::make_pair(DefReg
, RegSubRegPair(SrcReg
, SrcSubReg
)));
360 // Insert a copy from source to the end of the block. The def register is the
361 // available value liveout of the block.
362 Register NewDef
= MRI
->createVirtualRegister(RC
);
363 Copies
.push_back(std::make_pair(NewDef
, RegSubRegPair(SrcReg
, SrcSubReg
)));
364 if (isDefLiveOut(DefReg
, TailBB
, MRI
) || RegsUsedByPhi
.count(DefReg
))
365 addSSAUpdateEntry(DefReg
, NewDef
, PredBB
);
370 // Remove PredBB from the PHI node.
371 MI
->removeOperand(SrcOpIdx
+ 1);
372 MI
->removeOperand(SrcOpIdx
);
373 if (MI
->getNumOperands() == 1 && !TailBB
->hasAddressTaken())
374 MI
->eraseFromParent();
375 else if (MI
->getNumOperands() == 1)
376 MI
->setDesc(TII
->get(TargetOpcode::IMPLICIT_DEF
));
379 /// Duplicate a TailBB instruction to PredBB and update
380 /// the source operands due to earlier PHI translation.
381 void TailDuplicator::duplicateInstruction(
382 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
383 DenseMap
<Register
, RegSubRegPair
> &LocalVRMap
,
384 const DenseSet
<Register
> &UsedByPhi
) {
385 // Allow duplication of CFI instructions.
386 if (MI
->isCFIInstruction()) {
387 BuildMI(*PredBB
, PredBB
->end(), PredBB
->findDebugLoc(PredBB
->begin()),
388 TII
->get(TargetOpcode::CFI_INSTRUCTION
))
389 .addCFIIndex(MI
->getOperand(0).getCFIIndex())
390 .setMIFlags(MI
->getFlags());
393 MachineInstr
&NewMI
= TII
->duplicate(*PredBB
, PredBB
->end(), *MI
);
395 for (unsigned i
= 0, e
= NewMI
.getNumOperands(); i
!= e
; ++i
) {
396 MachineOperand
&MO
= NewMI
.getOperand(i
);
399 Register Reg
= MO
.getReg();
400 if (!Reg
.isVirtual())
403 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
404 Register NewReg
= MRI
->createVirtualRegister(RC
);
406 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
407 if (isDefLiveOut(Reg
, TailBB
, MRI
) || UsedByPhi
.count(Reg
))
408 addSSAUpdateEntry(Reg
, NewReg
, PredBB
);
410 auto VI
= LocalVRMap
.find(Reg
);
411 if (VI
!= LocalVRMap
.end()) {
412 // Need to make sure that the register class of the mapped register
413 // will satisfy the constraints of the class of the register being
415 auto *OrigRC
= MRI
->getRegClass(Reg
);
416 auto *MappedRC
= MRI
->getRegClass(VI
->second
.Reg
);
417 const TargetRegisterClass
*ConstrRC
;
418 if (VI
->second
.SubReg
!= 0) {
419 ConstrRC
= TRI
->getMatchingSuperRegClass(MappedRC
, OrigRC
,
422 // The actual constraining (as in "find appropriate new class")
423 // is done by getMatchingSuperRegClass, so now we only need to
424 // change the class of the mapped register.
425 MRI
->setRegClass(VI
->second
.Reg
, ConstrRC
);
428 // For mapped registers that do not have sub-registers, simply
429 // restrict their class to match the original one.
431 // We don't want debug instructions affecting the resulting code so
432 // if we're cloning a debug instruction then just use MappedRC
433 // rather than constraining the register class further.
434 ConstrRC
= NewMI
.isDebugInstr()
436 : MRI
->constrainRegClass(VI
->second
.Reg
, OrigRC
);
440 // If the class constraining succeeded, we can simply replace
441 // the old register with the mapped one.
442 MO
.setReg(VI
->second
.Reg
);
443 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
444 // sub-register, we need to compose the sub-register indices.
446 TRI
->composeSubRegIndices(VI
->second
.SubReg
, MO
.getSubReg()));
448 // The direct replacement is not possible, due to failing register
449 // class constraints. An explicit COPY is necessary. Create one
450 // that can be reused.
451 Register NewReg
= MRI
->createVirtualRegister(OrigRC
);
452 BuildMI(*PredBB
, NewMI
, NewMI
.getDebugLoc(),
453 TII
->get(TargetOpcode::COPY
), NewReg
)
454 .addReg(VI
->second
.Reg
, 0, VI
->second
.SubReg
);
455 LocalVRMap
.erase(VI
);
456 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
458 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
459 // is equivalent to the whole register Reg. Hence, Reg:subreg
460 // is same as NewReg:subreg, so keep the sub-register index
463 // Clear any kill flags from this operand. The new register could
464 // have uses after this one, so kills are not valid here.
472 /// After FromBB is tail duplicated into its predecessor blocks, the successors
473 /// have gained new predecessors. Update the PHI instructions in them
475 void TailDuplicator::updateSuccessorsPHIs(
476 MachineBasicBlock
*FromBB
, bool isDead
,
477 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
478 SmallSetVector
<MachineBasicBlock
*, 8> &Succs
) {
479 for (MachineBasicBlock
*SuccBB
: Succs
) {
480 for (MachineInstr
&MI
: *SuccBB
) {
483 MachineInstrBuilder
MIB(*FromBB
->getParent(), MI
);
485 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
486 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
487 if (MO
.getMBB() == FromBB
) {
494 MachineOperand
&MO0
= MI
.getOperand(Idx
);
495 Register Reg
= MO0
.getReg();
497 // Folded into the previous BB.
498 // There could be duplicate phi source entries. FIXME: Should sdisel
499 // or earlier pass fixed this?
500 for (unsigned i
= MI
.getNumOperands() - 2; i
!= Idx
; i
-= 2) {
501 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
502 if (MO
.getMBB() == FromBB
) {
503 MI
.removeOperand(i
+ 1);
510 // If Idx is set, the operands at Idx and Idx+1 must be removed.
511 // We reuse the location to avoid expensive removeOperand calls.
513 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
514 SSAUpdateVals
.find(Reg
);
515 if (LI
!= SSAUpdateVals
.end()) {
516 // This register is defined in the tail block.
517 for (const std::pair
<MachineBasicBlock
*, Register
> &J
: LI
->second
) {
518 MachineBasicBlock
*SrcBB
= J
.first
;
519 // If we didn't duplicate a bb into a particular predecessor, we
520 // might still have added an entry to SSAUpdateVals to correcly
521 // recompute SSA. If that case, avoid adding a dummy extra argument
523 if (!SrcBB
->isSuccessor(SuccBB
))
526 Register SrcReg
= J
.second
;
528 MI
.getOperand(Idx
).setReg(SrcReg
);
529 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
532 MIB
.addReg(SrcReg
).addMBB(SrcBB
);
536 // Live in tail block, must also be live in predecessors.
537 for (MachineBasicBlock
*SrcBB
: TDBBs
) {
539 MI
.getOperand(Idx
).setReg(Reg
);
540 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
543 MIB
.addReg(Reg
).addMBB(SrcBB
);
548 MI
.removeOperand(Idx
+ 1);
549 MI
.removeOperand(Idx
);
555 /// Determine if it is profitable to duplicate this block.
556 bool TailDuplicator::shouldTailDuplicate(bool IsSimple
,
557 MachineBasicBlock
&TailBB
) {
558 // When doing tail-duplication during layout, the block ordering is in flux,
559 // so canFallThrough returns a result based on incorrect information and
560 // should just be ignored.
561 if (!LayoutMode
&& TailBB
.canFallThrough())
564 // Don't try to tail-duplicate single-block loops.
565 if (TailBB
.isSuccessor(&TailBB
))
568 // Set the limit on the cost to duplicate. When optimizing for size,
569 // duplicate only one, because one branch instruction can be eliminated to
570 // compensate for the duplication.
571 unsigned MaxDuplicateCount
;
572 bool OptForSize
= MF
->getFunction().hasOptSize() ||
573 llvm::shouldOptimizeForSize(&TailBB
, PSI
, MBFI
);
574 if (TailDupSize
== 0)
575 MaxDuplicateCount
= TailDuplicateSize
;
577 MaxDuplicateCount
= TailDupSize
;
579 MaxDuplicateCount
= 1;
581 // If the block to be duplicated ends in an unanalyzable fallthrough, don't
583 // A similar check is necessary in MachineBlockPlacement to make sure pairs of
584 // blocks with unanalyzable fallthrough get layed out contiguously.
585 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
586 SmallVector
<MachineOperand
, 4> PredCond
;
587 if (TII
->analyzeBranch(TailBB
, PredTBB
, PredFBB
, PredCond
) &&
588 TailBB
.canFallThrough())
591 // If the target has hardware branch prediction that can handle indirect
592 // branches, duplicating them can often make them predictable when there
593 // are common paths through the code. The limit needs to be high enough
594 // to allow undoing the effects of tail merging and other optimizations
595 // that rearrange the predecessors of the indirect branch.
597 bool HasIndirectbr
= false;
599 HasIndirectbr
= TailBB
.back().isIndirectBranch();
601 if (HasIndirectbr
&& PreRegAlloc
)
602 MaxDuplicateCount
= TailDupIndirectBranchSize
;
604 // Check the instructions in the block to determine whether tail-duplication
605 // is invalid or unlikely to be profitable.
606 unsigned InstrCount
= 0;
607 for (MachineInstr
&MI
: TailBB
) {
608 // Non-duplicable things shouldn't be tail-duplicated.
609 // CFI instructions are marked as non-duplicable, because Darwin compact
610 // unwind info emission can't handle multiple prologue setups. In case of
611 // DWARF, allow them be duplicated, so that their existence doesn't prevent
612 // tail duplication of some basic blocks, that would be duplicated otherwise.
613 if (MI
.isNotDuplicable() &&
614 (TailBB
.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
615 !MI
.isCFIInstruction()))
618 // Convergent instructions can be duplicated only if doing so doesn't add
619 // new control dependencies, which is what we're going to do here.
620 if (MI
.isConvergent())
623 // Do not duplicate 'return' instructions if this is a pre-regalloc run.
624 // A return may expand into a lot more instructions (e.g. reload of callee
625 // saved registers) after PEI.
626 if (PreRegAlloc
&& MI
.isReturn())
629 // Avoid duplicating calls before register allocation. Calls presents a
630 // barrier to register allocation so duplicating them may end up increasing
632 if (PreRegAlloc
&& MI
.isCall())
635 // TailDuplicator::appendCopies will erroneously place COPYs after
636 // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same
637 // bug that was fixed in f7a53d82c090.
638 // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point
639 // for the COPY when replacing PHIs.
640 if (MI
.getOpcode() == TargetOpcode::INLINEASM_BR
)
644 InstrCount
+= MI
.getBundleSize();
645 else if (!MI
.isPHI() && !MI
.isMetaInstruction())
648 if (InstrCount
> MaxDuplicateCount
)
652 // Check if any of the successors of TailBB has a PHI node in which the
653 // value corresponding to TailBB uses a subregister.
654 // If a phi node uses a register paired with a subregister, the actual
655 // "value type" of the phi may differ from the type of the register without
656 // any subregisters. Due to a bug, tail duplication may add a new operand
657 // without a necessary subregister, producing an invalid code. This is
658 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
659 // Disable tail duplication for this case for now, until the problem is
661 for (auto *SB
: TailBB
.successors()) {
662 for (auto &I
: *SB
) {
665 unsigned Idx
= getPHISrcRegOpIdx(&I
, &TailBB
);
667 MachineOperand
&PU
= I
.getOperand(Idx
);
668 if (PU
.getSubReg() != 0)
673 if (HasIndirectbr
&& PreRegAlloc
)
682 return canCompletelyDuplicateBB(TailBB
);
685 /// True if this BB has only one unconditional jump.
686 bool TailDuplicator::isSimpleBB(MachineBasicBlock
*TailBB
) {
687 if (TailBB
->succ_size() != 1)
689 if (TailBB
->pred_empty())
691 MachineBasicBlock::iterator I
= TailBB
->getFirstNonDebugInstr(true);
692 if (I
== TailBB
->end())
694 return I
->isUnconditionalBranch();
697 static bool bothUsedInPHI(const MachineBasicBlock
&A
,
698 const SmallPtrSet
<MachineBasicBlock
*, 8> &SuccsB
) {
699 for (MachineBasicBlock
*BB
: A
.successors())
700 if (SuccsB
.count(BB
) && !BB
->empty() && BB
->begin()->isPHI())
706 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock
&BB
) {
707 for (MachineBasicBlock
*PredBB
: BB
.predecessors()) {
708 if (PredBB
->succ_size() > 1)
711 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
712 SmallVector
<MachineOperand
, 4> PredCond
;
713 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
716 if (!PredCond
.empty())
722 bool TailDuplicator::duplicateSimpleBB(
723 MachineBasicBlock
*TailBB
, SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
724 const DenseSet
<Register
> &UsedByPhi
) {
725 SmallPtrSet
<MachineBasicBlock
*, 8> Succs(TailBB
->succ_begin(),
727 SmallVector
<MachineBasicBlock
*, 8> Preds(TailBB
->predecessors());
728 bool Changed
= false;
729 for (MachineBasicBlock
*PredBB
: Preds
) {
730 if (PredBB
->hasEHPadSuccessor() || PredBB
->mayHaveInlineAsmBr())
733 if (bothUsedInPHI(*PredBB
, Succs
))
736 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
737 SmallVector
<MachineOperand
, 4> PredCond
;
738 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
742 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
743 << "From simple Succ: " << *TailBB
);
745 MachineBasicBlock
*NewTarget
= *TailBB
->succ_begin();
746 MachineBasicBlock
*NextBB
= PredBB
->getNextNode();
748 // Make PredFBB explicit.
749 if (PredCond
.empty())
752 // Make fall through explicit.
759 if (PredFBB
== TailBB
)
761 if (PredTBB
== TailBB
)
764 // Make the branch unconditional if possible
765 if (PredTBB
== PredFBB
) {
770 // Avoid adding fall through branches.
771 if (PredFBB
== NextBB
)
773 if (PredTBB
== NextBB
&& PredFBB
== nullptr)
776 auto DL
= PredBB
->findBranchDebugLoc();
777 TII
->removeBranch(*PredBB
);
779 if (!PredBB
->isSuccessor(NewTarget
))
780 PredBB
->replaceSuccessor(TailBB
, NewTarget
);
782 PredBB
->removeSuccessor(TailBB
, true);
783 assert(PredBB
->succ_size() <= 1);
787 TII
->insertBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, DL
);
789 TDBBs
.push_back(PredBB
);
794 bool TailDuplicator::canTailDuplicate(MachineBasicBlock
*TailBB
,
795 MachineBasicBlock
*PredBB
) {
796 // EH edges are ignored by analyzeBranch.
797 if (PredBB
->succ_size() > 1)
800 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
801 SmallVector
<MachineOperand
, 4> PredCond
;
802 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
804 if (!PredCond
.empty())
806 // FIXME: This is overly conservative; it may be ok to relax this in the
807 // future under more specific conditions. If TailBB is an INLINEASM_BR
808 // indirect target, we need to see if the edge from PredBB to TailBB is from
809 // an INLINEASM_BR in PredBB, and then also if that edge was from the
810 // indirect target list, fallthrough/default target, or potentially both. If
811 // it's both, TailDuplicator::tailDuplicate will remove the edge, corrupting
812 // the successor list in PredBB and predecessor list in TailBB.
813 if (TailBB
->isInlineAsmBrIndirectTarget())
818 /// If it is profitable, duplicate TailBB's contents in each
819 /// of its predecessors.
820 /// \p IsSimple result of isSimpleBB
821 /// \p TailBB Block to be duplicated.
822 /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
823 /// instead of the previous block in MF's order.
824 /// \p TDBBs A vector to keep track of all blocks tail-duplicated
826 /// \p Copies A vector of copy instructions inserted. Used later to
827 /// walk all the inserted copies and remove redundant ones.
828 bool TailDuplicator::tailDuplicate(bool IsSimple
, MachineBasicBlock
*TailBB
,
829 MachineBasicBlock
*ForcedLayoutPred
,
830 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
831 SmallVectorImpl
<MachineInstr
*> &Copies
,
832 SmallVectorImpl
<MachineBasicBlock
*> *CandidatePtr
) {
833 LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB
)
836 bool ShouldUpdateTerminators
= TailBB
->canFallThrough();
838 DenseSet
<Register
> UsedByPhi
;
839 getRegsUsedByPHIs(*TailBB
, &UsedByPhi
);
842 return duplicateSimpleBB(TailBB
, TDBBs
, UsedByPhi
);
844 // Iterate through all the unique predecessors and tail-duplicate this
845 // block into them, if possible. Copying the list ahead of time also
846 // avoids trouble with the predecessor list reallocating.
847 bool Changed
= false;
848 SmallSetVector
<MachineBasicBlock
*, 8> Preds
;
850 Preds
.insert(CandidatePtr
->begin(), CandidatePtr
->end());
852 Preds
.insert(TailBB
->pred_begin(), TailBB
->pred_end());
854 for (MachineBasicBlock
*PredBB
: Preds
) {
855 assert(TailBB
!= PredBB
&&
856 "Single-block loop should have been rejected earlier!");
858 if (!canTailDuplicate(TailBB
, PredBB
))
861 // Don't duplicate into a fall-through predecessor (at least for now).
862 // If profile is available, findDuplicateCandidates can choose better
863 // fall-through predecessor.
864 if (!(MF
->getFunction().hasProfileData() && LayoutMode
)) {
865 bool IsLayoutSuccessor
= false;
866 if (ForcedLayoutPred
)
867 IsLayoutSuccessor
= (ForcedLayoutPred
== PredBB
);
868 else if (PredBB
->isLayoutSuccessor(TailBB
) && PredBB
->canFallThrough())
869 IsLayoutSuccessor
= true;
870 if (IsLayoutSuccessor
)
874 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
875 << "From Succ: " << *TailBB
);
877 TDBBs
.push_back(PredBB
);
879 // Remove PredBB's unconditional branch.
880 TII
->removeBranch(*PredBB
);
882 // Clone the contents of TailBB into PredBB.
883 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
884 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
885 for (MachineInstr
&MI
: llvm::make_early_inc_range(*TailBB
)) {
887 // Replace the uses of the def of the PHI with the register coming
889 processPHI(&MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
891 // Replace def of virtual registers with new registers, and update
892 // uses with PHI source register or the new registers.
893 duplicateInstruction(&MI
, TailBB
, PredBB
, LocalVRMap
, UsedByPhi
);
896 appendCopies(PredBB
, CopyInfos
, Copies
);
898 NumTailDupAdded
+= TailBB
->size() - 1; // subtract one for removed branch
901 PredBB
->removeSuccessor(PredBB
->succ_begin());
902 assert(PredBB
->succ_empty() &&
903 "TailDuplicate called on block with multiple successors!");
904 for (MachineBasicBlock
*Succ
: TailBB
->successors())
905 PredBB
->addSuccessor(Succ
, MBPI
->getEdgeProbability(TailBB
, Succ
));
907 // Update branches in pred to jump to tail's layout successor if needed.
908 if (ShouldUpdateTerminators
)
909 PredBB
->updateTerminator(TailBB
->getNextNode());
915 // If TailBB was duplicated into all its predecessors except for the prior
916 // block, which falls through unconditionally, move the contents of this
917 // block into the prior block.
918 MachineBasicBlock
*PrevBB
= ForcedLayoutPred
;
920 PrevBB
= &*std::prev(TailBB
->getIterator());
921 MachineBasicBlock
*PriorTBB
= nullptr, *PriorFBB
= nullptr;
922 SmallVector
<MachineOperand
, 4> PriorCond
;
923 // This has to check PrevBB->succ_size() because EH edges are ignored by
925 if (PrevBB
->succ_size() == 1 &&
926 // Layout preds are not always CFG preds. Check.
927 *PrevBB
->succ_begin() == TailBB
&&
928 !TII
->analyzeBranch(*PrevBB
, PriorTBB
, PriorFBB
, PriorCond
) &&
930 (!PriorTBB
|| PriorTBB
== TailBB
) &&
931 TailBB
->pred_size() == 1 &&
932 !TailBB
->hasAddressTaken()) {
933 LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
934 << "From MBB: " << *TailBB
);
935 // There may be a branch to the layout successor. This is unlikely but it
936 // happens. The correct thing to do is to remove the branch before
937 // duplicating the instructions in all cases.
938 bool RemovedBranches
= TII
->removeBranch(*PrevBB
) != 0;
940 // If there are still tail instructions, abort the merge
941 if (PrevBB
->getFirstTerminator() == PrevBB
->end()) {
943 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
944 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
945 MachineBasicBlock::iterator I
= TailBB
->begin();
946 // Process PHI instructions first.
947 while (I
!= TailBB
->end() && I
->isPHI()) {
948 // Replace the uses of the def of the PHI with the register coming
950 MachineInstr
*MI
= &*I
++;
951 processPHI(MI
, TailBB
, PrevBB
, LocalVRMap
, CopyInfos
, UsedByPhi
,
955 // Now copy the non-PHI instructions.
956 while (I
!= TailBB
->end()) {
957 // Replace def of virtual registers with new registers, and update
958 // uses with PHI source register or the new registers.
959 MachineInstr
*MI
= &*I
++;
960 assert(!MI
->isBundle() && "Not expecting bundles before regalloc!");
961 duplicateInstruction(MI
, TailBB
, PrevBB
, LocalVRMap
, UsedByPhi
);
962 MI
->eraseFromParent();
964 appendCopies(PrevBB
, CopyInfos
, Copies
);
966 TII
->removeBranch(*PrevBB
);
967 // No PHIs to worry about, just splice the instructions over.
968 PrevBB
->splice(PrevBB
->end(), TailBB
, TailBB
->begin(), TailBB
->end());
970 PrevBB
->removeSuccessor(PrevBB
->succ_begin());
971 assert(PrevBB
->succ_empty());
972 PrevBB
->transferSuccessors(TailBB
);
974 // Update branches in PrevBB based on Tail's layout successor.
975 if (ShouldUpdateTerminators
)
976 PrevBB
->updateTerminator(TailBB
->getNextNode());
978 TDBBs
.push_back(PrevBB
);
981 LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still "
982 "contains terminator instructions");
983 // Return early if no changes were made
985 return RemovedBranches
;
987 Changed
|= RemovedBranches
;
990 // If this is after register allocation, there are no phis to fix.
994 // If we made no changes so far, we are safe.
998 // Handle the nasty case in that we duplicated a block that is part of a loop
999 // into some but not all of its predecessors. For example:
1003 // if we duplicate 2 into 1 but not into 3, we end up with
1004 // 12 -> 3 <-> 2 -> rest |
1007 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
1008 // with a phi in 3 (which now dominates 2).
1009 // What we do here is introduce a copy in 3 of the register defined by the
1010 // phi, just like when we are duplicating 2 into 3, but we don't copy any
1011 // real instructions or remove the 3 -> 2 edge from the phi in 2.
1012 for (MachineBasicBlock
*PredBB
: Preds
) {
1013 if (is_contained(TDBBs
, PredBB
))
1017 if (PredBB
->succ_size() != 1)
1020 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
1021 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
1022 // Process PHI instructions first.
1023 for (MachineInstr
&MI
: make_early_inc_range(TailBB
->phis())) {
1024 // Replace the uses of the def of the PHI with the register coming
1026 processPHI(&MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, false);
1028 appendCopies(PredBB
, CopyInfos
, Copies
);
1034 /// At the end of the block \p MBB generate COPY instructions between registers
1035 /// described by \p CopyInfos. Append resulting instructions to \p Copies.
1036 void TailDuplicator::appendCopies(MachineBasicBlock
*MBB
,
1037 SmallVectorImpl
<std::pair
<Register
, RegSubRegPair
>> &CopyInfos
,
1038 SmallVectorImpl
<MachineInstr
*> &Copies
) {
1039 MachineBasicBlock::iterator Loc
= MBB
->getFirstTerminator();
1040 const MCInstrDesc
&CopyD
= TII
->get(TargetOpcode::COPY
);
1041 for (auto &CI
: CopyInfos
) {
1042 auto C
= BuildMI(*MBB
, Loc
, DebugLoc(), CopyD
, CI
.first
)
1043 .addReg(CI
.second
.Reg
, 0, CI
.second
.SubReg
);
1044 Copies
.push_back(C
);
1048 /// Remove the specified dead machine basic block from the function, updating
1050 void TailDuplicator::removeDeadBlock(
1051 MachineBasicBlock
*MBB
,
1052 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
) {
1053 assert(MBB
->pred_empty() && "MBB must be dead!");
1054 LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB
);
1056 MachineFunction
*MF
= MBB
->getParent();
1057 // Update the call site info.
1058 for (const MachineInstr
&MI
: *MBB
)
1059 if (MI
.shouldUpdateCallSiteInfo())
1060 MF
->eraseCallSiteInfo(&MI
);
1062 if (RemovalCallback
)
1063 (*RemovalCallback
)(MBB
);
1065 // Remove all successors.
1066 while (!MBB
->succ_empty())
1067 MBB
->removeSuccessor(MBB
->succ_end() - 1);
1069 // Remove the block.
1070 MBB
->eraseFromParent();