1 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This utility class duplicates basic blocks ending in unconditional branches
10 // into the tails of their predecessors.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/TailDuplicator.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
25 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/MachineSizeOpts.h"
32 #include "llvm/CodeGen/MachineSSAUpdater.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/CodeGen/TargetRegisterInfo.h"
35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
36 #include "llvm/IR/DebugLoc.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetMachine.h"
50 #define DEBUG_TYPE "tailduplication"
52 STATISTIC(NumTails
, "Number of tails duplicated");
53 STATISTIC(NumTailDups
, "Number of tail duplicated blocks");
54 STATISTIC(NumTailDupAdded
,
55 "Number of instructions added due to tail duplication");
56 STATISTIC(NumTailDupRemoved
,
57 "Number of instructions removed due to tail duplication");
58 STATISTIC(NumDeadBlocks
, "Number of dead blocks removed");
59 STATISTIC(NumAddedPHIs
, "Number of phis added");
61 // Heuristic for tail duplication.
62 static cl::opt
<unsigned> TailDuplicateSize(
64 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
67 static cl::opt
<unsigned> TailDupIndirectBranchSize(
68 "tail-dup-indirect-size",
69 cl::desc("Maximum instructions to consider tail duplicating blocks that "
70 "end with indirect branches."), cl::init(20),
74 TailDupVerify("tail-dup-verify",
75 cl::desc("Verify sanity of PHI instructions during taildup"),
76 cl::init(false), cl::Hidden
);
78 static cl::opt
<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
81 void TailDuplicator::initMF(MachineFunction
&MFin
, bool PreRegAlloc
,
82 const MachineBranchProbabilityInfo
*MBPIin
,
84 ProfileSummaryInfo
*PSIin
,
85 bool LayoutModeIn
, unsigned TailDupSizeIn
) {
87 TII
= MF
->getSubtarget().getInstrInfo();
88 TRI
= MF
->getSubtarget().getRegisterInfo();
89 MRI
= &MF
->getRegInfo();
94 TailDupSize
= TailDupSizeIn
;
96 assert(MBPI
!= nullptr && "Machine Branch Probability Info required");
98 LayoutMode
= LayoutModeIn
;
99 this->PreRegAlloc
= PreRegAlloc
;
102 static void VerifyPHIs(MachineFunction
&MF
, bool CheckExtra
) {
103 for (MachineFunction::iterator I
= ++MF
.begin(), E
= MF
.end(); I
!= E
; ++I
) {
104 MachineBasicBlock
*MBB
= &*I
;
105 SmallSetVector
<MachineBasicBlock
*, 8> Preds(MBB
->pred_begin(),
107 MachineBasicBlock::iterator MI
= MBB
->begin();
108 while (MI
!= MBB
->end()) {
111 for (MachineBasicBlock
*PredBB
: Preds
) {
113 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
114 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
115 if (PHIBB
== PredBB
) {
121 dbgs() << "Malformed PHI in " << printMBBReference(*MBB
) << ": "
123 dbgs() << " missing input from predecessor "
124 << printMBBReference(*PredBB
) << '\n';
125 llvm_unreachable(nullptr);
129 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2) {
130 MachineBasicBlock
*PHIBB
= MI
->getOperand(i
+ 1).getMBB();
131 if (CheckExtra
&& !Preds
.count(PHIBB
)) {
132 dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB
)
134 dbgs() << " extra input from predecessor "
135 << printMBBReference(*PHIBB
) << '\n';
136 llvm_unreachable(nullptr);
138 if (PHIBB
->getNumber() < 0) {
139 dbgs() << "Malformed PHI in " << printMBBReference(*MBB
) << ": "
141 dbgs() << " non-existing " << printMBBReference(*PHIBB
) << '\n';
142 llvm_unreachable(nullptr);
150 /// Tail duplicate the block and cleanup.
151 /// \p IsSimple - return value of isSimpleBB
152 /// \p MBB - block to be duplicated
153 /// \p ForcedLayoutPred - If non-null, treat this block as the layout
154 /// predecessor, instead of using the ordering in MF
155 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
156 /// all Preds that received a copy of \p MBB.
157 /// \p RemovalCallback - if non-null, called just before MBB is deleted.
158 bool TailDuplicator::tailDuplicateAndUpdate(
159 bool IsSimple
, MachineBasicBlock
*MBB
,
160 MachineBasicBlock
*ForcedLayoutPred
,
161 SmallVectorImpl
<MachineBasicBlock
*> *DuplicatedPreds
,
162 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
,
163 SmallVectorImpl
<MachineBasicBlock
*> *CandidatePtr
) {
164 // Save the successors list.
165 SmallSetVector
<MachineBasicBlock
*, 8> Succs(MBB
->succ_begin(),
168 SmallVector
<MachineBasicBlock
*, 8> TDBBs
;
169 SmallVector
<MachineInstr
*, 16> Copies
;
170 if (!tailDuplicate(IsSimple
, MBB
, ForcedLayoutPred
,
171 TDBBs
, Copies
, CandidatePtr
))
176 SmallVector
<MachineInstr
*, 8> NewPHIs
;
177 MachineSSAUpdater
SSAUpdate(*MF
, &NewPHIs
);
179 // TailBB's immediate successors are now successors of those predecessors
180 // which duplicated TailBB. Add the predecessors as sources to the PHI
182 bool isDead
= MBB
->pred_empty() && !MBB
->hasAddressTaken();
184 updateSuccessorsPHIs(MBB
, isDead
, TDBBs
, Succs
);
186 // If it is dead, remove it.
188 NumTailDupRemoved
+= MBB
->size();
189 removeDeadBlock(MBB
, RemovalCallback
);
194 if (!SSAUpdateVRs
.empty()) {
195 for (unsigned i
= 0, e
= SSAUpdateVRs
.size(); i
!= e
; ++i
) {
196 unsigned VReg
= SSAUpdateVRs
[i
];
197 SSAUpdate
.Initialize(VReg
);
199 // If the original definition is still around, add it as an available
201 MachineInstr
*DefMI
= MRI
->getVRegDef(VReg
);
202 MachineBasicBlock
*DefBB
= nullptr;
204 DefBB
= DefMI
->getParent();
205 SSAUpdate
.AddAvailableValue(DefBB
, VReg
);
208 // Add the new vregs as available values.
209 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
210 SSAUpdateVals
.find(VReg
);
211 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
212 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
213 Register SrcReg
= LI
->second
[j
].second
;
214 SSAUpdate
.AddAvailableValue(SrcBB
, SrcReg
);
217 // Rewrite uses that are outside of the original def's block.
218 MachineRegisterInfo::use_iterator UI
= MRI
->use_begin(VReg
);
219 // Only remove instructions after loop, as DBG_VALUE_LISTs with multiple
220 // uses of VReg may invalidate the use iterator when erased.
221 SmallPtrSet
<MachineInstr
*, 4> InstrsToRemove
;
222 while (UI
!= MRI
->use_end()) {
223 MachineOperand
&UseMO
= *UI
;
224 MachineInstr
*UseMI
= UseMO
.getParent();
226 if (UseMI
->isDebugValue()) {
227 // SSAUpdate can replace the use with an undef. That creates
228 // a debug instruction that is a kill.
229 // FIXME: Should it SSAUpdate job to delete debug instructions
230 // instead of replacing the use with undef?
231 InstrsToRemove
.insert(UseMI
);
234 if (UseMI
->getParent() == DefBB
&& !UseMI
->isPHI())
236 SSAUpdate
.RewriteUse(UseMO
);
238 for (auto *MI
: InstrsToRemove
)
239 MI
->eraseFromParent();
242 SSAUpdateVRs
.clear();
243 SSAUpdateVals
.clear();
246 // Eliminate some of the copies inserted by tail duplication to maintain
248 for (unsigned i
= 0, e
= Copies
.size(); i
!= e
; ++i
) {
249 MachineInstr
*Copy
= Copies
[i
];
252 Register Dst
= Copy
->getOperand(0).getReg();
253 Register Src
= Copy
->getOperand(1).getReg();
254 if (MRI
->hasOneNonDBGUse(Src
) &&
255 MRI
->constrainRegClass(Src
, MRI
->getRegClass(Dst
))) {
256 // Copy is the only use. Do trivial copy propagation here.
257 MRI
->replaceRegWith(Dst
, Src
);
258 Copy
->eraseFromParent();
263 NumAddedPHIs
+= NewPHIs
.size();
266 *DuplicatedPreds
= std::move(TDBBs
);
271 /// Look for small blocks that are unconditionally branched to and do not fall
272 /// through. Tail-duplicate their instructions into their predecessors to
273 /// eliminate (dynamic) branches.
274 bool TailDuplicator::tailDuplicateBlocks() {
275 bool MadeChange
= false;
277 if (PreRegAlloc
&& TailDupVerify
) {
278 LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
279 VerifyPHIs(*MF
, true);
282 for (MachineFunction::iterator I
= ++MF
->begin(), E
= MF
->end(); I
!= E
;) {
283 MachineBasicBlock
*MBB
= &*I
++;
285 if (NumTails
== TailDupLimit
)
288 bool IsSimple
= isSimpleBB(MBB
);
290 if (!shouldTailDuplicate(IsSimple
, *MBB
))
293 MadeChange
|= tailDuplicateAndUpdate(IsSimple
, MBB
, nullptr);
296 if (PreRegAlloc
&& TailDupVerify
)
297 VerifyPHIs(*MF
, false);
302 static bool isDefLiveOut(Register Reg
, MachineBasicBlock
*BB
,
303 const MachineRegisterInfo
*MRI
) {
304 for (MachineInstr
&UseMI
: MRI
->use_instructions(Reg
)) {
305 if (UseMI
.isDebugValue())
307 if (UseMI
.getParent() != BB
)
313 static unsigned getPHISrcRegOpIdx(MachineInstr
*MI
, MachineBasicBlock
*SrcBB
) {
314 for (unsigned i
= 1, e
= MI
->getNumOperands(); i
!= e
; i
+= 2)
315 if (MI
->getOperand(i
+ 1).getMBB() == SrcBB
)
320 // Remember which registers are used by phis in this block. This is
321 // used to determine which registers are liveout while modifying the
322 // block (which is why we need to copy the information).
323 static void getRegsUsedByPHIs(const MachineBasicBlock
&BB
,
324 DenseSet
<Register
> *UsedByPhi
) {
325 for (const auto &MI
: BB
) {
328 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
329 Register SrcReg
= MI
.getOperand(i
).getReg();
330 UsedByPhi
->insert(SrcReg
);
335 /// Add a definition and source virtual registers pair for SSA update.
336 void TailDuplicator::addSSAUpdateEntry(Register OrigReg
, Register NewReg
,
337 MachineBasicBlock
*BB
) {
338 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
339 SSAUpdateVals
.find(OrigReg
);
340 if (LI
!= SSAUpdateVals
.end())
341 LI
->second
.push_back(std::make_pair(BB
, NewReg
));
343 AvailableValsTy Vals
;
344 Vals
.push_back(std::make_pair(BB
, NewReg
));
345 SSAUpdateVals
.insert(std::make_pair(OrigReg
, Vals
));
346 SSAUpdateVRs
.push_back(OrigReg
);
350 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
351 /// source register that's contributed by PredBB and update SSA update map.
352 void TailDuplicator::processPHI(
353 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
354 DenseMap
<Register
, RegSubRegPair
> &LocalVRMap
,
355 SmallVectorImpl
<std::pair
<Register
, RegSubRegPair
>> &Copies
,
356 const DenseSet
<Register
> &RegsUsedByPhi
, bool Remove
) {
357 Register DefReg
= MI
->getOperand(0).getReg();
358 unsigned SrcOpIdx
= getPHISrcRegOpIdx(MI
, PredBB
);
359 assert(SrcOpIdx
&& "Unable to find matching PHI source?");
360 Register SrcReg
= MI
->getOperand(SrcOpIdx
).getReg();
361 unsigned SrcSubReg
= MI
->getOperand(SrcOpIdx
).getSubReg();
362 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefReg
);
363 LocalVRMap
.insert(std::make_pair(DefReg
, RegSubRegPair(SrcReg
, SrcSubReg
)));
365 // Insert a copy from source to the end of the block. The def register is the
366 // available value liveout of the block.
367 Register NewDef
= MRI
->createVirtualRegister(RC
);
368 Copies
.push_back(std::make_pair(NewDef
, RegSubRegPair(SrcReg
, SrcSubReg
)));
369 if (isDefLiveOut(DefReg
, TailBB
, MRI
) || RegsUsedByPhi
.count(DefReg
))
370 addSSAUpdateEntry(DefReg
, NewDef
, PredBB
);
375 // Remove PredBB from the PHI node.
376 MI
->RemoveOperand(SrcOpIdx
+ 1);
377 MI
->RemoveOperand(SrcOpIdx
);
378 if (MI
->getNumOperands() == 1)
379 MI
->eraseFromParent();
382 /// Duplicate a TailBB instruction to PredBB and update
383 /// the source operands due to earlier PHI translation.
384 void TailDuplicator::duplicateInstruction(
385 MachineInstr
*MI
, MachineBasicBlock
*TailBB
, MachineBasicBlock
*PredBB
,
386 DenseMap
<Register
, RegSubRegPair
> &LocalVRMap
,
387 const DenseSet
<Register
> &UsedByPhi
) {
388 // Allow duplication of CFI instructions.
389 if (MI
->isCFIInstruction()) {
390 BuildMI(*PredBB
, PredBB
->end(), PredBB
->findDebugLoc(PredBB
->begin()),
391 TII
->get(TargetOpcode::CFI_INSTRUCTION
)).addCFIIndex(
392 MI
->getOperand(0).getCFIIndex());
395 MachineInstr
&NewMI
= TII
->duplicate(*PredBB
, PredBB
->end(), *MI
);
397 for (unsigned i
= 0, e
= NewMI
.getNumOperands(); i
!= e
; ++i
) {
398 MachineOperand
&MO
= NewMI
.getOperand(i
);
401 Register Reg
= MO
.getReg();
402 if (!Register::isVirtualRegister(Reg
))
405 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
406 Register NewReg
= MRI
->createVirtualRegister(RC
);
408 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
409 if (isDefLiveOut(Reg
, TailBB
, MRI
) || UsedByPhi
.count(Reg
))
410 addSSAUpdateEntry(Reg
, NewReg
, PredBB
);
412 auto VI
= LocalVRMap
.find(Reg
);
413 if (VI
!= LocalVRMap
.end()) {
414 // Need to make sure that the register class of the mapped register
415 // will satisfy the constraints of the class of the register being
417 auto *OrigRC
= MRI
->getRegClass(Reg
);
418 auto *MappedRC
= MRI
->getRegClass(VI
->second
.Reg
);
419 const TargetRegisterClass
*ConstrRC
;
420 if (VI
->second
.SubReg
!= 0) {
421 ConstrRC
= TRI
->getMatchingSuperRegClass(MappedRC
, OrigRC
,
424 // The actual constraining (as in "find appropriate new class")
425 // is done by getMatchingSuperRegClass, so now we only need to
426 // change the class of the mapped register.
427 MRI
->setRegClass(VI
->second
.Reg
, ConstrRC
);
430 // For mapped registers that do not have sub-registers, simply
431 // restrict their class to match the original one.
432 ConstrRC
= MRI
->constrainRegClass(VI
->second
.Reg
, OrigRC
);
436 // If the class constraining succeeded, we can simply replace
437 // the old register with the mapped one.
438 MO
.setReg(VI
->second
.Reg
);
439 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
440 // sub-register, we need to compose the sub-register indices.
441 MO
.setSubReg(TRI
->composeSubRegIndices(MO
.getSubReg(),
444 // The direct replacement is not possible, due to failing register
445 // class constraints. An explicit COPY is necessary. Create one
446 // that can be reused
447 auto *NewRC
= MI
->getRegClassConstraint(i
, TII
, TRI
);
448 if (NewRC
== nullptr)
450 Register NewReg
= MRI
->createVirtualRegister(NewRC
);
451 BuildMI(*PredBB
, NewMI
, NewMI
.getDebugLoc(),
452 TII
->get(TargetOpcode::COPY
), NewReg
)
453 .addReg(VI
->second
.Reg
, 0, VI
->second
.SubReg
);
454 LocalVRMap
.erase(VI
);
455 LocalVRMap
.insert(std::make_pair(Reg
, RegSubRegPair(NewReg
, 0)));
457 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
458 // is equivalent to the whole register Reg. Hence, Reg:subreg
459 // is same as NewReg:subreg, so keep the sub-register index
462 // Clear any kill flags from this operand. The new register could
463 // have uses after this one, so kills are not valid here.
471 /// After FromBB is tail duplicated into its predecessor blocks, the successors
472 /// have gained new predecessors. Update the PHI instructions in them
474 void TailDuplicator::updateSuccessorsPHIs(
475 MachineBasicBlock
*FromBB
, bool isDead
,
476 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
477 SmallSetVector
<MachineBasicBlock
*, 8> &Succs
) {
478 for (MachineBasicBlock
*SuccBB
: Succs
) {
479 for (MachineInstr
&MI
: *SuccBB
) {
482 MachineInstrBuilder
MIB(*FromBB
->getParent(), MI
);
484 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
485 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
486 if (MO
.getMBB() == FromBB
) {
493 MachineOperand
&MO0
= MI
.getOperand(Idx
);
494 Register Reg
= MO0
.getReg();
496 // Folded into the previous BB.
497 // There could be duplicate phi source entries. FIXME: Should sdisel
498 // or earlier pass fixed this?
499 for (unsigned i
= MI
.getNumOperands() - 2; i
!= Idx
; i
-= 2) {
500 MachineOperand
&MO
= MI
.getOperand(i
+ 1);
501 if (MO
.getMBB() == FromBB
) {
502 MI
.RemoveOperand(i
+ 1);
509 // If Idx is set, the operands at Idx and Idx+1 must be removed.
510 // We reuse the location to avoid expensive RemoveOperand calls.
512 DenseMap
<Register
, AvailableValsTy
>::iterator LI
=
513 SSAUpdateVals
.find(Reg
);
514 if (LI
!= SSAUpdateVals
.end()) {
515 // This register is defined in the tail block.
516 for (unsigned j
= 0, ee
= LI
->second
.size(); j
!= ee
; ++j
) {
517 MachineBasicBlock
*SrcBB
= LI
->second
[j
].first
;
518 // If we didn't duplicate a bb into a particular predecessor, we
519 // might still have added an entry to SSAUpdateVals to correcly
520 // recompute SSA. If that case, avoid adding a dummy extra argument
522 if (!SrcBB
->isSuccessor(SuccBB
))
525 Register SrcReg
= LI
->second
[j
].second
;
527 MI
.getOperand(Idx
).setReg(SrcReg
);
528 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
531 MIB
.addReg(SrcReg
).addMBB(SrcBB
);
535 // Live in tail block, must also be live in predecessors.
536 for (unsigned j
= 0, ee
= TDBBs
.size(); j
!= ee
; ++j
) {
537 MachineBasicBlock
*SrcBB
= TDBBs
[j
];
539 MI
.getOperand(Idx
).setReg(Reg
);
540 MI
.getOperand(Idx
+ 1).setMBB(SrcBB
);
543 MIB
.addReg(Reg
).addMBB(SrcBB
);
548 MI
.RemoveOperand(Idx
+ 1);
549 MI
.RemoveOperand(Idx
);
555 /// Determine if it is profitable to duplicate this block.
556 bool TailDuplicator::shouldTailDuplicate(bool IsSimple
,
557 MachineBasicBlock
&TailBB
) {
558 // When doing tail-duplication during layout, the block ordering is in flux,
559 // so canFallThrough returns a result based on incorrect information and
560 // should just be ignored.
561 if (!LayoutMode
&& TailBB
.canFallThrough())
564 // Don't try to tail-duplicate single-block loops.
565 if (TailBB
.isSuccessor(&TailBB
))
568 // Set the limit on the cost to duplicate. When optimizing for size,
569 // duplicate only one, because one branch instruction can be eliminated to
570 // compensate for the duplication.
571 unsigned MaxDuplicateCount
;
572 bool OptForSize
= MF
->getFunction().hasOptSize() ||
573 llvm::shouldOptimizeForSize(&TailBB
, PSI
, MBFI
);
574 if (TailDupSize
== 0)
575 MaxDuplicateCount
= TailDuplicateSize
;
577 MaxDuplicateCount
= TailDupSize
;
579 MaxDuplicateCount
= 1;
581 // If the block to be duplicated ends in an unanalyzable fallthrough, don't
583 // A similar check is necessary in MachineBlockPlacement to make sure pairs of
584 // blocks with unanalyzable fallthrough get layed out contiguously.
585 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
586 SmallVector
<MachineOperand
, 4> PredCond
;
587 if (TII
->analyzeBranch(TailBB
, PredTBB
, PredFBB
, PredCond
) &&
588 TailBB
.canFallThrough())
591 // If the target has hardware branch prediction that can handle indirect
592 // branches, duplicating them can often make them predictable when there
593 // are common paths through the code. The limit needs to be high enough
594 // to allow undoing the effects of tail merging and other optimizations
595 // that rearrange the predecessors of the indirect branch.
597 bool HasIndirectbr
= false;
599 HasIndirectbr
= TailBB
.back().isIndirectBranch();
601 if (HasIndirectbr
&& PreRegAlloc
)
602 MaxDuplicateCount
= TailDupIndirectBranchSize
;
604 // Check the instructions in the block to determine whether tail-duplication
605 // is invalid or unlikely to be profitable.
606 unsigned InstrCount
= 0;
607 for (MachineInstr
&MI
: TailBB
) {
608 // Non-duplicable things shouldn't be tail-duplicated.
609 // CFI instructions are marked as non-duplicable, because Darwin compact
610 // unwind info emission can't handle multiple prologue setups. In case of
611 // DWARF, allow them be duplicated, so that their existence doesn't prevent
612 // tail duplication of some basic blocks, that would be duplicated otherwise.
613 if (MI
.isNotDuplicable() &&
614 (TailBB
.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
615 !MI
.isCFIInstruction()))
618 // Convergent instructions can be duplicated only if doing so doesn't add
619 // new control dependencies, which is what we're going to do here.
620 if (MI
.isConvergent())
623 // Do not duplicate 'return' instructions if this is a pre-regalloc run.
624 // A return may expand into a lot more instructions (e.g. reload of callee
625 // saved registers) after PEI.
626 if (PreRegAlloc
&& MI
.isReturn())
629 // Avoid duplicating calls before register allocation. Calls presents a
630 // barrier to register allocation so duplicating them may end up increasing
632 if (PreRegAlloc
&& MI
.isCall())
635 // TailDuplicator::appendCopies will erroneously place COPYs after
636 // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same
637 // bug that was fixed in f7a53d82c090.
638 // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point
639 // for the COPY when replacing PHIs.
640 if (MI
.getOpcode() == TargetOpcode::INLINEASM_BR
)
644 InstrCount
+= MI
.getBundleSize();
645 else if (!MI
.isPHI() && !MI
.isMetaInstruction())
648 if (InstrCount
> MaxDuplicateCount
)
652 // Check if any of the successors of TailBB has a PHI node in which the
653 // value corresponding to TailBB uses a subregister.
654 // If a phi node uses a register paired with a subregister, the actual
655 // "value type" of the phi may differ from the type of the register without
656 // any subregisters. Due to a bug, tail duplication may add a new operand
657 // without a necessary subregister, producing an invalid code. This is
658 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
659 // Disable tail duplication for this case for now, until the problem is
661 for (auto SB
: TailBB
.successors()) {
662 for (auto &I
: *SB
) {
665 unsigned Idx
= getPHISrcRegOpIdx(&I
, &TailBB
);
667 MachineOperand
&PU
= I
.getOperand(Idx
);
668 if (PU
.getSubReg() != 0)
673 if (HasIndirectbr
&& PreRegAlloc
)
682 return canCompletelyDuplicateBB(TailBB
);
685 /// True if this BB has only one unconditional jump.
686 bool TailDuplicator::isSimpleBB(MachineBasicBlock
*TailBB
) {
687 if (TailBB
->succ_size() != 1)
689 if (TailBB
->pred_empty())
691 MachineBasicBlock::iterator I
= TailBB
->getFirstNonDebugInstr(true);
692 if (I
== TailBB
->end())
694 return I
->isUnconditionalBranch();
697 static bool bothUsedInPHI(const MachineBasicBlock
&A
,
698 const SmallPtrSet
<MachineBasicBlock
*, 8> &SuccsB
) {
699 for (MachineBasicBlock
*BB
: A
.successors())
700 if (SuccsB
.count(BB
) && !BB
->empty() && BB
->begin()->isPHI())
706 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock
&BB
) {
707 for (MachineBasicBlock
*PredBB
: BB
.predecessors()) {
708 if (PredBB
->succ_size() > 1)
711 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
712 SmallVector
<MachineOperand
, 4> PredCond
;
713 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
716 if (!PredCond
.empty())
722 bool TailDuplicator::duplicateSimpleBB(
723 MachineBasicBlock
*TailBB
, SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
724 const DenseSet
<Register
> &UsedByPhi
,
725 SmallVectorImpl
<MachineInstr
*> &Copies
) {
726 SmallPtrSet
<MachineBasicBlock
*, 8> Succs(TailBB
->succ_begin(),
728 SmallVector
<MachineBasicBlock
*, 8> Preds(TailBB
->predecessors());
729 bool Changed
= false;
730 for (MachineBasicBlock
*PredBB
: Preds
) {
731 if (PredBB
->hasEHPadSuccessor() || PredBB
->mayHaveInlineAsmBr())
734 if (bothUsedInPHI(*PredBB
, Succs
))
737 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
738 SmallVector
<MachineOperand
, 4> PredCond
;
739 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
743 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
744 << "From simple Succ: " << *TailBB
);
746 MachineBasicBlock
*NewTarget
= *TailBB
->succ_begin();
747 MachineBasicBlock
*NextBB
= PredBB
->getNextNode();
749 // Make PredFBB explicit.
750 if (PredCond
.empty())
753 // Make fall through explicit.
760 if (PredFBB
== TailBB
)
762 if (PredTBB
== TailBB
)
765 // Make the branch unconditional if possible
766 if (PredTBB
== PredFBB
) {
771 // Avoid adding fall through branches.
772 if (PredFBB
== NextBB
)
774 if (PredTBB
== NextBB
&& PredFBB
== nullptr)
777 auto DL
= PredBB
->findBranchDebugLoc();
778 TII
->removeBranch(*PredBB
);
780 if (!PredBB
->isSuccessor(NewTarget
))
781 PredBB
->replaceSuccessor(TailBB
, NewTarget
);
783 PredBB
->removeSuccessor(TailBB
, true);
784 assert(PredBB
->succ_size() <= 1);
788 TII
->insertBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
, DL
);
790 TDBBs
.push_back(PredBB
);
795 bool TailDuplicator::canTailDuplicate(MachineBasicBlock
*TailBB
,
796 MachineBasicBlock
*PredBB
) {
797 // EH edges are ignored by analyzeBranch.
798 if (PredBB
->succ_size() > 1)
801 MachineBasicBlock
*PredTBB
= nullptr, *PredFBB
= nullptr;
802 SmallVector
<MachineOperand
, 4> PredCond
;
803 if (TII
->analyzeBranch(*PredBB
, PredTBB
, PredFBB
, PredCond
))
805 if (!PredCond
.empty())
810 /// If it is profitable, duplicate TailBB's contents in each
811 /// of its predecessors.
812 /// \p IsSimple result of isSimpleBB
813 /// \p TailBB Block to be duplicated.
814 /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
815 /// instead of the previous block in MF's order.
816 /// \p TDBBs A vector to keep track of all blocks tail-duplicated
818 /// \p Copies A vector of copy instructions inserted. Used later to
819 /// walk all the inserted copies and remove redundant ones.
820 bool TailDuplicator::tailDuplicate(bool IsSimple
, MachineBasicBlock
*TailBB
,
821 MachineBasicBlock
*ForcedLayoutPred
,
822 SmallVectorImpl
<MachineBasicBlock
*> &TDBBs
,
823 SmallVectorImpl
<MachineInstr
*> &Copies
,
824 SmallVectorImpl
<MachineBasicBlock
*> *CandidatePtr
) {
825 LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB
)
828 bool ShouldUpdateTerminators
= TailBB
->canFallThrough();
830 DenseSet
<Register
> UsedByPhi
;
831 getRegsUsedByPHIs(*TailBB
, &UsedByPhi
);
834 return duplicateSimpleBB(TailBB
, TDBBs
, UsedByPhi
, Copies
);
836 // Iterate through all the unique predecessors and tail-duplicate this
837 // block into them, if possible. Copying the list ahead of time also
838 // avoids trouble with the predecessor list reallocating.
839 bool Changed
= false;
840 SmallSetVector
<MachineBasicBlock
*, 8> Preds
;
842 Preds
.insert(CandidatePtr
->begin(), CandidatePtr
->end());
844 Preds
.insert(TailBB
->pred_begin(), TailBB
->pred_end());
846 for (MachineBasicBlock
*PredBB
: Preds
) {
847 assert(TailBB
!= PredBB
&&
848 "Single-block loop should have been rejected earlier!");
850 if (!canTailDuplicate(TailBB
, PredBB
))
853 // Don't duplicate into a fall-through predecessor (at least for now).
854 // If profile is available, findDuplicateCandidates can choose better
855 // fall-through predecessor.
856 if (!(MF
->getFunction().hasProfileData() && LayoutMode
)) {
857 bool IsLayoutSuccessor
= false;
858 if (ForcedLayoutPred
)
859 IsLayoutSuccessor
= (ForcedLayoutPred
== PredBB
);
860 else if (PredBB
->isLayoutSuccessor(TailBB
) && PredBB
->canFallThrough())
861 IsLayoutSuccessor
= true;
862 if (IsLayoutSuccessor
)
866 LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
867 << "From Succ: " << *TailBB
);
869 TDBBs
.push_back(PredBB
);
871 // Remove PredBB's unconditional branch.
872 TII
->removeBranch(*PredBB
);
874 // Clone the contents of TailBB into PredBB.
875 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
876 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
877 for (MachineBasicBlock::iterator I
= TailBB
->begin(), E
= TailBB
->end();
878 I
!= E
; /* empty */) {
879 MachineInstr
*MI
= &*I
;
882 // Replace the uses of the def of the PHI with the register coming
884 processPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
886 // Replace def of virtual registers with new registers, and update
887 // uses with PHI source register or the new registers.
888 duplicateInstruction(MI
, TailBB
, PredBB
, LocalVRMap
, UsedByPhi
);
891 appendCopies(PredBB
, CopyInfos
, Copies
);
893 NumTailDupAdded
+= TailBB
->size() - 1; // subtract one for removed branch
896 PredBB
->removeSuccessor(PredBB
->succ_begin());
897 assert(PredBB
->succ_empty() &&
898 "TailDuplicate called on block with multiple successors!");
899 for (MachineBasicBlock
*Succ
: TailBB
->successors())
900 PredBB
->addSuccessor(Succ
, MBPI
->getEdgeProbability(TailBB
, Succ
));
902 // Update branches in pred to jump to tail's layout successor if needed.
903 if (ShouldUpdateTerminators
)
904 PredBB
->updateTerminator(TailBB
->getNextNode());
910 // If TailBB was duplicated into all its predecessors except for the prior
911 // block, which falls through unconditionally, move the contents of this
912 // block into the prior block.
913 MachineBasicBlock
*PrevBB
= ForcedLayoutPred
;
915 PrevBB
= &*std::prev(TailBB
->getIterator());
916 MachineBasicBlock
*PriorTBB
= nullptr, *PriorFBB
= nullptr;
917 SmallVector
<MachineOperand
, 4> PriorCond
;
918 // This has to check PrevBB->succ_size() because EH edges are ignored by
920 if (PrevBB
->succ_size() == 1 &&
921 // Layout preds are not always CFG preds. Check.
922 *PrevBB
->succ_begin() == TailBB
&&
923 !TII
->analyzeBranch(*PrevBB
, PriorTBB
, PriorFBB
, PriorCond
) &&
925 (!PriorTBB
|| PriorTBB
== TailBB
) &&
926 TailBB
->pred_size() == 1 &&
927 !TailBB
->hasAddressTaken()) {
928 LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
929 << "From MBB: " << *TailBB
);
930 // There may be a branch to the layout successor. This is unlikely but it
931 // happens. The correct thing to do is to remove the branch before
932 // duplicating the instructions in all cases.
933 TII
->removeBranch(*PrevBB
);
935 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
936 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
937 MachineBasicBlock::iterator I
= TailBB
->begin();
938 // Process PHI instructions first.
939 while (I
!= TailBB
->end() && I
->isPHI()) {
940 // Replace the uses of the def of the PHI with the register coming
942 MachineInstr
*MI
= &*I
++;
943 processPHI(MI
, TailBB
, PrevBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, true);
946 // Now copy the non-PHI instructions.
947 while (I
!= TailBB
->end()) {
948 // Replace def of virtual registers with new registers, and update
949 // uses with PHI source register or the new registers.
950 MachineInstr
*MI
= &*I
++;
951 assert(!MI
->isBundle() && "Not expecting bundles before regalloc!");
952 duplicateInstruction(MI
, TailBB
, PrevBB
, LocalVRMap
, UsedByPhi
);
953 MI
->eraseFromParent();
955 appendCopies(PrevBB
, CopyInfos
, Copies
);
957 TII
->removeBranch(*PrevBB
);
958 // No PHIs to worry about, just splice the instructions over.
959 PrevBB
->splice(PrevBB
->end(), TailBB
, TailBB
->begin(), TailBB
->end());
961 PrevBB
->removeSuccessor(PrevBB
->succ_begin());
962 assert(PrevBB
->succ_empty());
963 PrevBB
->transferSuccessors(TailBB
);
965 // Update branches in PrevBB based on Tail's layout successor.
966 if (ShouldUpdateTerminators
)
967 PrevBB
->updateTerminator(TailBB
->getNextNode());
969 TDBBs
.push_back(PrevBB
);
973 // If this is after register allocation, there are no phis to fix.
977 // If we made no changes so far, we are safe.
981 // Handle the nasty case in that we duplicated a block that is part of a loop
982 // into some but not all of its predecessors. For example:
986 // if we duplicate 2 into 1 but not into 3, we end up with
987 // 12 -> 3 <-> 2 -> rest |
990 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
991 // with a phi in 3 (which now dominates 2).
992 // What we do here is introduce a copy in 3 of the register defined by the
993 // phi, just like when we are duplicating 2 into 3, but we don't copy any
994 // real instructions or remove the 3 -> 2 edge from the phi in 2.
995 for (MachineBasicBlock
*PredBB
: Preds
) {
996 if (is_contained(TDBBs
, PredBB
))
1000 if (PredBB
->succ_size() != 1)
1003 DenseMap
<Register
, RegSubRegPair
> LocalVRMap
;
1004 SmallVector
<std::pair
<Register
, RegSubRegPair
>, 4> CopyInfos
;
1005 MachineBasicBlock::iterator I
= TailBB
->begin();
1006 // Process PHI instructions first.
1007 while (I
!= TailBB
->end() && I
->isPHI()) {
1008 // Replace the uses of the def of the PHI with the register coming
1010 MachineInstr
*MI
= &*I
++;
1011 processPHI(MI
, TailBB
, PredBB
, LocalVRMap
, CopyInfos
, UsedByPhi
, false);
1013 appendCopies(PredBB
, CopyInfos
, Copies
);
1019 /// At the end of the block \p MBB generate COPY instructions between registers
1020 /// described by \p CopyInfos. Append resulting instructions to \p Copies.
1021 void TailDuplicator::appendCopies(MachineBasicBlock
*MBB
,
1022 SmallVectorImpl
<std::pair
<Register
, RegSubRegPair
>> &CopyInfos
,
1023 SmallVectorImpl
<MachineInstr
*> &Copies
) {
1024 MachineBasicBlock::iterator Loc
= MBB
->getFirstTerminator();
1025 const MCInstrDesc
&CopyD
= TII
->get(TargetOpcode::COPY
);
1026 for (auto &CI
: CopyInfos
) {
1027 auto C
= BuildMI(*MBB
, Loc
, DebugLoc(), CopyD
, CI
.first
)
1028 .addReg(CI
.second
.Reg
, 0, CI
.second
.SubReg
);
1029 Copies
.push_back(C
);
1033 /// Remove the specified dead machine basic block from the function, updating
1035 void TailDuplicator::removeDeadBlock(
1036 MachineBasicBlock
*MBB
,
1037 function_ref
<void(MachineBasicBlock
*)> *RemovalCallback
) {
1038 assert(MBB
->pred_empty() && "MBB must be dead!");
1039 LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB
);
1041 MachineFunction
*MF
= MBB
->getParent();
1042 // Update the call site info.
1043 for (const MachineInstr
&MI
: *MBB
)
1044 if (MI
.shouldUpdateCallSiteInfo())
1045 MF
->eraseCallSiteInfo(&MI
);
1047 if (RemovalCallback
)
1048 (*RemovalCallback
)(MBB
);
1050 // Remove all successors.
1051 while (!MBB
->succ_empty())
1052 MBB
->removeSuccessor(MBB
->succ_end() - 1);
1054 // Remove the block.
1055 MBB
->eraseFromParent();