1 //===- HexagonEarlyIfConv.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements a Hexagon-specific if-conversion pass that runs on the
11 // In SSA it is not straightforward to represent instructions that condi-
12 // tionally define registers, since a conditionally-defined register may
13 // only be used under the same condition on which the definition was based.
14 // To avoid complications of this nature, this patch will only generate
15 // predicated stores, and speculate other instructions from the "if-conver-
17 // The code will recognize CFG patterns where a block with a conditional
18 // branch "splits" into a "true block" and a "false block". Either of these
19 // could be omitted (in case of a triangle, for example).
20 // If after conversion of the side block(s) the CFG allows it, the resul-
21 // ting blocks may be merged. If the "join" block contained PHI nodes, they
22 // will be replaced with MUX (or MUX-like) instructions to maintain the
23 // semantics of the PHI.
27 // %40 = L2_loadrub_io killed %39, 1
28 // %41 = S2_tstbit_i killed %40, 0
29 // J2_jumpt killed %41, <%bb.5>, implicit dead %pc
30 // J2_jump <%bb.4>, implicit dead %pc
31 // Successors according to CFG: %bb.4(62) %bb.5(62)
33 // %bb.4: derived from LLVM BB %if.then
34 // Predecessors according to CFG: %bb.3
35 // %11 = A2_addp %6, %10
36 // S2_storerd_io %32, 16, %11
37 // Successors according to CFG: %bb.5
39 // %bb.5: derived from LLVM BB %if.end
40 // Predecessors according to CFG: %bb.3 %bb.4
41 // %12 = PHI %6, <%bb.3>, %11, <%bb.4>
42 // %13 = A2_addp %7, %12
43 // %42 = C2_cmpeqi %9, 10
44 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
45 // J2_jump <%bb.6>, implicit dead %pc
46 // Successors according to CFG: %bb.6(4) %bb.3(124)
50 // %40 = L2_loadrub_io killed %39, 1
51 // %41 = S2_tstbit_i killed %40, 0
52 // spec-> %11 = A2_addp %6, %10
53 // pred-> S2_pstorerdf_io %41, %32, 16, %11
54 // %46 = PS_pselect %41, %6, %11
55 // %13 = A2_addp %7, %46
56 // %42 = C2_cmpeqi %9, 10
57 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
58 // J2_jump <%bb.6>, implicit dead %pc
59 // Successors according to CFG: %bb.6 %bb.3
62 #include "HexagonInstrInfo.h"
63 #include "HexagonSubtarget.h"
64 #include "llvm/ADT/DenseSet.h"
65 #include "llvm/ADT/SmallVector.h"
66 #include "llvm/ADT/StringRef.h"
67 #include "llvm/ADT/iterator_range.h"
68 #include "llvm/CodeGen/MachineBasicBlock.h"
69 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
70 #include "llvm/CodeGen/MachineDominators.h"
71 #include "llvm/CodeGen/MachineFunction.h"
72 #include "llvm/CodeGen/MachineFunctionPass.h"
73 #include "llvm/CodeGen/MachineInstr.h"
74 #include "llvm/CodeGen/MachineInstrBuilder.h"
75 #include "llvm/CodeGen/MachineLoopInfo.h"
76 #include "llvm/CodeGen/MachineOperand.h"
77 #include "llvm/CodeGen/MachineRegisterInfo.h"
78 #include "llvm/CodeGen/TargetRegisterInfo.h"
79 #include "llvm/IR/DebugLoc.h"
80 #include "llvm/Pass.h"
81 #include "llvm/Support/BranchProbability.h"
82 #include "llvm/Support/CommandLine.h"
83 #include "llvm/Support/Compiler.h"
84 #include "llvm/Support/Debug.h"
85 #include "llvm/Support/ErrorHandling.h"
86 #include "llvm/Support/raw_ostream.h"
90 #define DEBUG_TYPE "hexagon-eif"
96 FunctionPass
*createHexagonEarlyIfConversion();
97 void initializeHexagonEarlyIfConversionPass(PassRegistry
& Registry
);
99 } // end namespace llvm
101 static cl::opt
<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden
,
102 cl::init(true), cl::desc("Enable branch probability info"));
103 static cl::opt
<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden
,
104 cl::desc("Size limit in Hexagon early if-conversion"));
105 static cl::opt
<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false),
106 cl::Hidden
, cl::desc("Do not convert branches that may exit the loop"));
111 PrintMB(const MachineBasicBlock
*B
) : MB(B
) {}
113 const MachineBasicBlock
*MB
;
115 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintMB
&P
) {
117 return OS
<< "<none>";
118 return OS
<< '#' << P
.MB
->getNumber();
122 FlowPattern() = default;
123 FlowPattern(MachineBasicBlock
*B
, unsigned PR
, MachineBasicBlock
*TB
,
124 MachineBasicBlock
*FB
, MachineBasicBlock
*JB
)
125 : SplitB(B
), TrueB(TB
), FalseB(FB
), JoinB(JB
), PredR(PR
) {}
127 MachineBasicBlock
*SplitB
= nullptr;
128 MachineBasicBlock
*TrueB
= nullptr;
129 MachineBasicBlock
*FalseB
= nullptr;
130 MachineBasicBlock
*JoinB
= nullptr;
135 PrintFP(const FlowPattern
&P
, const TargetRegisterInfo
&T
)
138 const FlowPattern
&FP
;
139 const TargetRegisterInfo
&TRI
;
140 friend raw_ostream
&operator<< (raw_ostream
&OS
, const PrintFP
&P
);
142 raw_ostream
&operator<<(raw_ostream
&OS
,
143 const PrintFP
&P
) LLVM_ATTRIBUTE_UNUSED
;
144 raw_ostream
&operator<<(raw_ostream
&OS
, const PrintFP
&P
) {
145 OS
<< "{ SplitB:" << PrintMB(P
.FP
.SplitB
)
146 << ", PredR:" << printReg(P
.FP
.PredR
, &P
.TRI
)
147 << ", TrueB:" << PrintMB(P
.FP
.TrueB
)
148 << ", FalseB:" << PrintMB(P
.FP
.FalseB
)
149 << ", JoinB:" << PrintMB(P
.FP
.JoinB
) << " }";
153 class HexagonEarlyIfConversion
: public MachineFunctionPass
{
157 HexagonEarlyIfConversion() : MachineFunctionPass(ID
) {}
159 StringRef
getPassName() const override
{
160 return "Hexagon early if conversion";
163 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
164 AU
.addRequired
<MachineBranchProbabilityInfo
>();
165 AU
.addRequired
<MachineDominatorTree
>();
166 AU
.addPreserved
<MachineDominatorTree
>();
167 AU
.addRequired
<MachineLoopInfo
>();
168 MachineFunctionPass::getAnalysisUsage(AU
);
171 bool runOnMachineFunction(MachineFunction
&MF
) override
;
174 using BlockSetType
= DenseSet
<MachineBasicBlock
*>;
176 bool isPreheader(const MachineBasicBlock
*B
) const;
177 bool matchFlowPattern(MachineBasicBlock
*B
, MachineLoop
*L
,
179 bool visitBlock(MachineBasicBlock
*B
, MachineLoop
*L
);
180 bool visitLoop(MachineLoop
*L
);
182 bool hasEHLabel(const MachineBasicBlock
*B
) const;
183 bool hasUncondBranch(const MachineBasicBlock
*B
) const;
184 bool isValidCandidate(const MachineBasicBlock
*B
) const;
185 bool usesUndefVReg(const MachineInstr
*MI
) const;
186 bool isValid(const FlowPattern
&FP
) const;
187 unsigned countPredicateDefs(const MachineBasicBlock
*B
) const;
188 unsigned computePhiCost(const MachineBasicBlock
*B
,
189 const FlowPattern
&FP
) const;
190 bool isProfitable(const FlowPattern
&FP
) const;
191 bool isPredicableStore(const MachineInstr
*MI
) const;
192 bool isSafeToSpeculate(const MachineInstr
*MI
) const;
193 bool isPredicate(unsigned R
) const;
195 unsigned getCondStoreOpcode(unsigned Opc
, bool IfTrue
) const;
196 void predicateInstr(MachineBasicBlock
*ToB
, MachineBasicBlock::iterator At
,
197 MachineInstr
*MI
, unsigned PredR
, bool IfTrue
);
198 void predicateBlockNB(MachineBasicBlock
*ToB
,
199 MachineBasicBlock::iterator At
, MachineBasicBlock
*FromB
,
200 unsigned PredR
, bool IfTrue
);
202 unsigned buildMux(MachineBasicBlock
*B
, MachineBasicBlock::iterator At
,
203 const TargetRegisterClass
*DRC
, unsigned PredR
, unsigned TR
,
204 unsigned TSR
, unsigned FR
, unsigned FSR
);
205 void updatePhiNodes(MachineBasicBlock
*WhereB
, const FlowPattern
&FP
);
206 void convert(const FlowPattern
&FP
);
208 void removeBlock(MachineBasicBlock
*B
);
209 void eliminatePhis(MachineBasicBlock
*B
);
210 void mergeBlocks(MachineBasicBlock
*PredB
, MachineBasicBlock
*SuccB
);
211 void simplifyFlowGraph(const FlowPattern
&FP
);
213 const HexagonInstrInfo
*HII
= nullptr;
214 const TargetRegisterInfo
*TRI
= nullptr;
215 MachineFunction
*MFN
= nullptr;
216 MachineRegisterInfo
*MRI
= nullptr;
217 MachineDominatorTree
*MDT
= nullptr;
218 MachineLoopInfo
*MLI
= nullptr;
219 BlockSetType Deleted
;
220 const MachineBranchProbabilityInfo
*MBPI
;
223 } // end anonymous namespace
225 char HexagonEarlyIfConversion::ID
= 0;
227 INITIALIZE_PASS(HexagonEarlyIfConversion
, "hexagon-early-if",
228 "Hexagon early if conversion", false, false)
230 bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock
*B
) const {
231 if (B
->succ_size() != 1)
233 MachineBasicBlock
*SB
= *B
->succ_begin();
234 MachineLoop
*L
= MLI
->getLoopFor(SB
);
235 return L
&& SB
== L
->getHeader() && MDT
->dominates(B
, SB
);
238 bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock
*B
,
239 MachineLoop
*L
, FlowPattern
&FP
) {
240 LLVM_DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B
)
243 // Interested only in conditional branches, no .new, no new-value, etc.
244 // Check the terminators directly, it's easier than handling all responses
245 // from analyzeBranch.
246 MachineBasicBlock
*TB
= nullptr, *FB
= nullptr;
247 MachineBasicBlock::const_iterator T1I
= B
->getFirstTerminator();
250 unsigned Opc
= T1I
->getOpcode();
251 if (Opc
!= Hexagon::J2_jumpt
&& Opc
!= Hexagon::J2_jumpf
)
253 Register PredR
= T1I
->getOperand(0).getReg();
255 // Get the layout successor, or 0 if B does not have one.
256 MachineFunction::iterator NextBI
= std::next(MachineFunction::iterator(B
));
257 MachineBasicBlock
*NextB
= (NextBI
!= MFN
->end()) ? &*NextBI
: nullptr;
259 MachineBasicBlock
*T1B
= T1I
->getOperand(1).getMBB();
260 MachineBasicBlock::const_iterator T2I
= std::next(T1I
);
261 // The second terminator should be an unconditional branch.
262 assert(T2I
== B
->end() || T2I
->getOpcode() == Hexagon::J2_jump
);
263 MachineBasicBlock
*T2B
= (T2I
== B
->end()) ? NextB
264 : T2I
->getOperand(0).getMBB();
266 // XXX merge if T1B == NextB, or convert branch to unconditional.
267 // mark as diamond with both sides equal?
271 // Record the true/false blocks in such a way that "true" means "if (PredR)",
272 // and "false" means "if (!PredR)".
273 if (Opc
== Hexagon::J2_jumpt
)
278 if (!MDT
->properlyDominates(B
, TB
) || !MDT
->properlyDominates(B
, FB
))
281 // Detect triangle first. In case of a triangle, one of the blocks TB/FB
282 // can fall through into the other, in other words, it will be executed
283 // in both cases. We only want to predicate the block that is executed
285 unsigned TNP
= TB
->pred_size(), FNP
= FB
->pred_size();
286 unsigned TNS
= TB
->succ_size(), FNS
= FB
->succ_size();
288 // A block is predicable if it has one predecessor (it must be B), and
289 // it has a single successor. In fact, the block has to end either with
290 // an unconditional branch (which can be predicated), or with a fall-
292 // Also, skip blocks that do not belong to the same loop.
293 bool TOk
= (TNP
== 1 && TNS
== 1 && MLI
->getLoopFor(TB
) == L
);
294 bool FOk
= (FNP
== 1 && FNS
== 1 && MLI
->getLoopFor(FB
) == L
);
296 // If requested (via an option), do not consider branches where the
297 // true and false targets do not belong to the same loop.
298 if (SkipExitBranches
&& MLI
->getLoopFor(TB
) != MLI
->getLoopFor(FB
))
301 // If neither is predicable, there is nothing interesting.
305 MachineBasicBlock
*TSB
= (TNS
> 0) ? *TB
->succ_begin() : nullptr;
306 MachineBasicBlock
*FSB
= (FNS
> 0) ? *FB
->succ_begin() : nullptr;
307 MachineBasicBlock
*JB
= nullptr;
313 // Diamond: "if (P) then TB; else FB;".
321 // !TOk && FOk (at least one must be true by now).
326 // Don't try to predicate loop preheaders.
327 if ((TB
&& isPreheader(TB
)) || (FB
&& isPreheader(FB
))) {
328 LLVM_DEBUG(dbgs() << "One of blocks " << PrintMB(TB
) << ", " << PrintMB(FB
)
329 << " is a loop preheader. Skipping.\n");
333 FP
= FlowPattern(B
, PredR
, TB
, FB
, JB
);
334 LLVM_DEBUG(dbgs() << "Detected " << PrintFP(FP
, *TRI
) << "\n");
338 // KLUDGE: HexagonInstrInfo::analyzeBranch won't work on a block that
339 // contains EH_LABEL.
340 bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock
*B
) const {
347 // KLUDGE: HexagonInstrInfo::analyzeBranch may be unable to recognize
348 // that a block can never fall-through.
349 bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock
*B
)
351 MachineBasicBlock::const_iterator I
= B
->getFirstTerminator(), E
= B
->end();
360 bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock
*B
)
364 if (B
->isEHPad() || B
->hasAddressTaken())
366 if (B
->succ_size() == 0)
369 for (auto &MI
: *B
) {
370 if (MI
.isDebugInstr())
372 if (MI
.isConditionalBranch())
374 unsigned Opc
= MI
.getOpcode();
375 bool IsJMP
= (Opc
== Hexagon::J2_jump
);
376 if (!isPredicableStore(&MI
) && !IsJMP
&& !isSafeToSpeculate(&MI
))
378 // Look for predicate registers defined by this instruction. It's ok
379 // to speculate such an instruction, but the predicate register cannot
380 // be used outside of this block (or else it won't be possible to
381 // update the use of it after predication). PHI uses will be updated
382 // to use a result of a MUX, and a MUX cannot be created for predicate
384 for (const MachineOperand
&MO
: MI
.operands()) {
385 if (!MO
.isReg() || !MO
.isDef())
387 Register R
= MO
.getReg();
388 if (!Register::isVirtualRegister(R
))
392 for (auto U
= MRI
->use_begin(R
); U
!= MRI
->use_end(); ++U
)
393 if (U
->getParent()->isPHI())
400 bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr
*MI
) const {
401 for (const MachineOperand
&MO
: MI
->operands()) {
402 if (!MO
.isReg() || !MO
.isUse())
404 Register R
= MO
.getReg();
405 if (!Register::isVirtualRegister(R
))
407 const MachineInstr
*DefI
= MRI
->getVRegDef(R
);
408 // "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
409 assert(DefI
&& "Expecting a reaching def in MRI");
410 if (DefI
->isImplicitDef())
416 bool HexagonEarlyIfConversion::isValid(const FlowPattern
&FP
) const {
417 if (hasEHLabel(FP
.SplitB
)) // KLUDGE: see function definition
419 if (FP
.TrueB
&& !isValidCandidate(FP
.TrueB
))
421 if (FP
.FalseB
&& !isValidCandidate(FP
.FalseB
))
423 // Check the PHIs in the join block. If any of them use a register
424 // that is defined as IMPLICIT_DEF, do not convert this. This can
425 // legitimately happen if one side of the split never executes, but
426 // the compiler is unable to prove it. That side may then seem to
427 // provide an "undef" value to the join block, however it will never
428 // execute at run-time. If we convert this case, the "undef" will
429 // be used in a MUX instruction, and that may seem like actually
430 // using an undefined value to other optimizations. This could lead
431 // to trouble further down the optimization stream, cause assertions
434 const MachineBasicBlock
&B
= *FP
.JoinB
;
438 if (usesUndefVReg(&MI
))
440 Register DefR
= MI
.getOperand(0).getReg();
441 if (isPredicate(DefR
))
448 unsigned HexagonEarlyIfConversion::computePhiCost(const MachineBasicBlock
*B
,
449 const FlowPattern
&FP
) const {
450 if (B
->pred_size() < 2)
454 for (const MachineInstr
&MI
: *B
) {
457 // If both incoming blocks are one of the TrueB/FalseB/SplitB, then
458 // a MUX may be needed. Otherwise the PHI will need to be updated at
460 // Find the interesting PHI operands for further checks.
461 SmallVector
<unsigned,2> Inc
;
462 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
463 const MachineBasicBlock
*BB
= MI
.getOperand(i
+1).getMBB();
464 if (BB
== FP
.SplitB
|| BB
== FP
.TrueB
|| BB
== FP
.FalseB
)
467 assert(Inc
.size() <= 2);
471 const MachineOperand
&RA
= MI
.getOperand(1);
472 const MachineOperand
&RB
= MI
.getOperand(3);
473 assert(RA
.isReg() && RB
.isReg());
474 // Must have a MUX if the phi uses a subregister.
475 if (RA
.getSubReg() != 0 || RB
.getSubReg() != 0) {
479 const MachineInstr
*Def1
= MRI
->getVRegDef(RA
.getReg());
480 const MachineInstr
*Def3
= MRI
->getVRegDef(RB
.getReg());
481 if (!HII
->isPredicable(*Def1
) || !HII
->isPredicable(*Def3
))
487 unsigned HexagonEarlyIfConversion::countPredicateDefs(
488 const MachineBasicBlock
*B
) const {
489 unsigned PredDefs
= 0;
490 for (auto &MI
: *B
) {
491 for (const MachineOperand
&MO
: MI
.operands()) {
492 if (!MO
.isReg() || !MO
.isDef())
494 Register R
= MO
.getReg();
495 if (!Register::isVirtualRegister(R
))
504 bool HexagonEarlyIfConversion::isProfitable(const FlowPattern
&FP
) const {
505 BranchProbability
JumpProb(1, 10);
506 BranchProbability
Prob(9, 10);
507 if (MBPI
&& FP
.TrueB
&& !FP
.FalseB
&&
508 (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) < JumpProb
||
509 MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) > Prob
))
512 if (MBPI
&& !FP
.TrueB
&& FP
.FalseB
&&
513 (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) < JumpProb
||
514 MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) > Prob
))
517 if (FP
.TrueB
&& FP
.FalseB
) {
518 // Do not IfCovert if the branch is one sided.
520 if (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) > Prob
)
522 if (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) > Prob
)
526 // If both sides are predicable, convert them if they join, and the
527 // join block has no other predecessors.
528 MachineBasicBlock
*TSB
= *FP
.TrueB
->succ_begin();
529 MachineBasicBlock
*FSB
= *FP
.FalseB
->succ_begin();
532 if (TSB
->pred_size() != 2)
536 // Calculate the total size of the predicated blocks.
537 // Assume instruction counts without branches to be the approximation of
538 // the code size. If the predicated blocks are smaller than a packet size,
539 // approximate the spare room in the packet that could be filled with the
540 // predicated/speculated instructions.
541 auto TotalCount
= [] (const MachineBasicBlock
*B
, unsigned &Spare
) {
544 unsigned T
= std::count_if(B
->begin(), B
->getFirstTerminator(),
545 [](const MachineInstr
&MI
) {
546 return !MI
.isMetaInstruction();
548 if (T
< HEXAGON_PACKET_SIZE
)
549 Spare
+= HEXAGON_PACKET_SIZE
-T
;
553 unsigned TotalIn
= TotalCount(FP
.TrueB
, Spare
) + TotalCount(FP
.FalseB
, Spare
);
555 dbgs() << "Total number of instructions to be predicated/speculated: "
556 << TotalIn
<< ", spare room: " << Spare
<< "\n");
557 if (TotalIn
>= SizeLimit
+Spare
)
560 // Count the number of PHI nodes that will need to be updated (converted
561 // to MUX). Those can be later converted to predicated instructions, so
562 // they aren't always adding extra cost.
563 // KLUDGE: Also, count the number of predicate register definitions in
564 // each block. The scheduler may increase the pressure of these and cause
565 // expensive spills (e.g. bitmnp01).
566 unsigned TotalPh
= 0;
567 unsigned PredDefs
= countPredicateDefs(FP
.SplitB
);
569 TotalPh
= computePhiCost(FP
.JoinB
, FP
);
570 PredDefs
+= countPredicateDefs(FP
.JoinB
);
572 if (FP
.TrueB
&& FP
.TrueB
->succ_size() > 0) {
573 MachineBasicBlock
*SB
= *FP
.TrueB
->succ_begin();
574 TotalPh
+= computePhiCost(SB
, FP
);
575 PredDefs
+= countPredicateDefs(SB
);
577 if (FP
.FalseB
&& FP
.FalseB
->succ_size() > 0) {
578 MachineBasicBlock
*SB
= *FP
.FalseB
->succ_begin();
579 TotalPh
+= computePhiCost(SB
, FP
);
580 PredDefs
+= countPredicateDefs(SB
);
583 LLVM_DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
585 if (TotalIn
+TotalPh
>= SizeLimit
+Spare
)
588 LLVM_DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs
596 bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock
*B
,
598 bool Changed
= false;
600 // Visit all dominated blocks from the same loop first, then process B.
601 MachineDomTreeNode
*N
= MDT
->getNode(B
);
603 using GTN
= GraphTraits
<MachineDomTreeNode
*>;
605 // We will change CFG/DT during this traversal, so take precautions to
606 // avoid problems related to invalidated iterators. In fact, processing
607 // a child C of B cannot cause another child to be removed, but it can
608 // cause a new child to be added (which was a child of C before C itself
609 // was removed. This new child C, however, would have been processed
610 // prior to processing B, so there is no need to process it again.
611 // Simply keep a list of children of B, and traverse that list.
612 using DTNodeVectType
= SmallVector
<MachineDomTreeNode
*, 4>;
613 DTNodeVectType
Cn(GTN::child_begin(N
), GTN::child_end(N
));
614 for (DTNodeVectType::iterator I
= Cn
.begin(), E
= Cn
.end(); I
!= E
; ++I
) {
615 MachineBasicBlock
*SB
= (*I
)->getBlock();
616 if (!Deleted
.count(SB
))
617 Changed
|= visitBlock(SB
, L
);
619 // When walking down the dominator tree, we want to traverse through
620 // blocks from nested (other) loops, because they can dominate blocks
621 // that are in L. Skip the non-L blocks only after the tree traversal.
622 if (MLI
->getLoopFor(B
) != L
)
626 if (!matchFlowPattern(B
, L
, FP
))
630 LLVM_DEBUG(dbgs() << "Conversion is not valid\n");
633 if (!isProfitable(FP
)) {
634 LLVM_DEBUG(dbgs() << "Conversion is not profitable\n");
639 simplifyFlowGraph(FP
);
643 bool HexagonEarlyIfConversion::visitLoop(MachineLoop
*L
) {
644 MachineBasicBlock
*HB
= L
? L
->getHeader() : nullptr;
645 LLVM_DEBUG((L
? dbgs() << "Visiting loop H:" << PrintMB(HB
)
646 : dbgs() << "Visiting function")
648 bool Changed
= false;
650 for (MachineLoop::iterator I
= L
->begin(), E
= L
->end(); I
!= E
; ++I
)
651 Changed
|= visitLoop(*I
);
654 MachineBasicBlock
*EntryB
= GraphTraits
<MachineFunction
*>::getEntryNode(MFN
);
655 Changed
|= visitBlock(L
? HB
: EntryB
, L
);
659 bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr
*MI
)
661 // HexagonInstrInfo::isPredicable will consider these stores are non-
662 // -predicable if the offset would become constant-extended after
664 unsigned Opc
= MI
->getOpcode();
666 case Hexagon::S2_storerb_io
:
667 case Hexagon::S2_storerbnew_io
:
668 case Hexagon::S2_storerh_io
:
669 case Hexagon::S2_storerhnew_io
:
670 case Hexagon::S2_storeri_io
:
671 case Hexagon::S2_storerinew_io
:
672 case Hexagon::S2_storerd_io
:
673 case Hexagon::S4_storeirb_io
:
674 case Hexagon::S4_storeirh_io
:
675 case Hexagon::S4_storeiri_io
:
679 // TargetInstrInfo::isPredicable takes a non-const pointer.
680 return MI
->mayStore() && HII
->isPredicable(const_cast<MachineInstr
&>(*MI
));
683 bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr
*MI
)
685 if (MI
->mayLoad() || MI
->mayStore())
687 if (MI
->isCall() || MI
->isBarrier() || MI
->isBranch())
689 if (MI
->hasUnmodeledSideEffects())
691 if (MI
->getOpcode() == TargetOpcode::LIFETIME_END
)
697 bool HexagonEarlyIfConversion::isPredicate(unsigned R
) const {
698 const TargetRegisterClass
*RC
= MRI
->getRegClass(R
);
699 return RC
== &Hexagon::PredRegsRegClass
||
700 RC
== &Hexagon::HvxQRRegClass
;
703 unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc
,
705 return HII
->getCondOpcode(Opc
, !IfTrue
);
708 void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock
*ToB
,
709 MachineBasicBlock::iterator At
, MachineInstr
*MI
,
710 unsigned PredR
, bool IfTrue
) {
712 if (At
!= ToB
->end())
713 DL
= At
->getDebugLoc();
714 else if (!ToB
->empty())
715 DL
= ToB
->back().getDebugLoc();
717 unsigned Opc
= MI
->getOpcode();
719 if (isPredicableStore(MI
)) {
720 unsigned COpc
= getCondStoreOpcode(Opc
, IfTrue
);
722 MachineInstrBuilder MIB
= BuildMI(*ToB
, At
, DL
, HII
->get(COpc
));
723 MachineInstr::mop_iterator MOI
= MI
->operands_begin();
724 if (HII
->isPostIncrement(*MI
)) {
729 for (const MachineOperand
&MO
: make_range(MOI
, MI
->operands_end()))
732 // Set memory references.
733 MIB
.cloneMemRefs(*MI
);
735 MI
->eraseFromParent();
739 if (Opc
== Hexagon::J2_jump
) {
740 MachineBasicBlock
*TB
= MI
->getOperand(0).getMBB();
741 const MCInstrDesc
&D
= HII
->get(IfTrue
? Hexagon::J2_jumpt
742 : Hexagon::J2_jumpf
);
743 BuildMI(*ToB
, At
, DL
, D
)
746 MI
->eraseFromParent();
750 // Print the offending instruction unconditionally as we are about to
753 llvm_unreachable("Unexpected instruction");
756 // Predicate/speculate non-branch instructions from FromB into block ToB.
757 // Leave the branches alone, they will be handled later. Btw, at this point
758 // FromB should have at most one branch, and it should be unconditional.
759 void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock
*ToB
,
760 MachineBasicBlock::iterator At
, MachineBasicBlock
*FromB
,
761 unsigned PredR
, bool IfTrue
) {
762 LLVM_DEBUG(dbgs() << "Predicating block " << PrintMB(FromB
) << "\n");
763 MachineBasicBlock::iterator End
= FromB
->getFirstTerminator();
764 MachineBasicBlock::iterator I
, NextI
;
766 for (I
= FromB
->begin(); I
!= End
; I
= NextI
) {
768 NextI
= std::next(I
);
769 if (isSafeToSpeculate(&*I
))
770 ToB
->splice(At
, FromB
, I
);
772 predicateInstr(ToB
, At
, &*I
, PredR
, IfTrue
);
776 unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock
*B
,
777 MachineBasicBlock::iterator At
, const TargetRegisterClass
*DRC
,
778 unsigned PredR
, unsigned TR
, unsigned TSR
, unsigned FR
, unsigned FSR
) {
780 switch (DRC
->getID()) {
781 case Hexagon::IntRegsRegClassID
:
782 case Hexagon::IntRegsLow8RegClassID
:
783 Opc
= Hexagon::C2_mux
;
785 case Hexagon::DoubleRegsRegClassID
:
786 case Hexagon::GeneralDoubleLow8RegsRegClassID
:
787 Opc
= Hexagon::PS_pselect
;
789 case Hexagon::HvxVRRegClassID
:
790 Opc
= Hexagon::PS_vselect
;
792 case Hexagon::HvxWRRegClassID
:
793 Opc
= Hexagon::PS_wselect
;
796 llvm_unreachable("unexpected register type");
798 const MCInstrDesc
&D
= HII
->get(Opc
);
800 DebugLoc DL
= B
->findBranchDebugLoc();
801 Register MuxR
= MRI
->createVirtualRegister(DRC
);
802 BuildMI(*B
, At
, DL
, D
, MuxR
)
809 void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock
*WhereB
,
810 const FlowPattern
&FP
) {
811 // Visit all PHI nodes in the WhereB block and generate MUX instructions
812 // in the split block. Update the PHI nodes with the values of the MUX.
813 auto NonPHI
= WhereB
->getFirstNonPHI();
814 for (auto I
= WhereB
->begin(); I
!= NonPHI
; ++I
) {
815 MachineInstr
*PN
= &*I
;
816 // Registers and subregisters corresponding to TrueB, FalseB and SplitB.
817 unsigned TR
= 0, TSR
= 0, FR
= 0, FSR
= 0, SR
= 0, SSR
= 0;
818 for (int i
= PN
->getNumOperands()-2; i
> 0; i
-= 2) {
819 const MachineOperand
&RO
= PN
->getOperand(i
), &BO
= PN
->getOperand(i
+1);
820 if (BO
.getMBB() == FP
.SplitB
)
821 SR
= RO
.getReg(), SSR
= RO
.getSubReg();
822 else if (BO
.getMBB() == FP
.TrueB
)
823 TR
= RO
.getReg(), TSR
= RO
.getSubReg();
824 else if (BO
.getMBB() == FP
.FalseB
)
825 FR
= RO
.getReg(), FSR
= RO
.getSubReg();
828 PN
->RemoveOperand(i
+1);
829 PN
->RemoveOperand(i
);
837 unsigned MuxR
= 0, MuxSR
= 0;
840 Register DR
= PN
->getOperand(0).getReg();
841 const TargetRegisterClass
*RC
= MRI
->getRegClass(DR
);
842 MuxR
= buildMux(FP
.SplitB
, FP
.SplitB
->getFirstTerminator(), RC
,
843 FP
.PredR
, TR
, TSR
, FR
, FSR
);
852 PN
->addOperand(MachineOperand::CreateReg(MuxR
, false, false, false, false,
853 false, false, MuxSR
));
854 PN
->addOperand(MachineOperand::CreateMBB(FP
.SplitB
));
858 void HexagonEarlyIfConversion::convert(const FlowPattern
&FP
) {
859 MachineBasicBlock
*TSB
= nullptr, *FSB
= nullptr;
860 MachineBasicBlock::iterator OldTI
= FP
.SplitB
->getFirstTerminator();
861 assert(OldTI
!= FP
.SplitB
->end());
862 DebugLoc DL
= OldTI
->getDebugLoc();
865 TSB
= *FP
.TrueB
->succ_begin();
866 predicateBlockNB(FP
.SplitB
, OldTI
, FP
.TrueB
, FP
.PredR
, true);
869 FSB
= *FP
.FalseB
->succ_begin();
870 MachineBasicBlock::iterator At
= FP
.SplitB
->getFirstTerminator();
871 predicateBlockNB(FP
.SplitB
, At
, FP
.FalseB
, FP
.PredR
, false);
874 // Regenerate new terminators in the split block and update the successors.
875 // First, remember any information that may be needed later and remove the
876 // existing terminators/successors from the split block.
877 MachineBasicBlock
*SSB
= nullptr;
878 FP
.SplitB
->erase(OldTI
, FP
.SplitB
->end());
879 while (FP
.SplitB
->succ_size() > 0) {
880 MachineBasicBlock
*T
= *FP
.SplitB
->succ_begin();
881 // It's possible that the split block had a successor that is not a pre-
882 // dicated block. This could only happen if there was only one block to
883 // be predicated. Example:
885 // if (p) jump true_b
889 // unrelated2_b: ; can have other predecessors, so it's not "false_b"
891 // true_b: ; only reachable from split_b, can be predicated
894 // Find this successor (SSB) if it exists.
895 if (T
!= FP
.TrueB
&& T
!= FP
.FalseB
) {
899 FP
.SplitB
->removeSuccessor(FP
.SplitB
->succ_begin());
902 // Insert new branches and update the successors of the split block. This
903 // may create unconditional branches to the layout successor, etc., but
904 // that will be cleaned up later. For now, make sure that correct code is
907 assert(!SSB
|| SSB
== FP
.JoinB
);
908 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jump
))
910 FP
.SplitB
->addSuccessor(FP
.JoinB
);
912 bool HasBranch
= false;
914 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jumpt
))
917 FP
.SplitB
->addSuccessor(TSB
);
921 const MCInstrDesc
&D
= HasBranch
? HII
->get(Hexagon::J2_jump
)
922 : HII
->get(Hexagon::J2_jumpf
);
923 MachineInstrBuilder MIB
= BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, D
);
925 MIB
.addReg(FP
.PredR
);
927 FP
.SplitB
->addSuccessor(FSB
);
930 // This cannot happen if both TSB and FSB are set. [TF]SB are the
931 // successor blocks of the TrueB and FalseB (or null of the TrueB
932 // or FalseB block is null). SSB is the potential successor block
933 // of the SplitB that is neither TrueB nor FalseB.
934 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jump
))
936 FP
.SplitB
->addSuccessor(SSB
);
940 // What is left to do is to update the PHI nodes that could have entries
941 // referring to predicated blocks.
943 updatePhiNodes(FP
.JoinB
, FP
);
946 updatePhiNodes(TSB
, FP
);
948 updatePhiNodes(FSB
, FP
);
949 // Nothing to update in SSB, since SSB's predecessors haven't changed.
953 void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock
*B
) {
954 LLVM_DEBUG(dbgs() << "Removing block " << PrintMB(B
) << "\n");
956 // Transfer the immediate dominator information from B to its descendants.
957 MachineDomTreeNode
*N
= MDT
->getNode(B
);
958 MachineDomTreeNode
*IDN
= N
->getIDom();
960 MachineBasicBlock
*IDB
= IDN
->getBlock();
962 using GTN
= GraphTraits
<MachineDomTreeNode
*>;
963 using DTNodeVectType
= SmallVector
<MachineDomTreeNode
*, 4>;
965 DTNodeVectType
Cn(GTN::child_begin(N
), GTN::child_end(N
));
966 for (DTNodeVectType::iterator I
= Cn
.begin(), E
= Cn
.end(); I
!= E
; ++I
) {
967 MachineBasicBlock
*SB
= (*I
)->getBlock();
968 MDT
->changeImmediateDominator(SB
, IDB
);
972 while (B
->succ_size() > 0)
973 B
->removeSuccessor(B
->succ_begin());
975 for (auto I
= B
->pred_begin(), E
= B
->pred_end(); I
!= E
; ++I
)
976 (*I
)->removeSuccessor(B
, true);
980 MFN
->erase(B
->getIterator());
983 void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock
*B
) {
984 LLVM_DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B
) << "\n");
985 MachineBasicBlock::iterator I
, NextI
, NonPHI
= B
->getFirstNonPHI();
986 for (I
= B
->begin(); I
!= NonPHI
; I
= NextI
) {
987 NextI
= std::next(I
);
988 MachineInstr
*PN
= &*I
;
989 assert(PN
->getNumOperands() == 3 && "Invalid phi node");
990 MachineOperand
&UO
= PN
->getOperand(1);
991 Register UseR
= UO
.getReg(), UseSR
= UO
.getSubReg();
992 Register DefR
= PN
->getOperand(0).getReg();
993 unsigned NewR
= UseR
;
995 // MRI.replaceVregUsesWith does not allow to update the subregister,
996 // so instead of doing the use-iteration here, create a copy into a
997 // "non-subregistered" register.
998 const DebugLoc
&DL
= PN
->getDebugLoc();
999 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefR
);
1000 NewR
= MRI
->createVirtualRegister(RC
);
1001 NonPHI
= BuildMI(*B
, NonPHI
, DL
, HII
->get(TargetOpcode::COPY
), NewR
)
1002 .addReg(UseR
, 0, UseSR
);
1004 MRI
->replaceRegWith(DefR
, NewR
);
1009 void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock
*PredB
,
1010 MachineBasicBlock
*SuccB
) {
1011 LLVM_DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB
) << " and "
1012 << PrintMB(SuccB
) << "\n");
1013 bool TermOk
= hasUncondBranch(SuccB
);
1014 eliminatePhis(SuccB
);
1015 HII
->removeBranch(*PredB
);
1016 PredB
->removeSuccessor(SuccB
);
1017 PredB
->splice(PredB
->end(), SuccB
, SuccB
->begin(), SuccB
->end());
1018 PredB
->transferSuccessorsAndUpdatePHIs(SuccB
);
1021 PredB
->updateTerminator();
1024 void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern
&FP
) {
1026 removeBlock(FP
.TrueB
);
1028 removeBlock(FP
.FalseB
);
1030 FP
.SplitB
->updateTerminator();
1031 if (FP
.SplitB
->succ_size() != 1)
1034 MachineBasicBlock
*SB
= *FP
.SplitB
->succ_begin();
1035 if (SB
->pred_size() != 1)
1038 // By now, the split block has only one successor (SB), and SB has only
1039 // one predecessor. We can try to merge them. We will need to update ter-
1040 // minators in FP.Split+SB, and that requires working analyzeBranch, which
1041 // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
1042 // with an unconditional branch, we won't need to touch the terminators.
1043 if (!hasEHLabel(SB
) || hasUncondBranch(SB
))
1044 mergeBlocks(FP
.SplitB
, SB
);
1047 bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction
&MF
) {
1048 if (skipFunction(MF
.getFunction()))
1051 auto &ST
= MF
.getSubtarget
<HexagonSubtarget
>();
1052 HII
= ST
.getInstrInfo();
1053 TRI
= ST
.getRegisterInfo();
1055 MRI
= &MF
.getRegInfo();
1056 MDT
= &getAnalysis
<MachineDominatorTree
>();
1057 MLI
= &getAnalysis
<MachineLoopInfo
>();
1058 MBPI
= EnableHexagonBP
? &getAnalysis
<MachineBranchProbabilityInfo
>() :
1062 bool Changed
= false;
1064 for (MachineLoopInfo::iterator I
= MLI
->begin(), E
= MLI
->end(); I
!= E
; ++I
)
1065 Changed
|= visitLoop(*I
);
1066 Changed
|= visitLoop(nullptr);
1071 //===----------------------------------------------------------------------===//
1072 // Public Constructor Functions
1073 //===----------------------------------------------------------------------===//
1074 FunctionPass
*llvm::createHexagonEarlyIfConversion() {
1075 return new HexagonEarlyIfConversion();