1 //===- HexagonEarlyIfConv.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements a Hexagon-specific if-conversion pass that runs on the
11 // In SSA it is not straightforward to represent instructions that condi-
12 // tionally define registers, since a conditionally-defined register may
13 // only be used under the same condition on which the definition was based.
14 // To avoid complications of this nature, this patch will only generate
15 // predicated stores, and speculate other instructions from the "if-conver-
17 // The code will recognize CFG patterns where a block with a conditional
18 // branch "splits" into a "true block" and a "false block". Either of these
19 // could be omitted (in case of a triangle, for example).
20 // If after conversion of the side block(s) the CFG allows it, the resul-
21 // ting blocks may be merged. If the "join" block contained PHI nodes, they
22 // will be replaced with MUX (or MUX-like) instructions to maintain the
23 // semantics of the PHI.
27 // %40 = L2_loadrub_io killed %39, 1
28 // %41 = S2_tstbit_i killed %40, 0
29 // J2_jumpt killed %41, <%bb.5>, implicit dead %pc
30 // J2_jump <%bb.4>, implicit dead %pc
31 // Successors according to CFG: %bb.4(62) %bb.5(62)
33 // %bb.4: derived from LLVM BB %if.then
34 // Predecessors according to CFG: %bb.3
35 // %11 = A2_addp %6, %10
36 // S2_storerd_io %32, 16, %11
37 // Successors according to CFG: %bb.5
39 // %bb.5: derived from LLVM BB %if.end
40 // Predecessors according to CFG: %bb.3 %bb.4
41 // %12 = PHI %6, <%bb.3>, %11, <%bb.4>
42 // %13 = A2_addp %7, %12
43 // %42 = C2_cmpeqi %9, 10
44 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
45 // J2_jump <%bb.6>, implicit dead %pc
46 // Successors according to CFG: %bb.6(4) %bb.3(124)
50 // %40 = L2_loadrub_io killed %39, 1
51 // %41 = S2_tstbit_i killed %40, 0
52 // spec-> %11 = A2_addp %6, %10
53 // pred-> S2_pstorerdf_io %41, %32, 16, %11
54 // %46 = PS_pselect %41, %6, %11
55 // %13 = A2_addp %7, %46
56 // %42 = C2_cmpeqi %9, 10
57 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
58 // J2_jump <%bb.6>, implicit dead %pc
59 // Successors according to CFG: %bb.6 %bb.3
62 #include "HexagonInstrInfo.h"
63 #include "HexagonSubtarget.h"
64 #include "llvm/ADT/DenseSet.h"
65 #include "llvm/ADT/SmallVector.h"
66 #include "llvm/ADT/StringRef.h"
67 #include "llvm/ADT/iterator_range.h"
68 #include "llvm/CodeGen/MachineBasicBlock.h"
69 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
70 #include "llvm/CodeGen/MachineDominators.h"
71 #include "llvm/CodeGen/MachineFunction.h"
72 #include "llvm/CodeGen/MachineFunctionPass.h"
73 #include "llvm/CodeGen/MachineInstr.h"
74 #include "llvm/CodeGen/MachineInstrBuilder.h"
75 #include "llvm/CodeGen/MachineLoopInfo.h"
76 #include "llvm/CodeGen/MachineOperand.h"
77 #include "llvm/CodeGen/MachineRegisterInfo.h"
78 #include "llvm/CodeGen/TargetRegisterInfo.h"
79 #include "llvm/IR/DebugLoc.h"
80 #include "llvm/Pass.h"
81 #include "llvm/Support/BranchProbability.h"
82 #include "llvm/Support/CommandLine.h"
83 #include "llvm/Support/Compiler.h"
84 #include "llvm/Support/Debug.h"
85 #include "llvm/Support/ErrorHandling.h"
86 #include "llvm/Support/raw_ostream.h"
90 #define DEBUG_TYPE "hexagon-eif"
96 FunctionPass
*createHexagonEarlyIfConversion();
97 void initializeHexagonEarlyIfConversionPass(PassRegistry
& Registry
);
99 } // end namespace llvm
101 static cl::opt
<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden
,
102 cl::init(true), cl::desc("Enable branch probability info"));
103 static cl::opt
<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden
,
104 cl::desc("Size limit in Hexagon early if-conversion"));
105 static cl::opt
<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false),
106 cl::Hidden
, cl::desc("Do not convert branches that may exit the loop"));
111 PrintMB(const MachineBasicBlock
*B
) : MB(B
) {}
113 const MachineBasicBlock
*MB
;
115 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintMB
&P
) {
117 return OS
<< "<none>";
118 return OS
<< '#' << P
.MB
->getNumber();
122 FlowPattern() = default;
123 FlowPattern(MachineBasicBlock
*B
, unsigned PR
, MachineBasicBlock
*TB
,
124 MachineBasicBlock
*FB
, MachineBasicBlock
*JB
)
125 : SplitB(B
), TrueB(TB
), FalseB(FB
), JoinB(JB
), PredR(PR
) {}
127 MachineBasicBlock
*SplitB
= nullptr;
128 MachineBasicBlock
*TrueB
= nullptr;
129 MachineBasicBlock
*FalseB
= nullptr;
130 MachineBasicBlock
*JoinB
= nullptr;
135 PrintFP(const FlowPattern
&P
, const TargetRegisterInfo
&T
)
138 const FlowPattern
&FP
;
139 const TargetRegisterInfo
&TRI
;
140 friend raw_ostream
&operator<< (raw_ostream
&OS
, const PrintFP
&P
);
142 raw_ostream
&operator<<(raw_ostream
&OS
,
143 const PrintFP
&P
) LLVM_ATTRIBUTE_UNUSED
;
144 raw_ostream
&operator<<(raw_ostream
&OS
, const PrintFP
&P
) {
145 OS
<< "{ SplitB:" << PrintMB(P
.FP
.SplitB
)
146 << ", PredR:" << printReg(P
.FP
.PredR
, &P
.TRI
)
147 << ", TrueB:" << PrintMB(P
.FP
.TrueB
)
148 << ", FalseB:" << PrintMB(P
.FP
.FalseB
)
149 << ", JoinB:" << PrintMB(P
.FP
.JoinB
) << " }";
153 class HexagonEarlyIfConversion
: public MachineFunctionPass
{
157 HexagonEarlyIfConversion() : MachineFunctionPass(ID
) {}
159 StringRef
getPassName() const override
{
160 return "Hexagon early if conversion";
163 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
164 AU
.addRequired
<MachineBranchProbabilityInfo
>();
165 AU
.addRequired
<MachineDominatorTree
>();
166 AU
.addPreserved
<MachineDominatorTree
>();
167 AU
.addRequired
<MachineLoopInfo
>();
168 MachineFunctionPass::getAnalysisUsage(AU
);
171 bool runOnMachineFunction(MachineFunction
&MF
) override
;
174 using BlockSetType
= DenseSet
<MachineBasicBlock
*>;
176 bool isPreheader(const MachineBasicBlock
*B
) const;
177 bool matchFlowPattern(MachineBasicBlock
*B
, MachineLoop
*L
,
179 bool visitBlock(MachineBasicBlock
*B
, MachineLoop
*L
);
180 bool visitLoop(MachineLoop
*L
);
182 bool hasEHLabel(const MachineBasicBlock
*B
) const;
183 bool hasUncondBranch(const MachineBasicBlock
*B
) const;
184 bool isValidCandidate(const MachineBasicBlock
*B
) const;
185 bool usesUndefVReg(const MachineInstr
*MI
) const;
186 bool isValid(const FlowPattern
&FP
) const;
187 unsigned countPredicateDefs(const MachineBasicBlock
*B
) const;
188 unsigned computePhiCost(const MachineBasicBlock
*B
,
189 const FlowPattern
&FP
) const;
190 bool isProfitable(const FlowPattern
&FP
) const;
191 bool isPredicableStore(const MachineInstr
*MI
) const;
192 bool isSafeToSpeculate(const MachineInstr
*MI
) const;
193 bool isPredicate(unsigned R
) const;
195 unsigned getCondStoreOpcode(unsigned Opc
, bool IfTrue
) const;
196 void predicateInstr(MachineBasicBlock
*ToB
, MachineBasicBlock::iterator At
,
197 MachineInstr
*MI
, unsigned PredR
, bool IfTrue
);
198 void predicateBlockNB(MachineBasicBlock
*ToB
,
199 MachineBasicBlock::iterator At
, MachineBasicBlock
*FromB
,
200 unsigned PredR
, bool IfTrue
);
202 unsigned buildMux(MachineBasicBlock
*B
, MachineBasicBlock::iterator At
,
203 const TargetRegisterClass
*DRC
, unsigned PredR
, unsigned TR
,
204 unsigned TSR
, unsigned FR
, unsigned FSR
);
205 void updatePhiNodes(MachineBasicBlock
*WhereB
, const FlowPattern
&FP
);
206 void convert(const FlowPattern
&FP
);
208 void removeBlock(MachineBasicBlock
*B
);
209 void eliminatePhis(MachineBasicBlock
*B
);
210 void mergeBlocks(MachineBasicBlock
*PredB
, MachineBasicBlock
*SuccB
);
211 void simplifyFlowGraph(const FlowPattern
&FP
);
213 const HexagonInstrInfo
*HII
= nullptr;
214 const TargetRegisterInfo
*TRI
= nullptr;
215 MachineFunction
*MFN
= nullptr;
216 MachineRegisterInfo
*MRI
= nullptr;
217 MachineDominatorTree
*MDT
= nullptr;
218 MachineLoopInfo
*MLI
= nullptr;
219 BlockSetType Deleted
;
220 const MachineBranchProbabilityInfo
*MBPI
= nullptr;
223 } // end anonymous namespace
225 char HexagonEarlyIfConversion::ID
= 0;
227 INITIALIZE_PASS(HexagonEarlyIfConversion
, "hexagon-early-if",
228 "Hexagon early if conversion", false, false)
230 bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock
*B
) const {
231 if (B
->succ_size() != 1)
233 MachineBasicBlock
*SB
= *B
->succ_begin();
234 MachineLoop
*L
= MLI
->getLoopFor(SB
);
235 return L
&& SB
== L
->getHeader() && MDT
->dominates(B
, SB
);
238 bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock
*B
,
239 MachineLoop
*L
, FlowPattern
&FP
) {
240 LLVM_DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B
)
243 // Interested only in conditional branches, no .new, no new-value, etc.
244 // Check the terminators directly, it's easier than handling all responses
245 // from analyzeBranch.
246 MachineBasicBlock
*TB
= nullptr, *FB
= nullptr;
247 MachineBasicBlock::const_iterator T1I
= B
->getFirstTerminator();
250 unsigned Opc
= T1I
->getOpcode();
251 if (Opc
!= Hexagon::J2_jumpt
&& Opc
!= Hexagon::J2_jumpf
)
253 Register PredR
= T1I
->getOperand(0).getReg();
255 // Get the layout successor, or 0 if B does not have one.
256 MachineFunction::iterator NextBI
= std::next(MachineFunction::iterator(B
));
257 MachineBasicBlock
*NextB
= (NextBI
!= MFN
->end()) ? &*NextBI
: nullptr;
259 MachineBasicBlock
*T1B
= T1I
->getOperand(1).getMBB();
260 MachineBasicBlock::const_iterator T2I
= std::next(T1I
);
261 // The second terminator should be an unconditional branch.
262 assert(T2I
== B
->end() || T2I
->getOpcode() == Hexagon::J2_jump
);
263 MachineBasicBlock
*T2B
= (T2I
== B
->end()) ? NextB
264 : T2I
->getOperand(0).getMBB();
266 // XXX merge if T1B == NextB, or convert branch to unconditional.
267 // mark as diamond with both sides equal?
271 // Record the true/false blocks in such a way that "true" means "if (PredR)",
272 // and "false" means "if (!PredR)".
273 if (Opc
== Hexagon::J2_jumpt
)
278 if (!MDT
->properlyDominates(B
, TB
) || !MDT
->properlyDominates(B
, FB
))
281 // Detect triangle first. In case of a triangle, one of the blocks TB/FB
282 // can fall through into the other, in other words, it will be executed
283 // in both cases. We only want to predicate the block that is executed
285 assert(TB
&& FB
&& "Failed to find triangle control flow blocks");
286 unsigned TNP
= TB
->pred_size(), FNP
= FB
->pred_size();
287 unsigned TNS
= TB
->succ_size(), FNS
= FB
->succ_size();
289 // A block is predicable if it has one predecessor (it must be B), and
290 // it has a single successor. In fact, the block has to end either with
291 // an unconditional branch (which can be predicated), or with a fall-
293 // Also, skip blocks that do not belong to the same loop.
294 bool TOk
= (TNP
== 1 && TNS
== 1 && MLI
->getLoopFor(TB
) == L
);
295 bool FOk
= (FNP
== 1 && FNS
== 1 && MLI
->getLoopFor(FB
) == L
);
297 // If requested (via an option), do not consider branches where the
298 // true and false targets do not belong to the same loop.
299 if (SkipExitBranches
&& MLI
->getLoopFor(TB
) != MLI
->getLoopFor(FB
))
302 // If neither is predicable, there is nothing interesting.
306 MachineBasicBlock
*TSB
= (TNS
> 0) ? *TB
->succ_begin() : nullptr;
307 MachineBasicBlock
*FSB
= (FNS
> 0) ? *FB
->succ_begin() : nullptr;
308 MachineBasicBlock
*JB
= nullptr;
314 // Diamond: "if (P) then TB; else FB;".
322 // !TOk && FOk (at least one must be true by now).
327 // Don't try to predicate loop preheaders.
328 if ((TB
&& isPreheader(TB
)) || (FB
&& isPreheader(FB
))) {
329 LLVM_DEBUG(dbgs() << "One of blocks " << PrintMB(TB
) << ", " << PrintMB(FB
)
330 << " is a loop preheader. Skipping.\n");
334 FP
= FlowPattern(B
, PredR
, TB
, FB
, JB
);
335 LLVM_DEBUG(dbgs() << "Detected " << PrintFP(FP
, *TRI
) << "\n");
339 // KLUDGE: HexagonInstrInfo::analyzeBranch won't work on a block that
340 // contains EH_LABEL.
341 bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock
*B
) const {
348 // KLUDGE: HexagonInstrInfo::analyzeBranch may be unable to recognize
349 // that a block can never fall-through.
350 bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock
*B
)
352 MachineBasicBlock::const_iterator I
= B
->getFirstTerminator(), E
= B
->end();
361 bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock
*B
)
365 if (B
->isEHPad() || B
->hasAddressTaken())
367 if (B
->succ_size() == 0)
370 for (auto &MI
: *B
) {
371 if (MI
.isDebugInstr())
373 if (MI
.isConditionalBranch())
375 unsigned Opc
= MI
.getOpcode();
376 bool IsJMP
= (Opc
== Hexagon::J2_jump
);
377 if (!isPredicableStore(&MI
) && !IsJMP
&& !isSafeToSpeculate(&MI
))
379 // Look for predicate registers defined by this instruction. It's ok
380 // to speculate such an instruction, but the predicate register cannot
381 // be used outside of this block (or else it won't be possible to
382 // update the use of it after predication). PHI uses will be updated
383 // to use a result of a MUX, and a MUX cannot be created for predicate
385 for (const MachineOperand
&MO
: MI
.operands()) {
386 if (!MO
.isReg() || !MO
.isDef())
388 Register R
= MO
.getReg();
393 for (auto U
= MRI
->use_begin(R
); U
!= MRI
->use_end(); ++U
)
394 if (U
->getParent()->isPHI())
401 bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr
*MI
) const {
402 for (const MachineOperand
&MO
: MI
->operands()) {
403 if (!MO
.isReg() || !MO
.isUse())
405 Register R
= MO
.getReg();
408 const MachineInstr
*DefI
= MRI
->getVRegDef(R
);
409 // "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
410 assert(DefI
&& "Expecting a reaching def in MRI");
411 if (DefI
->isImplicitDef())
417 bool HexagonEarlyIfConversion::isValid(const FlowPattern
&FP
) const {
418 if (hasEHLabel(FP
.SplitB
)) // KLUDGE: see function definition
420 if (FP
.TrueB
&& !isValidCandidate(FP
.TrueB
))
422 if (FP
.FalseB
&& !isValidCandidate(FP
.FalseB
))
424 // Check the PHIs in the join block. If any of them use a register
425 // that is defined as IMPLICIT_DEF, do not convert this. This can
426 // legitimately happen if one side of the split never executes, but
427 // the compiler is unable to prove it. That side may then seem to
428 // provide an "undef" value to the join block, however it will never
429 // execute at run-time. If we convert this case, the "undef" will
430 // be used in a MUX instruction, and that may seem like actually
431 // using an undefined value to other optimizations. This could lead
432 // to trouble further down the optimization stream, cause assertions
435 const MachineBasicBlock
&B
= *FP
.JoinB
;
439 if (usesUndefVReg(&MI
))
441 Register DefR
= MI
.getOperand(0).getReg();
442 if (isPredicate(DefR
))
449 unsigned HexagonEarlyIfConversion::computePhiCost(const MachineBasicBlock
*B
,
450 const FlowPattern
&FP
) const {
451 if (B
->pred_size() < 2)
455 for (const MachineInstr
&MI
: *B
) {
458 // If both incoming blocks are one of the TrueB/FalseB/SplitB, then
459 // a MUX may be needed. Otherwise the PHI will need to be updated at
461 // Find the interesting PHI operands for further checks.
462 SmallVector
<unsigned,2> Inc
;
463 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
464 const MachineBasicBlock
*BB
= MI
.getOperand(i
+1).getMBB();
465 if (BB
== FP
.SplitB
|| BB
== FP
.TrueB
|| BB
== FP
.FalseB
)
468 assert(Inc
.size() <= 2);
472 const MachineOperand
&RA
= MI
.getOperand(1);
473 const MachineOperand
&RB
= MI
.getOperand(3);
474 assert(RA
.isReg() && RB
.isReg());
475 // Must have a MUX if the phi uses a subregister.
476 if (RA
.getSubReg() != 0 || RB
.getSubReg() != 0) {
480 const MachineInstr
*Def1
= MRI
->getVRegDef(RA
.getReg());
481 const MachineInstr
*Def3
= MRI
->getVRegDef(RB
.getReg());
482 if (!HII
->isPredicable(*Def1
) || !HII
->isPredicable(*Def3
))
488 unsigned HexagonEarlyIfConversion::countPredicateDefs(
489 const MachineBasicBlock
*B
) const {
490 unsigned PredDefs
= 0;
491 for (auto &MI
: *B
) {
492 for (const MachineOperand
&MO
: MI
.operands()) {
493 if (!MO
.isReg() || !MO
.isDef())
495 Register R
= MO
.getReg();
505 bool HexagonEarlyIfConversion::isProfitable(const FlowPattern
&FP
) const {
506 BranchProbability
JumpProb(1, 10);
507 BranchProbability
Prob(9, 10);
508 if (MBPI
&& FP
.TrueB
&& !FP
.FalseB
&&
509 (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) < JumpProb
||
510 MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) > Prob
))
513 if (MBPI
&& !FP
.TrueB
&& FP
.FalseB
&&
514 (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) < JumpProb
||
515 MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) > Prob
))
518 if (FP
.TrueB
&& FP
.FalseB
) {
519 // Do not IfCovert if the branch is one sided.
521 if (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) > Prob
)
523 if (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) > Prob
)
527 // If both sides are predicable, convert them if they join, and the
528 // join block has no other predecessors.
529 MachineBasicBlock
*TSB
= *FP
.TrueB
->succ_begin();
530 MachineBasicBlock
*FSB
= *FP
.FalseB
->succ_begin();
533 if (TSB
->pred_size() != 2)
537 // Calculate the total size of the predicated blocks.
538 // Assume instruction counts without branches to be the approximation of
539 // the code size. If the predicated blocks are smaller than a packet size,
540 // approximate the spare room in the packet that could be filled with the
541 // predicated/speculated instructions.
542 auto TotalCount
= [] (const MachineBasicBlock
*B
, unsigned &Spare
) {
545 unsigned T
= std::count_if(B
->begin(), B
->getFirstTerminator(),
546 [](const MachineInstr
&MI
) {
547 return !MI
.isMetaInstruction();
549 if (T
< HEXAGON_PACKET_SIZE
)
550 Spare
+= HEXAGON_PACKET_SIZE
-T
;
554 unsigned TotalIn
= TotalCount(FP
.TrueB
, Spare
) + TotalCount(FP
.FalseB
, Spare
);
556 dbgs() << "Total number of instructions to be predicated/speculated: "
557 << TotalIn
<< ", spare room: " << Spare
<< "\n");
558 if (TotalIn
>= SizeLimit
+Spare
)
561 // Count the number of PHI nodes that will need to be updated (converted
562 // to MUX). Those can be later converted to predicated instructions, so
563 // they aren't always adding extra cost.
564 // KLUDGE: Also, count the number of predicate register definitions in
565 // each block. The scheduler may increase the pressure of these and cause
566 // expensive spills (e.g. bitmnp01).
567 unsigned TotalPh
= 0;
568 unsigned PredDefs
= countPredicateDefs(FP
.SplitB
);
570 TotalPh
= computePhiCost(FP
.JoinB
, FP
);
571 PredDefs
+= countPredicateDefs(FP
.JoinB
);
573 if (FP
.TrueB
&& FP
.TrueB
->succ_size() > 0) {
574 MachineBasicBlock
*SB
= *FP
.TrueB
->succ_begin();
575 TotalPh
+= computePhiCost(SB
, FP
);
576 PredDefs
+= countPredicateDefs(SB
);
578 if (FP
.FalseB
&& FP
.FalseB
->succ_size() > 0) {
579 MachineBasicBlock
*SB
= *FP
.FalseB
->succ_begin();
580 TotalPh
+= computePhiCost(SB
, FP
);
581 PredDefs
+= countPredicateDefs(SB
);
584 LLVM_DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
586 if (TotalIn
+TotalPh
>= SizeLimit
+Spare
)
589 LLVM_DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs
597 bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock
*B
,
599 bool Changed
= false;
601 // Visit all dominated blocks from the same loop first, then process B.
602 MachineDomTreeNode
*N
= MDT
->getNode(B
);
604 using GTN
= GraphTraits
<MachineDomTreeNode
*>;
606 // We will change CFG/DT during this traversal, so take precautions to
607 // avoid problems related to invalidated iterators. In fact, processing
608 // a child C of B cannot cause another child to be removed, but it can
609 // cause a new child to be added (which was a child of C before C itself
610 // was removed. This new child C, however, would have been processed
611 // prior to processing B, so there is no need to process it again.
612 // Simply keep a list of children of B, and traverse that list.
613 using DTNodeVectType
= SmallVector
<MachineDomTreeNode
*, 4>;
614 DTNodeVectType
Cn(GTN::child_begin(N
), GTN::child_end(N
));
615 for (DTNodeVectType::iterator I
= Cn
.begin(), E
= Cn
.end(); I
!= E
; ++I
) {
616 MachineBasicBlock
*SB
= (*I
)->getBlock();
617 if (!Deleted
.count(SB
))
618 Changed
|= visitBlock(SB
, L
);
620 // When walking down the dominator tree, we want to traverse through
621 // blocks from nested (other) loops, because they can dominate blocks
622 // that are in L. Skip the non-L blocks only after the tree traversal.
623 if (MLI
->getLoopFor(B
) != L
)
627 if (!matchFlowPattern(B
, L
, FP
))
631 LLVM_DEBUG(dbgs() << "Conversion is not valid\n");
634 if (!isProfitable(FP
)) {
635 LLVM_DEBUG(dbgs() << "Conversion is not profitable\n");
640 simplifyFlowGraph(FP
);
644 bool HexagonEarlyIfConversion::visitLoop(MachineLoop
*L
) {
645 MachineBasicBlock
*HB
= L
? L
->getHeader() : nullptr;
646 LLVM_DEBUG((L
? dbgs() << "Visiting loop H:" << PrintMB(HB
)
647 : dbgs() << "Visiting function")
649 bool Changed
= false;
651 for (MachineLoop::iterator I
= L
->begin(), E
= L
->end(); I
!= E
; ++I
)
652 Changed
|= visitLoop(*I
);
655 MachineBasicBlock
*EntryB
= GraphTraits
<MachineFunction
*>::getEntryNode(MFN
);
656 Changed
|= visitBlock(L
? HB
: EntryB
, L
);
660 bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr
*MI
)
662 // HexagonInstrInfo::isPredicable will consider these stores are non-
663 // -predicable if the offset would become constant-extended after
665 unsigned Opc
= MI
->getOpcode();
667 case Hexagon::S2_storerb_io
:
668 case Hexagon::S2_storerbnew_io
:
669 case Hexagon::S2_storerh_io
:
670 case Hexagon::S2_storerhnew_io
:
671 case Hexagon::S2_storeri_io
:
672 case Hexagon::S2_storerinew_io
:
673 case Hexagon::S2_storerd_io
:
674 case Hexagon::S4_storeirb_io
:
675 case Hexagon::S4_storeirh_io
:
676 case Hexagon::S4_storeiri_io
:
680 // TargetInstrInfo::isPredicable takes a non-const pointer.
681 return MI
->mayStore() && HII
->isPredicable(const_cast<MachineInstr
&>(*MI
));
684 bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr
*MI
)
686 if (MI
->mayLoadOrStore())
688 if (MI
->isCall() || MI
->isBarrier() || MI
->isBranch())
690 if (MI
->hasUnmodeledSideEffects())
692 if (MI
->getOpcode() == TargetOpcode::LIFETIME_END
)
698 bool HexagonEarlyIfConversion::isPredicate(unsigned R
) const {
699 const TargetRegisterClass
*RC
= MRI
->getRegClass(R
);
700 return RC
== &Hexagon::PredRegsRegClass
||
701 RC
== &Hexagon::HvxQRRegClass
;
704 unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc
,
706 return HII
->getCondOpcode(Opc
, !IfTrue
);
709 void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock
*ToB
,
710 MachineBasicBlock::iterator At
, MachineInstr
*MI
,
711 unsigned PredR
, bool IfTrue
) {
713 if (At
!= ToB
->end())
714 DL
= At
->getDebugLoc();
715 else if (!ToB
->empty())
716 DL
= ToB
->back().getDebugLoc();
718 unsigned Opc
= MI
->getOpcode();
720 if (isPredicableStore(MI
)) {
721 unsigned COpc
= getCondStoreOpcode(Opc
, IfTrue
);
723 MachineInstrBuilder MIB
= BuildMI(*ToB
, At
, DL
, HII
->get(COpc
));
724 MachineInstr::mop_iterator MOI
= MI
->operands_begin();
725 if (HII
->isPostIncrement(*MI
)) {
730 for (const MachineOperand
&MO
: make_range(MOI
, MI
->operands_end()))
733 // Set memory references.
734 MIB
.cloneMemRefs(*MI
);
736 MI
->eraseFromParent();
740 if (Opc
== Hexagon::J2_jump
) {
741 MachineBasicBlock
*TB
= MI
->getOperand(0).getMBB();
742 const MCInstrDesc
&D
= HII
->get(IfTrue
? Hexagon::J2_jumpt
743 : Hexagon::J2_jumpf
);
744 BuildMI(*ToB
, At
, DL
, D
)
747 MI
->eraseFromParent();
751 // Print the offending instruction unconditionally as we are about to
754 llvm_unreachable("Unexpected instruction");
757 // Predicate/speculate non-branch instructions from FromB into block ToB.
758 // Leave the branches alone, they will be handled later. Btw, at this point
759 // FromB should have at most one branch, and it should be unconditional.
760 void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock
*ToB
,
761 MachineBasicBlock::iterator At
, MachineBasicBlock
*FromB
,
762 unsigned PredR
, bool IfTrue
) {
763 LLVM_DEBUG(dbgs() << "Predicating block " << PrintMB(FromB
) << "\n");
764 MachineBasicBlock::iterator End
= FromB
->getFirstTerminator();
765 MachineBasicBlock::iterator I
, NextI
;
767 for (I
= FromB
->begin(); I
!= End
; I
= NextI
) {
769 NextI
= std::next(I
);
770 if (isSafeToSpeculate(&*I
))
771 ToB
->splice(At
, FromB
, I
);
773 predicateInstr(ToB
, At
, &*I
, PredR
, IfTrue
);
777 unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock
*B
,
778 MachineBasicBlock::iterator At
, const TargetRegisterClass
*DRC
,
779 unsigned PredR
, unsigned TR
, unsigned TSR
, unsigned FR
, unsigned FSR
) {
781 switch (DRC
->getID()) {
782 case Hexagon::IntRegsRegClassID
:
783 case Hexagon::IntRegsLow8RegClassID
:
784 Opc
= Hexagon::C2_mux
;
786 case Hexagon::DoubleRegsRegClassID
:
787 case Hexagon::GeneralDoubleLow8RegsRegClassID
:
788 Opc
= Hexagon::PS_pselect
;
790 case Hexagon::HvxVRRegClassID
:
791 Opc
= Hexagon::PS_vselect
;
793 case Hexagon::HvxWRRegClassID
:
794 Opc
= Hexagon::PS_wselect
;
797 llvm_unreachable("unexpected register type");
799 const MCInstrDesc
&D
= HII
->get(Opc
);
801 DebugLoc DL
= B
->findBranchDebugLoc();
802 Register MuxR
= MRI
->createVirtualRegister(DRC
);
803 BuildMI(*B
, At
, DL
, D
, MuxR
)
810 void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock
*WhereB
,
811 const FlowPattern
&FP
) {
812 // Visit all PHI nodes in the WhereB block and generate MUX instructions
813 // in the split block. Update the PHI nodes with the values of the MUX.
814 auto NonPHI
= WhereB
->getFirstNonPHI();
815 for (auto I
= WhereB
->begin(); I
!= NonPHI
; ++I
) {
816 MachineInstr
*PN
= &*I
;
817 // Registers and subregisters corresponding to TrueB, FalseB and SplitB.
818 unsigned TR
= 0, TSR
= 0, FR
= 0, FSR
= 0, SR
= 0, SSR
= 0;
819 for (int i
= PN
->getNumOperands()-2; i
> 0; i
-= 2) {
820 const MachineOperand
&RO
= PN
->getOperand(i
), &BO
= PN
->getOperand(i
+1);
821 if (BO
.getMBB() == FP
.SplitB
)
822 SR
= RO
.getReg(), SSR
= RO
.getSubReg();
823 else if (BO
.getMBB() == FP
.TrueB
)
824 TR
= RO
.getReg(), TSR
= RO
.getSubReg();
825 else if (BO
.getMBB() == FP
.FalseB
)
826 FR
= RO
.getReg(), FSR
= RO
.getSubReg();
829 PN
->RemoveOperand(i
+1);
830 PN
->RemoveOperand(i
);
838 unsigned MuxR
= 0, MuxSR
= 0;
841 Register DR
= PN
->getOperand(0).getReg();
842 const TargetRegisterClass
*RC
= MRI
->getRegClass(DR
);
843 MuxR
= buildMux(FP
.SplitB
, FP
.SplitB
->getFirstTerminator(), RC
,
844 FP
.PredR
, TR
, TSR
, FR
, FSR
);
853 PN
->addOperand(MachineOperand::CreateReg(MuxR
, false, false, false, false,
854 false, false, MuxSR
));
855 PN
->addOperand(MachineOperand::CreateMBB(FP
.SplitB
));
859 void HexagonEarlyIfConversion::convert(const FlowPattern
&FP
) {
860 MachineBasicBlock
*TSB
= nullptr, *FSB
= nullptr;
861 MachineBasicBlock::iterator OldTI
= FP
.SplitB
->getFirstTerminator();
862 assert(OldTI
!= FP
.SplitB
->end());
863 DebugLoc DL
= OldTI
->getDebugLoc();
866 TSB
= *FP
.TrueB
->succ_begin();
867 predicateBlockNB(FP
.SplitB
, OldTI
, FP
.TrueB
, FP
.PredR
, true);
870 FSB
= *FP
.FalseB
->succ_begin();
871 MachineBasicBlock::iterator At
= FP
.SplitB
->getFirstTerminator();
872 predicateBlockNB(FP
.SplitB
, At
, FP
.FalseB
, FP
.PredR
, false);
875 // Regenerate new terminators in the split block and update the successors.
876 // First, remember any information that may be needed later and remove the
877 // existing terminators/successors from the split block.
878 MachineBasicBlock
*SSB
= nullptr;
879 FP
.SplitB
->erase(OldTI
, FP
.SplitB
->end());
880 while (FP
.SplitB
->succ_size() > 0) {
881 MachineBasicBlock
*T
= *FP
.SplitB
->succ_begin();
882 // It's possible that the split block had a successor that is not a pre-
883 // dicated block. This could only happen if there was only one block to
884 // be predicated. Example:
886 // if (p) jump true_b
890 // unrelated2_b: ; can have other predecessors, so it's not "false_b"
892 // true_b: ; only reachable from split_b, can be predicated
895 // Find this successor (SSB) if it exists.
896 if (T
!= FP
.TrueB
&& T
!= FP
.FalseB
) {
900 FP
.SplitB
->removeSuccessor(FP
.SplitB
->succ_begin());
903 // Insert new branches and update the successors of the split block. This
904 // may create unconditional branches to the layout successor, etc., but
905 // that will be cleaned up later. For now, make sure that correct code is
908 assert(!SSB
|| SSB
== FP
.JoinB
);
909 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jump
))
911 FP
.SplitB
->addSuccessor(FP
.JoinB
);
913 bool HasBranch
= false;
915 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jumpt
))
918 FP
.SplitB
->addSuccessor(TSB
);
922 const MCInstrDesc
&D
= HasBranch
? HII
->get(Hexagon::J2_jump
)
923 : HII
->get(Hexagon::J2_jumpf
);
924 MachineInstrBuilder MIB
= BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, D
);
926 MIB
.addReg(FP
.PredR
);
928 FP
.SplitB
->addSuccessor(FSB
);
931 // This cannot happen if both TSB and FSB are set. [TF]SB are the
932 // successor blocks of the TrueB and FalseB (or null of the TrueB
933 // or FalseB block is null). SSB is the potential successor block
934 // of the SplitB that is neither TrueB nor FalseB.
935 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jump
))
937 FP
.SplitB
->addSuccessor(SSB
);
941 // What is left to do is to update the PHI nodes that could have entries
942 // referring to predicated blocks.
944 updatePhiNodes(FP
.JoinB
, FP
);
947 updatePhiNodes(TSB
, FP
);
949 updatePhiNodes(FSB
, FP
);
950 // Nothing to update in SSB, since SSB's predecessors haven't changed.
954 void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock
*B
) {
955 LLVM_DEBUG(dbgs() << "Removing block " << PrintMB(B
) << "\n");
957 // Transfer the immediate dominator information from B to its descendants.
958 MachineDomTreeNode
*N
= MDT
->getNode(B
);
959 MachineDomTreeNode
*IDN
= N
->getIDom();
961 MachineBasicBlock
*IDB
= IDN
->getBlock();
963 using GTN
= GraphTraits
<MachineDomTreeNode
*>;
964 using DTNodeVectType
= SmallVector
<MachineDomTreeNode
*, 4>;
966 DTNodeVectType
Cn(GTN::child_begin(N
), GTN::child_end(N
));
967 for (DTNodeVectType::iterator I
= Cn
.begin(), E
= Cn
.end(); I
!= E
; ++I
) {
968 MachineBasicBlock
*SB
= (*I
)->getBlock();
969 MDT
->changeImmediateDominator(SB
, IDB
);
973 while (B
->succ_size() > 0)
974 B
->removeSuccessor(B
->succ_begin());
976 for (auto I
= B
->pred_begin(), E
= B
->pred_end(); I
!= E
; ++I
)
977 (*I
)->removeSuccessor(B
, true);
981 MFN
->erase(B
->getIterator());
984 void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock
*B
) {
985 LLVM_DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B
) << "\n");
986 MachineBasicBlock::iterator I
, NextI
, NonPHI
= B
->getFirstNonPHI();
987 for (I
= B
->begin(); I
!= NonPHI
; I
= NextI
) {
988 NextI
= std::next(I
);
989 MachineInstr
*PN
= &*I
;
990 assert(PN
->getNumOperands() == 3 && "Invalid phi node");
991 MachineOperand
&UO
= PN
->getOperand(1);
992 Register UseR
= UO
.getReg(), UseSR
= UO
.getSubReg();
993 Register DefR
= PN
->getOperand(0).getReg();
994 unsigned NewR
= UseR
;
996 // MRI.replaceVregUsesWith does not allow to update the subregister,
997 // so instead of doing the use-iteration here, create a copy into a
998 // "non-subregistered" register.
999 const DebugLoc
&DL
= PN
->getDebugLoc();
1000 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefR
);
1001 NewR
= MRI
->createVirtualRegister(RC
);
1002 NonPHI
= BuildMI(*B
, NonPHI
, DL
, HII
->get(TargetOpcode::COPY
), NewR
)
1003 .addReg(UseR
, 0, UseSR
);
1005 MRI
->replaceRegWith(DefR
, NewR
);
1010 void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock
*PredB
,
1011 MachineBasicBlock
*SuccB
) {
1012 LLVM_DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB
) << " and "
1013 << PrintMB(SuccB
) << "\n");
1014 bool TermOk
= hasUncondBranch(SuccB
);
1015 eliminatePhis(SuccB
);
1016 HII
->removeBranch(*PredB
);
1017 PredB
->removeSuccessor(SuccB
);
1018 PredB
->splice(PredB
->end(), SuccB
, SuccB
->begin(), SuccB
->end());
1019 PredB
->transferSuccessorsAndUpdatePHIs(SuccB
);
1020 MachineBasicBlock
*OldLayoutSuccessor
= SuccB
->getNextNode();
1023 PredB
->updateTerminator(OldLayoutSuccessor
);
1026 void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern
&FP
) {
1027 MachineBasicBlock
*OldLayoutSuccessor
= FP
.SplitB
->getNextNode();
1029 removeBlock(FP
.TrueB
);
1031 removeBlock(FP
.FalseB
);
1033 FP
.SplitB
->updateTerminator(OldLayoutSuccessor
);
1034 if (FP
.SplitB
->succ_size() != 1)
1037 MachineBasicBlock
*SB
= *FP
.SplitB
->succ_begin();
1038 if (SB
->pred_size() != 1)
1041 // By now, the split block has only one successor (SB), and SB has only
1042 // one predecessor. We can try to merge them. We will need to update ter-
1043 // minators in FP.Split+SB, and that requires working analyzeBranch, which
1044 // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
1045 // with an unconditional branch, we won't need to touch the terminators.
1046 if (!hasEHLabel(SB
) || hasUncondBranch(SB
))
1047 mergeBlocks(FP
.SplitB
, SB
);
1050 bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction
&MF
) {
1051 if (skipFunction(MF
.getFunction()))
1054 auto &ST
= MF
.getSubtarget
<HexagonSubtarget
>();
1055 HII
= ST
.getInstrInfo();
1056 TRI
= ST
.getRegisterInfo();
1058 MRI
= &MF
.getRegInfo();
1059 MDT
= &getAnalysis
<MachineDominatorTree
>();
1060 MLI
= &getAnalysis
<MachineLoopInfo
>();
1061 MBPI
= EnableHexagonBP
? &getAnalysis
<MachineBranchProbabilityInfo
>() :
1065 bool Changed
= false;
1067 for (MachineLoopInfo::iterator I
= MLI
->begin(), E
= MLI
->end(); I
!= E
; ++I
)
1068 Changed
|= visitLoop(*I
);
1069 Changed
|= visitLoop(nullptr);
1074 //===----------------------------------------------------------------------===//
1075 // Public Constructor Functions
1076 //===----------------------------------------------------------------------===//
1077 FunctionPass
*llvm::createHexagonEarlyIfConversion() {
1078 return new HexagonEarlyIfConversion();