1 //===- HexagonEarlyIfConv.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements a Hexagon-specific if-conversion pass that runs on the
11 // In SSA it is not straightforward to represent instructions that condi-
12 // tionally define registers, since a conditionally-defined register may
13 // only be used under the same condition on which the definition was based.
14 // To avoid complications of this nature, this patch will only generate
15 // predicated stores, and speculate other instructions from the "if-conver-
17 // The code will recognize CFG patterns where a block with a conditional
18 // branch "splits" into a "true block" and a "false block". Either of these
19 // could be omitted (in case of a triangle, for example).
20 // If after conversion of the side block(s) the CFG allows it, the resul-
21 // ting blocks may be merged. If the "join" block contained PHI nodes, they
22 // will be replaced with MUX (or MUX-like) instructions to maintain the
23 // semantics of the PHI.
27 // %40 = L2_loadrub_io killed %39, 1
28 // %41 = S2_tstbit_i killed %40, 0
29 // J2_jumpt killed %41, <%bb.5>, implicit dead %pc
30 // J2_jump <%bb.4>, implicit dead %pc
31 // Successors according to CFG: %bb.4(62) %bb.5(62)
33 // %bb.4: derived from LLVM BB %if.then
34 // Predecessors according to CFG: %bb.3
35 // %11 = A2_addp %6, %10
36 // S2_storerd_io %32, 16, %11
37 // Successors according to CFG: %bb.5
39 // %bb.5: derived from LLVM BB %if.end
40 // Predecessors according to CFG: %bb.3 %bb.4
41 // %12 = PHI %6, <%bb.3>, %11, <%bb.4>
42 // %13 = A2_addp %7, %12
43 // %42 = C2_cmpeqi %9, 10
44 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
45 // J2_jump <%bb.6>, implicit dead %pc
46 // Successors according to CFG: %bb.6(4) %bb.3(124)
50 // %40 = L2_loadrub_io killed %39, 1
51 // %41 = S2_tstbit_i killed %40, 0
52 // spec-> %11 = A2_addp %6, %10
53 // pred-> S2_pstorerdf_io %41, %32, 16, %11
54 // %46 = PS_pselect %41, %6, %11
55 // %13 = A2_addp %7, %46
56 // %42 = C2_cmpeqi %9, 10
57 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
58 // J2_jump <%bb.6>, implicit dead %pc
59 // Successors according to CFG: %bb.6 %bb.3
62 #include "HexagonInstrInfo.h"
63 #include "HexagonSubtarget.h"
64 #include "llvm/ADT/DenseSet.h"
65 #include "llvm/ADT/SmallVector.h"
66 #include "llvm/ADT/StringRef.h"
67 #include "llvm/ADT/iterator_range.h"
68 #include "llvm/CodeGen/MachineBasicBlock.h"
69 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
70 #include "llvm/CodeGen/MachineDominators.h"
71 #include "llvm/CodeGen/MachineFunction.h"
72 #include "llvm/CodeGen/MachineFunctionPass.h"
73 #include "llvm/CodeGen/MachineInstr.h"
74 #include "llvm/CodeGen/MachineInstrBuilder.h"
75 #include "llvm/CodeGen/MachineLoopInfo.h"
76 #include "llvm/CodeGen/MachineOperand.h"
77 #include "llvm/CodeGen/MachineRegisterInfo.h"
78 #include "llvm/CodeGen/TargetRegisterInfo.h"
79 #include "llvm/IR/DebugLoc.h"
80 #include "llvm/Pass.h"
81 #include "llvm/Support/BranchProbability.h"
82 #include "llvm/Support/CommandLine.h"
83 #include "llvm/Support/Compiler.h"
84 #include "llvm/Support/Debug.h"
85 #include "llvm/Support/ErrorHandling.h"
86 #include "llvm/Support/raw_ostream.h"
90 #define DEBUG_TYPE "hexagon-eif"
96 FunctionPass
*createHexagonEarlyIfConversion();
97 void initializeHexagonEarlyIfConversionPass(PassRegistry
& Registry
);
99 } // end namespace llvm
101 static cl::opt
<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden
,
102 cl::init(true), cl::desc("Enable branch probability info"));
103 static cl::opt
<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden
,
104 cl::desc("Size limit in Hexagon early if-conversion"));
105 static cl::opt
<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false),
106 cl::Hidden
, cl::desc("Do not convert branches that may exit the loop"));
111 PrintMB(const MachineBasicBlock
*B
) : MB(B
) {}
113 const MachineBasicBlock
*MB
;
115 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintMB
&P
) {
117 return OS
<< "<none>";
118 return OS
<< '#' << P
.MB
->getNumber();
122 FlowPattern() = default;
123 FlowPattern(MachineBasicBlock
*B
, unsigned PR
, MachineBasicBlock
*TB
,
124 MachineBasicBlock
*FB
, MachineBasicBlock
*JB
)
125 : SplitB(B
), TrueB(TB
), FalseB(FB
), JoinB(JB
), PredR(PR
) {}
127 MachineBasicBlock
*SplitB
= nullptr;
128 MachineBasicBlock
*TrueB
= nullptr;
129 MachineBasicBlock
*FalseB
= nullptr;
130 MachineBasicBlock
*JoinB
= nullptr;
135 PrintFP(const FlowPattern
&P
, const TargetRegisterInfo
&T
)
138 const FlowPattern
&FP
;
139 const TargetRegisterInfo
&TRI
;
140 friend raw_ostream
&operator<< (raw_ostream
&OS
, const PrintFP
&P
);
142 raw_ostream
&operator<<(raw_ostream
&OS
,
143 const PrintFP
&P
) LLVM_ATTRIBUTE_UNUSED
;
144 raw_ostream
&operator<<(raw_ostream
&OS
, const PrintFP
&P
) {
145 OS
<< "{ SplitB:" << PrintMB(P
.FP
.SplitB
)
146 << ", PredR:" << printReg(P
.FP
.PredR
, &P
.TRI
)
147 << ", TrueB:" << PrintMB(P
.FP
.TrueB
)
148 << ", FalseB:" << PrintMB(P
.FP
.FalseB
)
149 << ", JoinB:" << PrintMB(P
.FP
.JoinB
) << " }";
153 class HexagonEarlyIfConversion
: public MachineFunctionPass
{
157 HexagonEarlyIfConversion() : MachineFunctionPass(ID
) {}
159 StringRef
getPassName() const override
{
160 return "Hexagon early if conversion";
163 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
164 AU
.addRequired
<MachineBranchProbabilityInfoWrapperPass
>();
165 AU
.addRequired
<MachineDominatorTreeWrapperPass
>();
166 AU
.addPreserved
<MachineDominatorTreeWrapperPass
>();
167 AU
.addRequired
<MachineLoopInfoWrapperPass
>();
168 MachineFunctionPass::getAnalysisUsage(AU
);
171 bool runOnMachineFunction(MachineFunction
&MF
) override
;
174 using BlockSetType
= DenseSet
<MachineBasicBlock
*>;
176 bool isPreheader(const MachineBasicBlock
*B
) const;
177 bool matchFlowPattern(MachineBasicBlock
*B
, MachineLoop
*L
,
179 bool visitBlock(MachineBasicBlock
*B
, MachineLoop
*L
);
180 bool visitLoop(MachineLoop
*L
);
182 bool hasEHLabel(const MachineBasicBlock
*B
) const;
183 bool hasUncondBranch(const MachineBasicBlock
*B
) const;
184 bool isValidCandidate(const MachineBasicBlock
*B
) const;
185 bool usesUndefVReg(const MachineInstr
*MI
) const;
186 bool isValid(const FlowPattern
&FP
) const;
187 unsigned countPredicateDefs(const MachineBasicBlock
*B
) const;
188 unsigned computePhiCost(const MachineBasicBlock
*B
,
189 const FlowPattern
&FP
) const;
190 bool isProfitable(const FlowPattern
&FP
) const;
191 bool isPredicableStore(const MachineInstr
*MI
) const;
192 bool isSafeToSpeculate(const MachineInstr
*MI
) const;
193 bool isPredicate(unsigned R
) const;
195 unsigned getCondStoreOpcode(unsigned Opc
, bool IfTrue
) const;
196 void predicateInstr(MachineBasicBlock
*ToB
, MachineBasicBlock::iterator At
,
197 MachineInstr
*MI
, unsigned PredR
, bool IfTrue
);
198 void predicateBlockNB(MachineBasicBlock
*ToB
,
199 MachineBasicBlock::iterator At
, MachineBasicBlock
*FromB
,
200 unsigned PredR
, bool IfTrue
);
202 unsigned buildMux(MachineBasicBlock
*B
, MachineBasicBlock::iterator At
,
203 const TargetRegisterClass
*DRC
, unsigned PredR
, unsigned TR
,
204 unsigned TSR
, unsigned FR
, unsigned FSR
);
205 void updatePhiNodes(MachineBasicBlock
*WhereB
, const FlowPattern
&FP
);
206 void convert(const FlowPattern
&FP
);
208 void removeBlock(MachineBasicBlock
*B
);
209 void eliminatePhis(MachineBasicBlock
*B
);
210 void mergeBlocks(MachineBasicBlock
*PredB
, MachineBasicBlock
*SuccB
);
211 void simplifyFlowGraph(const FlowPattern
&FP
);
213 const HexagonInstrInfo
*HII
= nullptr;
214 const TargetRegisterInfo
*TRI
= nullptr;
215 MachineFunction
*MFN
= nullptr;
216 MachineRegisterInfo
*MRI
= nullptr;
217 MachineDominatorTree
*MDT
= nullptr;
218 MachineLoopInfo
*MLI
= nullptr;
219 BlockSetType Deleted
;
220 const MachineBranchProbabilityInfo
*MBPI
= nullptr;
223 } // end anonymous namespace
225 char HexagonEarlyIfConversion::ID
= 0;
227 INITIALIZE_PASS(HexagonEarlyIfConversion
, "hexagon-early-if",
228 "Hexagon early if conversion", false, false)
230 bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock
*B
) const {
231 if (B
->succ_size() != 1)
233 MachineBasicBlock
*SB
= *B
->succ_begin();
234 MachineLoop
*L
= MLI
->getLoopFor(SB
);
235 return L
&& SB
== L
->getHeader() && MDT
->dominates(B
, SB
);
238 bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock
*B
,
239 MachineLoop
*L
, FlowPattern
&FP
) {
240 LLVM_DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B
)
243 // Interested only in conditional branches, no .new, no new-value, etc.
244 // Check the terminators directly, it's easier than handling all responses
245 // from analyzeBranch.
246 MachineBasicBlock
*TB
= nullptr, *FB
= nullptr;
247 MachineBasicBlock::const_iterator T1I
= B
->getFirstTerminator();
250 unsigned Opc
= T1I
->getOpcode();
251 if (Opc
!= Hexagon::J2_jumpt
&& Opc
!= Hexagon::J2_jumpf
)
253 Register PredR
= T1I
->getOperand(0).getReg();
255 // Get the layout successor, or 0 if B does not have one.
256 MachineFunction::iterator NextBI
= std::next(MachineFunction::iterator(B
));
257 MachineBasicBlock
*NextB
= (NextBI
!= MFN
->end()) ? &*NextBI
: nullptr;
259 MachineBasicBlock
*T1B
= T1I
->getOperand(1).getMBB();
260 MachineBasicBlock::const_iterator T2I
= std::next(T1I
);
261 // The second terminator should be an unconditional branch.
262 assert(T2I
== B
->end() || T2I
->getOpcode() == Hexagon::J2_jump
);
263 MachineBasicBlock
*T2B
= (T2I
== B
->end()) ? NextB
264 : T2I
->getOperand(0).getMBB();
266 // XXX merge if T1B == NextB, or convert branch to unconditional.
267 // mark as diamond with both sides equal?
271 // Record the true/false blocks in such a way that "true" means "if (PredR)",
272 // and "false" means "if (!PredR)".
273 if (Opc
== Hexagon::J2_jumpt
)
278 if (!MDT
->properlyDominates(B
, TB
) || !MDT
->properlyDominates(B
, FB
))
281 // Detect triangle first. In case of a triangle, one of the blocks TB/FB
282 // can fall through into the other, in other words, it will be executed
283 // in both cases. We only want to predicate the block that is executed
285 assert(TB
&& FB
&& "Failed to find triangle control flow blocks");
286 unsigned TNP
= TB
->pred_size(), FNP
= FB
->pred_size();
287 unsigned TNS
= TB
->succ_size(), FNS
= FB
->succ_size();
289 // A block is predicable if it has one predecessor (it must be B), and
290 // it has a single successor. In fact, the block has to end either with
291 // an unconditional branch (which can be predicated), or with a fall-
293 // Also, skip blocks that do not belong to the same loop.
294 bool TOk
= (TNP
== 1 && TNS
== 1 && MLI
->getLoopFor(TB
) == L
);
295 bool FOk
= (FNP
== 1 && FNS
== 1 && MLI
->getLoopFor(FB
) == L
);
297 // If requested (via an option), do not consider branches where the
298 // true and false targets do not belong to the same loop.
299 if (SkipExitBranches
&& MLI
->getLoopFor(TB
) != MLI
->getLoopFor(FB
))
302 // If neither is predicable, there is nothing interesting.
306 MachineBasicBlock
*TSB
= (TNS
> 0) ? *TB
->succ_begin() : nullptr;
307 MachineBasicBlock
*FSB
= (FNS
> 0) ? *FB
->succ_begin() : nullptr;
308 MachineBasicBlock
*JB
= nullptr;
314 // Diamond: "if (P) then TB; else FB;".
322 // !TOk && FOk (at least one must be true by now).
327 // Don't try to predicate loop preheaders.
328 if ((TB
&& isPreheader(TB
)) || (FB
&& isPreheader(FB
))) {
329 LLVM_DEBUG(dbgs() << "One of blocks " << PrintMB(TB
) << ", " << PrintMB(FB
)
330 << " is a loop preheader. Skipping.\n");
334 FP
= FlowPattern(B
, PredR
, TB
, FB
, JB
);
335 LLVM_DEBUG(dbgs() << "Detected " << PrintFP(FP
, *TRI
) << "\n");
339 // KLUDGE: HexagonInstrInfo::analyzeBranch won't work on a block that
340 // contains EH_LABEL.
341 bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock
*B
) const {
348 // KLUDGE: HexagonInstrInfo::analyzeBranch may be unable to recognize
349 // that a block can never fall-through.
350 bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock
*B
)
352 MachineBasicBlock::const_iterator I
= B
->getFirstTerminator(), E
= B
->end();
361 bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock
*B
)
365 if (B
->isEHPad() || B
->hasAddressTaken())
370 for (auto &MI
: *B
) {
371 if (MI
.isDebugInstr())
373 if (MI
.isConditionalBranch())
375 unsigned Opc
= MI
.getOpcode();
376 bool IsJMP
= (Opc
== Hexagon::J2_jump
);
377 if (!isPredicableStore(&MI
) && !IsJMP
&& !isSafeToSpeculate(&MI
))
379 // Look for predicate registers defined by this instruction. It's ok
380 // to speculate such an instruction, but the predicate register cannot
381 // be used outside of this block (or else it won't be possible to
382 // update the use of it after predication). PHI uses will be updated
383 // to use a result of a MUX, and a MUX cannot be created for predicate
385 for (const MachineOperand
&MO
: MI
.operands()) {
386 if (!MO
.isReg() || !MO
.isDef())
388 Register R
= MO
.getReg();
393 for (const MachineOperand
&U
: MRI
->use_operands(R
))
394 if (U
.getParent()->isPHI())
401 bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr
*MI
) const {
402 for (const MachineOperand
&MO
: MI
->operands()) {
403 if (!MO
.isReg() || !MO
.isUse())
405 Register R
= MO
.getReg();
408 const MachineInstr
*DefI
= MRI
->getVRegDef(R
);
409 // "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
410 assert(DefI
&& "Expecting a reaching def in MRI");
411 if (DefI
->isImplicitDef())
417 bool HexagonEarlyIfConversion::isValid(const FlowPattern
&FP
) const {
418 if (hasEHLabel(FP
.SplitB
)) // KLUDGE: see function definition
420 if (FP
.TrueB
&& !isValidCandidate(FP
.TrueB
))
422 if (FP
.FalseB
&& !isValidCandidate(FP
.FalseB
))
424 // Check the PHIs in the join block. If any of them use a register
425 // that is defined as IMPLICIT_DEF, do not convert this. This can
426 // legitimately happen if one side of the split never executes, but
427 // the compiler is unable to prove it. That side may then seem to
428 // provide an "undef" value to the join block, however it will never
429 // execute at run-time. If we convert this case, the "undef" will
430 // be used in a MUX instruction, and that may seem like actually
431 // using an undefined value to other optimizations. This could lead
432 // to trouble further down the optimization stream, cause assertions
435 const MachineBasicBlock
&B
= *FP
.JoinB
;
439 if (usesUndefVReg(&MI
))
441 Register DefR
= MI
.getOperand(0).getReg();
442 if (isPredicate(DefR
))
449 unsigned HexagonEarlyIfConversion::computePhiCost(const MachineBasicBlock
*B
,
450 const FlowPattern
&FP
) const {
451 if (B
->pred_size() < 2)
455 for (const MachineInstr
&MI
: *B
) {
458 // If both incoming blocks are one of the TrueB/FalseB/SplitB, then
459 // a MUX may be needed. Otherwise the PHI will need to be updated at
461 // Find the interesting PHI operands for further checks.
462 SmallVector
<unsigned,2> Inc
;
463 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2) {
464 const MachineBasicBlock
*BB
= MI
.getOperand(i
+1).getMBB();
465 if (BB
== FP
.SplitB
|| BB
== FP
.TrueB
|| BB
== FP
.FalseB
)
468 assert(Inc
.size() <= 2);
472 const MachineOperand
&RA
= MI
.getOperand(1);
473 const MachineOperand
&RB
= MI
.getOperand(3);
474 assert(RA
.isReg() && RB
.isReg());
475 // Must have a MUX if the phi uses a subregister.
476 if (RA
.getSubReg() != 0 || RB
.getSubReg() != 0) {
480 const MachineInstr
*Def1
= MRI
->getVRegDef(RA
.getReg());
481 const MachineInstr
*Def3
= MRI
->getVRegDef(RB
.getReg());
482 if (!HII
->isPredicable(*Def1
) || !HII
->isPredicable(*Def3
))
488 unsigned HexagonEarlyIfConversion::countPredicateDefs(
489 const MachineBasicBlock
*B
) const {
490 unsigned PredDefs
= 0;
491 for (auto &MI
: *B
) {
492 for (const MachineOperand
&MO
: MI
.operands()) {
493 if (!MO
.isReg() || !MO
.isDef())
495 Register R
= MO
.getReg();
505 bool HexagonEarlyIfConversion::isProfitable(const FlowPattern
&FP
) const {
506 BranchProbability
JumpProb(1, 10);
507 BranchProbability
Prob(9, 10);
508 if (MBPI
&& FP
.TrueB
&& !FP
.FalseB
&&
509 (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) < JumpProb
||
510 MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) > Prob
))
513 if (MBPI
&& !FP
.TrueB
&& FP
.FalseB
&&
514 (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) < JumpProb
||
515 MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) > Prob
))
518 if (FP
.TrueB
&& FP
.FalseB
) {
519 // Do not IfCovert if the branch is one sided.
521 if (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.TrueB
) > Prob
)
523 if (MBPI
->getEdgeProbability(FP
.SplitB
, FP
.FalseB
) > Prob
)
527 // If both sides are predicable, convert them if they join, and the
528 // join block has no other predecessors.
529 MachineBasicBlock
*TSB
= *FP
.TrueB
->succ_begin();
530 MachineBasicBlock
*FSB
= *FP
.FalseB
->succ_begin();
533 if (TSB
->pred_size() != 2)
537 // Calculate the total size of the predicated blocks.
538 // Assume instruction counts without branches to be the approximation of
539 // the code size. If the predicated blocks are smaller than a packet size,
540 // approximate the spare room in the packet that could be filled with the
541 // predicated/speculated instructions.
542 auto TotalCount
= [] (const MachineBasicBlock
*B
, unsigned &Spare
) {
545 unsigned T
= std::count_if(B
->begin(), B
->getFirstTerminator(),
546 [](const MachineInstr
&MI
) {
547 return !MI
.isMetaInstruction();
549 if (T
< HEXAGON_PACKET_SIZE
)
550 Spare
+= HEXAGON_PACKET_SIZE
-T
;
554 unsigned TotalIn
= TotalCount(FP
.TrueB
, Spare
) + TotalCount(FP
.FalseB
, Spare
);
556 dbgs() << "Total number of instructions to be predicated/speculated: "
557 << TotalIn
<< ", spare room: " << Spare
<< "\n");
558 if (TotalIn
>= SizeLimit
+Spare
)
561 // Count the number of PHI nodes that will need to be updated (converted
562 // to MUX). Those can be later converted to predicated instructions, so
563 // they aren't always adding extra cost.
564 // KLUDGE: Also, count the number of predicate register definitions in
565 // each block. The scheduler may increase the pressure of these and cause
566 // expensive spills (e.g. bitmnp01).
567 unsigned TotalPh
= 0;
568 unsigned PredDefs
= countPredicateDefs(FP
.SplitB
);
570 TotalPh
= computePhiCost(FP
.JoinB
, FP
);
571 PredDefs
+= countPredicateDefs(FP
.JoinB
);
573 if (FP
.TrueB
&& !FP
.TrueB
->succ_empty()) {
574 MachineBasicBlock
*SB
= *FP
.TrueB
->succ_begin();
575 TotalPh
+= computePhiCost(SB
, FP
);
576 PredDefs
+= countPredicateDefs(SB
);
578 if (FP
.FalseB
&& !FP
.FalseB
->succ_empty()) {
579 MachineBasicBlock
*SB
= *FP
.FalseB
->succ_begin();
580 TotalPh
+= computePhiCost(SB
, FP
);
581 PredDefs
+= countPredicateDefs(SB
);
584 LLVM_DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
586 if (TotalIn
+TotalPh
>= SizeLimit
+Spare
)
589 LLVM_DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs
597 bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock
*B
,
599 bool Changed
= false;
601 // Visit all dominated blocks from the same loop first, then process B.
602 MachineDomTreeNode
*N
= MDT
->getNode(B
);
604 // We will change CFG/DT during this traversal, so take precautions to
605 // avoid problems related to invalidated iterators. In fact, processing
606 // a child C of B cannot cause another child to be removed, but it can
607 // cause a new child to be added (which was a child of C before C itself
608 // was removed. This new child C, however, would have been processed
609 // prior to processing B, so there is no need to process it again.
610 // Simply keep a list of children of B, and traverse that list.
611 using DTNodeVectType
= SmallVector
<MachineDomTreeNode
*, 4>;
612 DTNodeVectType
Cn(llvm::children
<MachineDomTreeNode
*>(N
));
614 MachineBasicBlock
*SB
= I
->getBlock();
615 if (!Deleted
.count(SB
))
616 Changed
|= visitBlock(SB
, L
);
618 // When walking down the dominator tree, we want to traverse through
619 // blocks from nested (other) loops, because they can dominate blocks
620 // that are in L. Skip the non-L blocks only after the tree traversal.
621 if (MLI
->getLoopFor(B
) != L
)
625 if (!matchFlowPattern(B
, L
, FP
))
629 LLVM_DEBUG(dbgs() << "Conversion is not valid\n");
632 if (!isProfitable(FP
)) {
633 LLVM_DEBUG(dbgs() << "Conversion is not profitable\n");
638 simplifyFlowGraph(FP
);
642 bool HexagonEarlyIfConversion::visitLoop(MachineLoop
*L
) {
643 MachineBasicBlock
*HB
= L
? L
->getHeader() : nullptr;
644 LLVM_DEBUG((L
? dbgs() << "Visiting loop H:" << PrintMB(HB
)
645 : dbgs() << "Visiting function")
647 bool Changed
= false;
649 for (MachineLoop
*I
: *L
)
650 Changed
|= visitLoop(I
);
653 MachineBasicBlock
*EntryB
= GraphTraits
<MachineFunction
*>::getEntryNode(MFN
);
654 Changed
|= visitBlock(L
? HB
: EntryB
, L
);
658 bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr
*MI
)
660 // HexagonInstrInfo::isPredicable will consider these stores are non-
661 // -predicable if the offset would become constant-extended after
663 unsigned Opc
= MI
->getOpcode();
665 case Hexagon::S2_storerb_io
:
666 case Hexagon::S2_storerbnew_io
:
667 case Hexagon::S2_storerh_io
:
668 case Hexagon::S2_storerhnew_io
:
669 case Hexagon::S2_storeri_io
:
670 case Hexagon::S2_storerinew_io
:
671 case Hexagon::S2_storerd_io
:
672 case Hexagon::S4_storeirb_io
:
673 case Hexagon::S4_storeirh_io
:
674 case Hexagon::S4_storeiri_io
:
678 // TargetInstrInfo::isPredicable takes a non-const pointer.
679 return MI
->mayStore() && HII
->isPredicable(const_cast<MachineInstr
&>(*MI
));
682 bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr
*MI
)
684 if (MI
->mayLoadOrStore())
686 if (MI
->isCall() || MI
->isBarrier() || MI
->isBranch())
688 if (MI
->hasUnmodeledSideEffects())
690 if (MI
->getOpcode() == TargetOpcode::LIFETIME_END
)
696 bool HexagonEarlyIfConversion::isPredicate(unsigned R
) const {
697 const TargetRegisterClass
*RC
= MRI
->getRegClass(R
);
698 return RC
== &Hexagon::PredRegsRegClass
||
699 RC
== &Hexagon::HvxQRRegClass
;
702 unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc
,
704 return HII
->getCondOpcode(Opc
, !IfTrue
);
707 void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock
*ToB
,
708 MachineBasicBlock::iterator At
, MachineInstr
*MI
,
709 unsigned PredR
, bool IfTrue
) {
711 if (At
!= ToB
->end())
712 DL
= At
->getDebugLoc();
713 else if (!ToB
->empty())
714 DL
= ToB
->back().getDebugLoc();
716 unsigned Opc
= MI
->getOpcode();
718 if (isPredicableStore(MI
)) {
719 unsigned COpc
= getCondStoreOpcode(Opc
, IfTrue
);
721 MachineInstrBuilder MIB
= BuildMI(*ToB
, At
, DL
, HII
->get(COpc
));
722 MachineInstr::mop_iterator MOI
= MI
->operands_begin();
723 if (HII
->isPostIncrement(*MI
)) {
728 for (const MachineOperand
&MO
: make_range(MOI
, MI
->operands_end()))
731 // Set memory references.
732 MIB
.cloneMemRefs(*MI
);
734 MI
->eraseFromParent();
738 if (Opc
== Hexagon::J2_jump
) {
739 MachineBasicBlock
*TB
= MI
->getOperand(0).getMBB();
740 const MCInstrDesc
&D
= HII
->get(IfTrue
? Hexagon::J2_jumpt
741 : Hexagon::J2_jumpf
);
742 BuildMI(*ToB
, At
, DL
, D
)
745 MI
->eraseFromParent();
749 // Print the offending instruction unconditionally as we are about to
752 llvm_unreachable("Unexpected instruction");
755 // Predicate/speculate non-branch instructions from FromB into block ToB.
756 // Leave the branches alone, they will be handled later. Btw, at this point
757 // FromB should have at most one branch, and it should be unconditional.
758 void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock
*ToB
,
759 MachineBasicBlock::iterator At
, MachineBasicBlock
*FromB
,
760 unsigned PredR
, bool IfTrue
) {
761 LLVM_DEBUG(dbgs() << "Predicating block " << PrintMB(FromB
) << "\n");
762 MachineBasicBlock::iterator End
= FromB
->getFirstTerminator();
763 MachineBasicBlock::iterator I
, NextI
;
765 for (I
= FromB
->begin(); I
!= End
; I
= NextI
) {
767 NextI
= std::next(I
);
768 if (isSafeToSpeculate(&*I
))
769 ToB
->splice(At
, FromB
, I
);
771 predicateInstr(ToB
, At
, &*I
, PredR
, IfTrue
);
775 unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock
*B
,
776 MachineBasicBlock::iterator At
, const TargetRegisterClass
*DRC
,
777 unsigned PredR
, unsigned TR
, unsigned TSR
, unsigned FR
, unsigned FSR
) {
779 switch (DRC
->getID()) {
780 case Hexagon::IntRegsRegClassID
:
781 case Hexagon::IntRegsLow8RegClassID
:
782 Opc
= Hexagon::C2_mux
;
784 case Hexagon::DoubleRegsRegClassID
:
785 case Hexagon::GeneralDoubleLow8RegsRegClassID
:
786 Opc
= Hexagon::PS_pselect
;
788 case Hexagon::HvxVRRegClassID
:
789 Opc
= Hexagon::PS_vselect
;
791 case Hexagon::HvxWRRegClassID
:
792 Opc
= Hexagon::PS_wselect
;
795 llvm_unreachable("unexpected register type");
797 const MCInstrDesc
&D
= HII
->get(Opc
);
799 DebugLoc DL
= B
->findBranchDebugLoc();
800 Register MuxR
= MRI
->createVirtualRegister(DRC
);
801 BuildMI(*B
, At
, DL
, D
, MuxR
)
808 void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock
*WhereB
,
809 const FlowPattern
&FP
) {
810 // Visit all PHI nodes in the WhereB block and generate MUX instructions
811 // in the split block. Update the PHI nodes with the values of the MUX.
812 auto NonPHI
= WhereB
->getFirstNonPHI();
813 for (auto I
= WhereB
->begin(); I
!= NonPHI
; ++I
) {
814 MachineInstr
*PN
= &*I
;
815 // Registers and subregisters corresponding to TrueB, FalseB and SplitB.
816 unsigned TR
= 0, TSR
= 0, FR
= 0, FSR
= 0, SR
= 0, SSR
= 0;
817 for (int i
= PN
->getNumOperands()-2; i
> 0; i
-= 2) {
818 const MachineOperand
&RO
= PN
->getOperand(i
), &BO
= PN
->getOperand(i
+1);
819 if (BO
.getMBB() == FP
.SplitB
)
820 SR
= RO
.getReg(), SSR
= RO
.getSubReg();
821 else if (BO
.getMBB() == FP
.TrueB
)
822 TR
= RO
.getReg(), TSR
= RO
.getSubReg();
823 else if (BO
.getMBB() == FP
.FalseB
)
824 FR
= RO
.getReg(), FSR
= RO
.getSubReg();
827 PN
->removeOperand(i
+1);
828 PN
->removeOperand(i
);
836 unsigned MuxR
= 0, MuxSR
= 0;
839 Register DR
= PN
->getOperand(0).getReg();
840 const TargetRegisterClass
*RC
= MRI
->getRegClass(DR
);
841 MuxR
= buildMux(FP
.SplitB
, FP
.SplitB
->getFirstTerminator(), RC
,
842 FP
.PredR
, TR
, TSR
, FR
, FSR
);
851 PN
->addOperand(MachineOperand::CreateReg(MuxR
, false, false, false, false,
852 false, false, MuxSR
));
853 PN
->addOperand(MachineOperand::CreateMBB(FP
.SplitB
));
857 void HexagonEarlyIfConversion::convert(const FlowPattern
&FP
) {
858 MachineBasicBlock
*TSB
= nullptr, *FSB
= nullptr;
859 MachineBasicBlock::iterator OldTI
= FP
.SplitB
->getFirstTerminator();
860 assert(OldTI
!= FP
.SplitB
->end());
861 DebugLoc DL
= OldTI
->getDebugLoc();
864 TSB
= *FP
.TrueB
->succ_begin();
865 predicateBlockNB(FP
.SplitB
, OldTI
, FP
.TrueB
, FP
.PredR
, true);
868 FSB
= *FP
.FalseB
->succ_begin();
869 MachineBasicBlock::iterator At
= FP
.SplitB
->getFirstTerminator();
870 predicateBlockNB(FP
.SplitB
, At
, FP
.FalseB
, FP
.PredR
, false);
873 // Regenerate new terminators in the split block and update the successors.
874 // First, remember any information that may be needed later and remove the
875 // existing terminators/successors from the split block.
876 MachineBasicBlock
*SSB
= nullptr;
877 FP
.SplitB
->erase(OldTI
, FP
.SplitB
->end());
878 while (!FP
.SplitB
->succ_empty()) {
879 MachineBasicBlock
*T
= *FP
.SplitB
->succ_begin();
880 // It's possible that the split block had a successor that is not a pre-
881 // dicated block. This could only happen if there was only one block to
882 // be predicated. Example:
884 // if (p) jump true_b
888 // unrelated2_b: ; can have other predecessors, so it's not "false_b"
890 // true_b: ; only reachable from split_b, can be predicated
893 // Find this successor (SSB) if it exists.
894 if (T
!= FP
.TrueB
&& T
!= FP
.FalseB
) {
898 FP
.SplitB
->removeSuccessor(FP
.SplitB
->succ_begin());
901 // Insert new branches and update the successors of the split block. This
902 // may create unconditional branches to the layout successor, etc., but
903 // that will be cleaned up later. For now, make sure that correct code is
906 assert(!SSB
|| SSB
== FP
.JoinB
);
907 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jump
))
909 FP
.SplitB
->addSuccessor(FP
.JoinB
);
911 bool HasBranch
= false;
913 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jumpt
))
916 FP
.SplitB
->addSuccessor(TSB
);
920 const MCInstrDesc
&D
= HasBranch
? HII
->get(Hexagon::J2_jump
)
921 : HII
->get(Hexagon::J2_jumpf
);
922 MachineInstrBuilder MIB
= BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, D
);
924 MIB
.addReg(FP
.PredR
);
926 FP
.SplitB
->addSuccessor(FSB
);
929 // This cannot happen if both TSB and FSB are set. [TF]SB are the
930 // successor blocks of the TrueB and FalseB (or null of the TrueB
931 // or FalseB block is null). SSB is the potential successor block
932 // of the SplitB that is neither TrueB nor FalseB.
933 BuildMI(*FP
.SplitB
, FP
.SplitB
->end(), DL
, HII
->get(Hexagon::J2_jump
))
935 FP
.SplitB
->addSuccessor(SSB
);
939 // What is left to do is to update the PHI nodes that could have entries
940 // referring to predicated blocks.
942 updatePhiNodes(FP
.JoinB
, FP
);
945 updatePhiNodes(TSB
, FP
);
947 updatePhiNodes(FSB
, FP
);
948 // Nothing to update in SSB, since SSB's predecessors haven't changed.
952 void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock
*B
) {
953 LLVM_DEBUG(dbgs() << "Removing block " << PrintMB(B
) << "\n");
955 // Transfer the immediate dominator information from B to its descendants.
956 MachineDomTreeNode
*N
= MDT
->getNode(B
);
957 MachineDomTreeNode
*IDN
= N
->getIDom();
959 MachineBasicBlock
*IDB
= IDN
->getBlock();
961 using GTN
= GraphTraits
<MachineDomTreeNode
*>;
962 using DTNodeVectType
= SmallVector
<MachineDomTreeNode
*, 4>;
964 DTNodeVectType
Cn(GTN::child_begin(N
), GTN::child_end(N
));
966 MachineBasicBlock
*SB
= I
->getBlock();
967 MDT
->changeImmediateDominator(SB
, IDB
);
971 while (!B
->succ_empty())
972 B
->removeSuccessor(B
->succ_begin());
974 for (MachineBasicBlock
*Pred
: B
->predecessors())
975 Pred
->removeSuccessor(B
, true);
979 MFN
->erase(B
->getIterator());
982 void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock
*B
) {
983 LLVM_DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B
) << "\n");
984 MachineBasicBlock::iterator I
, NextI
, NonPHI
= B
->getFirstNonPHI();
985 for (I
= B
->begin(); I
!= NonPHI
; I
= NextI
) {
986 NextI
= std::next(I
);
987 MachineInstr
*PN
= &*I
;
988 assert(PN
->getNumOperands() == 3 && "Invalid phi node");
989 MachineOperand
&UO
= PN
->getOperand(1);
990 Register UseR
= UO
.getReg(), UseSR
= UO
.getSubReg();
991 Register DefR
= PN
->getOperand(0).getReg();
992 unsigned NewR
= UseR
;
994 // MRI.replaceVregUsesWith does not allow to update the subregister,
995 // so instead of doing the use-iteration here, create a copy into a
996 // "non-subregistered" register.
997 const DebugLoc
&DL
= PN
->getDebugLoc();
998 const TargetRegisterClass
*RC
= MRI
->getRegClass(DefR
);
999 NewR
= MRI
->createVirtualRegister(RC
);
1000 NonPHI
= BuildMI(*B
, NonPHI
, DL
, HII
->get(TargetOpcode::COPY
), NewR
)
1001 .addReg(UseR
, 0, UseSR
);
1003 MRI
->replaceRegWith(DefR
, NewR
);
1008 void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock
*PredB
,
1009 MachineBasicBlock
*SuccB
) {
1010 LLVM_DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB
) << " and "
1011 << PrintMB(SuccB
) << "\n");
1012 bool TermOk
= hasUncondBranch(SuccB
);
1013 eliminatePhis(SuccB
);
1014 HII
->removeBranch(*PredB
);
1015 PredB
->removeSuccessor(SuccB
);
1016 PredB
->splice(PredB
->end(), SuccB
, SuccB
->begin(), SuccB
->end());
1017 PredB
->transferSuccessorsAndUpdatePHIs(SuccB
);
1018 MachineBasicBlock
*OldLayoutSuccessor
= SuccB
->getNextNode();
1021 PredB
->updateTerminator(OldLayoutSuccessor
);
1024 void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern
&FP
) {
1025 MachineBasicBlock
*OldLayoutSuccessor
= FP
.SplitB
->getNextNode();
1027 removeBlock(FP
.TrueB
);
1029 removeBlock(FP
.FalseB
);
1031 FP
.SplitB
->updateTerminator(OldLayoutSuccessor
);
1032 if (FP
.SplitB
->succ_size() != 1)
1035 MachineBasicBlock
*SB
= *FP
.SplitB
->succ_begin();
1036 if (SB
->pred_size() != 1)
1039 // By now, the split block has only one successor (SB), and SB has only
1040 // one predecessor. We can try to merge them. We will need to update ter-
1041 // minators in FP.Split+SB, and that requires working analyzeBranch, which
1042 // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
1043 // with an unconditional branch, we won't need to touch the terminators.
1044 if (!hasEHLabel(SB
) || hasUncondBranch(SB
))
1045 mergeBlocks(FP
.SplitB
, SB
);
1048 bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction
&MF
) {
1049 if (skipFunction(MF
.getFunction()))
1052 auto &ST
= MF
.getSubtarget
<HexagonSubtarget
>();
1053 HII
= ST
.getInstrInfo();
1054 TRI
= ST
.getRegisterInfo();
1056 MRI
= &MF
.getRegInfo();
1057 MDT
= &getAnalysis
<MachineDominatorTreeWrapperPass
>().getDomTree();
1058 MLI
= &getAnalysis
<MachineLoopInfoWrapperPass
>().getLI();
1059 MBPI
= EnableHexagonBP
1060 ? &getAnalysis
<MachineBranchProbabilityInfoWrapperPass
>().getMBPI()
1064 bool Changed
= false;
1066 for (MachineLoop
*L
: *MLI
)
1067 Changed
|= visitLoop(L
);
1068 Changed
|= visitLoop(nullptr);
1073 //===----------------------------------------------------------------------===//
1074 // Public Constructor Functions
1075 //===----------------------------------------------------------------------===//
1076 FunctionPass
*llvm::createHexagonEarlyIfConversion() {
1077 return new HexagonEarlyIfConversion();