1 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===---------------------------------------------------------------------===//
10 // This pass aims to reduce the number of logical operations on bits in the CR
11 // register. These instructions have a fairly high latency and only a single
12 // pipeline at their disposal in modern PPC cores. Furthermore, they have a
13 // tendency to occur in fairly small blocks where there's little opportunity
14 // to hide the latency between the CR logical operation and its user.
16 //===---------------------------------------------------------------------===//
18 #include "PPCInstrInfo.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/ADT/Statistic.h"
28 #define DEBUG_TYPE "ppc-reduce-cr-ops"
29 #include "PPCMachineBasicBlockUtils.h"
31 STATISTIC(NumContainedSingleUseBinOps
,
32 "Number of single-use binary CR logical ops contained in a block");
33 STATISTIC(NumToSplitBlocks
,
34 "Number of binary CR logical ops that can be used to split blocks");
35 STATISTIC(TotalCRLogicals
, "Number of CR logical ops.");
36 STATISTIC(TotalNullaryCRLogicals
,
37 "Number of nullary CR logical ops (CRSET/CRUNSET).");
38 STATISTIC(TotalUnaryCRLogicals
, "Number of unary CR logical ops.");
39 STATISTIC(TotalBinaryCRLogicals
, "Number of CR logical ops.");
40 STATISTIC(NumBlocksSplitOnBinaryCROp
,
41 "Number of blocks split on CR binary logical ops.");
42 STATISTIC(NumNotSplitIdenticalOperands
,
43 "Number of blocks not split due to operands being identical.");
44 STATISTIC(NumNotSplitChainCopies
,
45 "Number of blocks not split due to operands being chained copies.");
46 STATISTIC(NumNotSplitWrongOpcode
,
47 "Number of blocks not split due to the wrong opcode.");
50 void initializePPCReduceCRLogicalsPass(PassRegistry
&);
55 static bool isBinary(MachineInstr
&MI
) {
56 return MI
.getNumOperands() == 3;
59 static bool isNullary(MachineInstr
&MI
) {
60 return MI
.getNumOperands() == 1;
63 /// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
64 /// a flag to indicate if the first operand of \p CROp is used as the
65 /// SplitBefore operand, determines whether either of the branches are to be
66 /// inverted as well as whether the new target should be the original
67 /// fall-through block.
69 computeBranchTargetAndInversion(unsigned CROp
, unsigned BROp
, bool UsingDef1
,
70 bool &InvertNewBranch
, bool &InvertOrigBranch
,
71 bool &TargetIsFallThrough
) {
72 // The conditions under which each of the output operands should be [un]set
73 // can certainly be written much more concisely with just 3 if statements or
74 // ternary expressions. However, this provides a much clearer overview to the
75 // reader as to what is set for each <CROp, BROp, OpUsed> combination.
76 if (BROp
== PPC::BC
|| BROp
== PPC::BCLR
) {
80 llvm_unreachable("Don't know how to handle this CR logical.");
82 InvertNewBranch
= false;
83 InvertOrigBranch
= false;
84 TargetIsFallThrough
= false;
87 InvertNewBranch
= true;
88 InvertOrigBranch
= false;
89 TargetIsFallThrough
= true;
92 InvertNewBranch
= true;
93 InvertOrigBranch
= true;
94 TargetIsFallThrough
= false;
97 InvertNewBranch
= false;
98 InvertOrigBranch
= true;
99 TargetIsFallThrough
= true;
102 InvertNewBranch
= UsingDef1
;
103 InvertOrigBranch
= !UsingDef1
;
104 TargetIsFallThrough
= false;
107 InvertNewBranch
= !UsingDef1
;
108 InvertOrigBranch
= !UsingDef1
;
109 TargetIsFallThrough
= true;
112 } else if (BROp
== PPC::BCn
|| BROp
== PPC::BCLRn
) {
116 llvm_unreachable("Don't know how to handle this CR logical.");
118 InvertNewBranch
= true;
119 InvertOrigBranch
= false;
120 TargetIsFallThrough
= true;
123 InvertNewBranch
= false;
124 InvertOrigBranch
= false;
125 TargetIsFallThrough
= false;
128 InvertNewBranch
= false;
129 InvertOrigBranch
= true;
130 TargetIsFallThrough
= true;
133 InvertNewBranch
= true;
134 InvertOrigBranch
= true;
135 TargetIsFallThrough
= false;
138 InvertNewBranch
= !UsingDef1
;
139 InvertOrigBranch
= !UsingDef1
;
140 TargetIsFallThrough
= true;
143 InvertNewBranch
= UsingDef1
;
144 InvertOrigBranch
= !UsingDef1
;
145 TargetIsFallThrough
= false;
149 llvm_unreachable("Don't know how to handle this branch.");
152 class PPCReduceCRLogicals
: public MachineFunctionPass
{
156 struct CRLogicalOpInfo
{
158 // FIXME: If chains of copies are to be handled, this should be a vector.
159 std::pair
<MachineInstr
*, MachineInstr
*> CopyDefs
;
160 std::pair
<MachineInstr
*, MachineInstr
*> TrueDefs
;
161 unsigned IsBinary
: 1;
162 unsigned IsNullary
: 1;
163 unsigned ContainedInBlock
: 1;
164 unsigned FeedsISEL
: 1;
165 unsigned FeedsBR
: 1;
166 unsigned FeedsLogical
: 1;
167 unsigned SingleUse
: 1;
168 unsigned DefsSingleUse
: 1;
171 CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0),
172 ContainedInBlock(0), FeedsISEL(0), FeedsBR(0),
173 FeedsLogical(0), SingleUse(0), DefsSingleUse(1),
174 SubregDef1(0), SubregDef2(0) { }
179 const PPCInstrInfo
*TII
;
181 MachineRegisterInfo
*MRI
;
182 const MachineBranchProbabilityInfo
*MBPI
;
184 // A vector to contain all the CR logical operations
185 std::vector
<CRLogicalOpInfo
> AllCRLogicalOps
;
186 void initialize(MachineFunction
&MFParm
);
187 void collectCRLogicals();
188 bool handleCROp(CRLogicalOpInfo
&CRI
);
189 bool splitBlockOnBinaryCROp(CRLogicalOpInfo
&CRI
);
190 static bool isCRLogical(MachineInstr
&MI
) {
191 unsigned Opc
= MI
.getOpcode();
192 return Opc
== PPC::CRAND
|| Opc
== PPC::CRNAND
|| Opc
== PPC::CROR
||
193 Opc
== PPC::CRXOR
|| Opc
== PPC::CRNOR
|| Opc
== PPC::CREQV
||
194 Opc
== PPC::CRANDC
|| Opc
== PPC::CRORC
|| Opc
== PPC::CRSET
||
195 Opc
== PPC::CRUNSET
|| Opc
== PPC::CR6SET
|| Opc
== PPC::CR6UNSET
;
197 bool simplifyCode() {
198 bool Changed
= false;
199 // Not using a range-based for loop here as the vector may grow while being
201 for (unsigned i
= 0; i
< AllCRLogicalOps
.size(); i
++)
202 Changed
|= handleCROp(AllCRLogicalOps
[i
]);
207 PPCReduceCRLogicals() : MachineFunctionPass(ID
) {
208 initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry());
211 MachineInstr
*lookThroughCRCopy(unsigned Reg
, unsigned &Subreg
,
212 MachineInstr
*&CpDef
);
213 bool runOnMachineFunction(MachineFunction
&MF
) override
{
214 if (skipFunction(*MF
.getFunction()))
217 // If the subtarget doesn't use CR bits, there's nothing to do.
218 const PPCSubtarget
&STI
= MF
.getSubtarget
<PPCSubtarget
>();
219 if (!STI
.useCRBits())
224 return simplifyCode();
226 CRLogicalOpInfo
createCRLogicalOpInfo(MachineInstr
&MI
);
227 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
228 AU
.addRequired
<MachineBranchProbabilityInfo
>();
229 AU
.addRequired
<MachineDominatorTree
>();
230 MachineFunctionPass::getAnalysisUsage(AU
);
234 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
235 LLVM_DUMP_METHOD
void PPCReduceCRLogicals::CRLogicalOpInfo::dump() {
236 dbgs() << "CRLogicalOpMI: ";
238 dbgs() << "IsBinary: " << IsBinary
<< ", FeedsISEL: " << FeedsISEL
;
239 dbgs() << ", FeedsBR: " << FeedsBR
<< ", FeedsLogical: ";
240 dbgs() << FeedsLogical
<< ", SingleUse: " << SingleUse
;
241 dbgs() << ", DefsSingleUse: " << DefsSingleUse
;
242 dbgs() << ", SubregDef1: " << SubregDef1
<< ", SubregDef2: ";
243 dbgs() << SubregDef2
<< ", ContainedInBlock: " << ContainedInBlock
;
245 dbgs() << "\nDefs:\n";
246 TrueDefs
.first
->dump();
249 TrueDefs
.second
->dump();
251 if (CopyDefs
.first
) {
252 dbgs() << "CopyDef1: ";
253 CopyDefs
.first
->dump();
255 if (CopyDefs
.second
) {
256 dbgs() << "CopyDef2: ";
257 CopyDefs
.second
->dump();
262 PPCReduceCRLogicals::CRLogicalOpInfo
263 PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr
&MIParam
) {
267 if (isNullary(MIParam
)) {
269 Ret
.TrueDefs
= std::make_pair(nullptr, nullptr);
270 Ret
.CopyDefs
= std::make_pair(nullptr, nullptr);
272 MachineInstr
*Def1
= lookThroughCRCopy(MIParam
.getOperand(1).getReg(),
273 Ret
.SubregDef1
, Ret
.CopyDefs
.first
);
275 MRI
->hasOneNonDBGUse(Def1
->getOperand(0).getReg());
277 MRI
->hasOneNonDBGUse(Ret
.CopyDefs
.first
->getOperand(0).getReg());
278 assert(Def1
&& "Must be able to find a definition of operand 1.");
279 if (isBinary(MIParam
)) {
281 MachineInstr
*Def2
= lookThroughCRCopy(MIParam
.getOperand(2).getReg(),
283 Ret
.CopyDefs
.second
);
285 MRI
->hasOneNonDBGUse(Def2
->getOperand(0).getReg());
287 MRI
->hasOneNonDBGUse(Ret
.CopyDefs
.second
->getOperand(0).getReg());
288 assert(Def2
&& "Must be able to find a definition of operand 2.");
289 Ret
.TrueDefs
= std::make_pair(Def1
, Def2
);
291 Ret
.TrueDefs
= std::make_pair(Def1
, nullptr);
292 Ret
.CopyDefs
.second
= nullptr;
296 Ret
.ContainedInBlock
= 1;
298 for (MachineInstr
&UseMI
:
299 MRI
->use_nodbg_instructions(MIParam
.getOperand(0).getReg())) {
300 unsigned Opc
= UseMI
.getOpcode();
301 if (Opc
== PPC::ISEL
|| Opc
== PPC::ISEL8
)
303 if (Opc
== PPC::BC
|| Opc
== PPC::BCn
|| Opc
== PPC::BCLR
||
306 Ret
.FeedsLogical
= isCRLogical(UseMI
);
307 if (UseMI
.getParent() != MIParam
.getParent())
308 Ret
.ContainedInBlock
= 0;
310 Ret
.SingleUse
= MRI
->hasOneNonDBGUse(MIParam
.getOperand(0).getReg()) ? 1 : 0;
312 // We now know whether all the uses of the CR logical are in the same block.
313 if (!Ret
.IsNullary
) {
314 Ret
.ContainedInBlock
&=
315 (MIParam
.getParent() == Ret
.TrueDefs
.first
->getParent());
317 Ret
.ContainedInBlock
&=
318 (MIParam
.getParent() == Ret
.TrueDefs
.second
->getParent());
321 if (Ret
.IsBinary
&& Ret
.ContainedInBlock
&& Ret
.SingleUse
) {
322 NumContainedSingleUseBinOps
++;
323 if (Ret
.FeedsBR
&& Ret
.DefsSingleUse
)
329 /// Looks trhough a COPY instruction to the actual definition of the CR-bit
330 /// register and returns the instruction that defines it.
331 /// FIXME: This currently handles what is by-far the most common case:
332 /// an instruction that defines a CR field followed by a single copy of a bit
333 /// from that field into a virtual register. If chains of copies need to be
334 /// handled, this should have a loop until a non-copy instruction is found.
335 MachineInstr
*PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg
,
337 MachineInstr
*&CpDef
) {
339 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
341 MachineInstr
*Copy
= MRI
->getVRegDef(Reg
);
345 unsigned CopySrc
= Copy
->getOperand(1).getReg();
346 Subreg
= Copy
->getOperand(1).getSubReg();
347 if (!TargetRegisterInfo::isVirtualRegister(CopySrc
)) {
348 const TargetRegisterInfo
*TRI
= &TII
->getRegisterInfo();
350 if (CopySrc
== PPC::CR0EQ
|| CopySrc
== PPC::CR6EQ
)
351 Subreg
= PPC::sub_eq
;
352 if (CopySrc
== PPC::CR0LT
|| CopySrc
== PPC::CR6LT
)
353 Subreg
= PPC::sub_lt
;
354 if (CopySrc
== PPC::CR0GT
|| CopySrc
== PPC::CR6GT
)
355 Subreg
= PPC::sub_gt
;
356 if (CopySrc
== PPC::CR0UN
|| CopySrc
== PPC::CR6UN
)
357 Subreg
= PPC::sub_un
;
358 // Loop backwards and return the first MI that modifies the physical CR Reg.
359 MachineBasicBlock::iterator Me
= Copy
, B
= Copy
->getParent()->begin();
361 if ((--Me
)->modifiesRegister(CopySrc
, TRI
))
365 return MRI
->getVRegDef(CopySrc
);
368 void PPCReduceCRLogicals::initialize(MachineFunction
&MFParam
) {
370 MRI
= &MF
->getRegInfo();
371 TII
= MF
->getSubtarget
<PPCSubtarget
>().getInstrInfo();
372 MBPI
= &getAnalysis
<MachineBranchProbabilityInfo
>();
374 AllCRLogicalOps
.clear();
377 /// Contains all the implemented transformations on CR logical operations.
378 /// For example, a binary CR logical can be used to split a block on its inputs,
379 /// a unary CR logical might be used to change the condition code on a
380 /// comparison feeding it. A nullary CR logical might simply be removable
381 /// if the user of the bit it [un]sets can be transformed.
382 bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo
&CRI
) {
383 // We can definitely split a block on the inputs to a binary CR operation
384 // whose defs and (single) use are within the same block.
385 bool Changed
= false;
386 if (CRI
.IsBinary
&& CRI
.ContainedInBlock
&& CRI
.SingleUse
&& CRI
.FeedsBR
&&
388 Changed
= splitBlockOnBinaryCROp(CRI
);
390 NumBlocksSplitOnBinaryCROp
++;
395 /// Splits a block that contains a CR-logical operation that feeds a branch
396 /// and whose operands are produced within the block.
398 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
399 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
400 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
401 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
402 /// %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8
403 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
405 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
406 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
407 /// BC %vr6<kill>, <BB#2>; CRBITRC:%vr6
409 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
410 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
411 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
412 bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo
&CRI
) {
413 if (CRI
.CopyDefs
.first
== CRI
.CopyDefs
.second
) {
414 DEBUG(dbgs() << "Unable to split as the two operands are the same\n");
415 NumNotSplitIdenticalOperands
++;
418 if (CRI
.TrueDefs
.first
->isCopy() || CRI
.TrueDefs
.second
->isCopy() ||
419 CRI
.TrueDefs
.first
->isPHI() || CRI
.TrueDefs
.second
->isPHI()) {
420 DEBUG(dbgs() << "Unable to split because one of the operands is a PHI or "
421 "chain of copies.\n");
422 NumNotSplitChainCopies
++;
425 // Note: keep in sync with computeBranchTargetAndInversion().
426 if (CRI
.MI
->getOpcode() != PPC::CROR
&&
427 CRI
.MI
->getOpcode() != PPC::CRAND
&&
428 CRI
.MI
->getOpcode() != PPC::CRNOR
&&
429 CRI
.MI
->getOpcode() != PPC::CRNAND
&&
430 CRI
.MI
->getOpcode() != PPC::CRORC
&&
431 CRI
.MI
->getOpcode() != PPC::CRANDC
) {
432 DEBUG(dbgs() << "Unable to split blocks on this opcode.\n");
433 NumNotSplitWrongOpcode
++;
436 DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI
.dump());
437 MachineBasicBlock::iterator Def1It
= CRI
.TrueDefs
.first
;
438 MachineBasicBlock::iterator Def2It
= CRI
.TrueDefs
.second
;
440 bool UsingDef1
= false;
441 MachineInstr
*SplitBefore
= &*Def2It
;
442 for (auto E
= CRI
.MI
->getParent()->end(); Def2It
!= E
; ++Def2It
) {
443 if (Def1It
== Def2It
) { // Def2 comes before Def1.
444 SplitBefore
= &*Def1It
;
450 DEBUG(dbgs() << "We will split the following block:\n";);
451 DEBUG(CRI
.MI
->getParent()->dump());
452 DEBUG(dbgs() << "Before instruction:\n"; SplitBefore
->dump());
454 // Get the branch instruction.
455 MachineInstr
*Branch
=
456 MRI
->use_nodbg_begin(CRI
.MI
->getOperand(0).getReg())->getParent();
458 // We want the new block to have no code in it other than the definition
459 // of the input to the CR logical and the CR logical itself. So we move
460 // those to the bottom of the block (just before the branch). Then we
461 // will split before the CR logical.
462 MachineBasicBlock
*MBB
= SplitBefore
->getParent();
463 auto FirstTerminator
= MBB
->getFirstTerminator();
464 MachineBasicBlock::iterator FirstInstrToMove
=
465 UsingDef1
? CRI
.TrueDefs
.first
: CRI
.TrueDefs
.second
;
466 MachineBasicBlock::iterator SecondInstrToMove
=
467 UsingDef1
? CRI
.CopyDefs
.first
: CRI
.CopyDefs
.second
;
469 // The instructions that need to be moved are not guaranteed to be
470 // contiguous. Move them individually.
471 // FIXME: If one of the operands is a chain of (single use) copies, they
472 // can all be moved and we can still split.
473 MBB
->splice(FirstTerminator
, MBB
, FirstInstrToMove
);
474 if (FirstInstrToMove
!= SecondInstrToMove
)
475 MBB
->splice(FirstTerminator
, MBB
, SecondInstrToMove
);
476 MBB
->splice(FirstTerminator
, MBB
, CRI
.MI
);
478 unsigned Opc
= CRI
.MI
->getOpcode();
479 bool InvertOrigBranch
, InvertNewBranch
, TargetIsFallThrough
;
480 computeBranchTargetAndInversion(Opc
, Branch
->getOpcode(), UsingDef1
,
481 InvertNewBranch
, InvertOrigBranch
,
482 TargetIsFallThrough
);
483 MachineInstr
*SplitCond
=
484 UsingDef1
? CRI
.CopyDefs
.second
: CRI
.CopyDefs
.first
;
485 DEBUG(dbgs() << "We will " << (InvertNewBranch
? "invert" : "copy"));
486 DEBUG(dbgs() << " the original branch and the target is the " <<
487 (TargetIsFallThrough
? "fallthrough block\n" : "orig. target block\n"));
488 DEBUG(dbgs() << "Original branch instruction: "; Branch
->dump());
489 BlockSplitInfo BSI
{ Branch
, SplitBefore
, SplitCond
, InvertNewBranch
,
490 InvertOrigBranch
, TargetIsFallThrough
, MBPI
, CRI
.MI
,
491 UsingDef1
? CRI
.CopyDefs
.first
: CRI
.CopyDefs
.second
};
492 bool Changed
= splitMBB(BSI
);
493 // If we've split on a CR logical that is fed by a CR logical,
494 // recompute the source CR logical as it may be usable for splitting.
496 bool Input1CRlogical
=
497 CRI
.TrueDefs
.first
&& isCRLogical(*CRI
.TrueDefs
.first
);
498 bool Input2CRlogical
=
499 CRI
.TrueDefs
.second
&& isCRLogical(*CRI
.TrueDefs
.second
);
501 AllCRLogicalOps
.push_back(createCRLogicalOpInfo(*CRI
.TrueDefs
.first
));
503 AllCRLogicalOps
.push_back(createCRLogicalOpInfo(*CRI
.TrueDefs
.second
));
508 void PPCReduceCRLogicals::collectCRLogicals() {
509 for (MachineBasicBlock
&MBB
: *MF
) {
510 for (MachineInstr
&MI
: MBB
) {
511 if (isCRLogical(MI
)) {
512 AllCRLogicalOps
.push_back(createCRLogicalOpInfo(MI
));
514 if (AllCRLogicalOps
.back().IsNullary
)
515 TotalNullaryCRLogicals
++;
516 else if (AllCRLogicalOps
.back().IsBinary
)
517 TotalBinaryCRLogicals
++;
519 TotalUnaryCRLogicals
++;
525 } // end annonymous namespace
527 INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals
, DEBUG_TYPE
,
528 "PowerPC Reduce CR logical Operation", false, false)
529 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
530 INITIALIZE_PASS_END(PPCReduceCRLogicals
, DEBUG_TYPE
,
531 "PowerPC Reduce CR logical Operation", false, false)
533 char PPCReduceCRLogicals::ID
= 0;
535 llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); }