1 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the AArch64ConditionalCompares pass which reduces
10 // branching and code size by using the conditional compare instructions CCMP,
13 // The CFG transformations for forming conditional compares are very similar to
14 // if-conversion, and this pass should run immediately before the early
15 // if-conversion pass.
17 //===----------------------------------------------------------------------===//
20 #include "llvm/ADT/DepthFirstIterator.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/MachineTraceMetrics.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/CodeGen/TargetRegisterInfo.h"
35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
42 #define DEBUG_TYPE "aarch64-ccmp"
44 // Absolute maximum number of instructions allowed per speculated block.
45 // This bypasses all other heuristics, so it should be set fairly high.
46 static cl::opt
<unsigned> BlockInstrLimit(
47 "aarch64-ccmp-limit", cl::init(30), cl::Hidden
,
48 cl::desc("Maximum number of instructions per speculated block."));
50 // Stress testing mode - disable heuristics.
51 static cl::opt
<bool> Stress("aarch64-stress-ccmp", cl::Hidden
,
52 cl::desc("Turn all knobs to 11"));
54 STATISTIC(NumConsidered
, "Number of ccmps considered");
55 STATISTIC(NumPhiRejs
, "Number of ccmps rejected (PHI)");
56 STATISTIC(NumPhysRejs
, "Number of ccmps rejected (Physregs)");
57 STATISTIC(NumPhi2Rejs
, "Number of ccmps rejected (PHI2)");
58 STATISTIC(NumHeadBranchRejs
, "Number of ccmps rejected (Head branch)");
59 STATISTIC(NumCmpBranchRejs
, "Number of ccmps rejected (CmpBB branch)");
60 STATISTIC(NumCmpTermRejs
, "Number of ccmps rejected (CmpBB is cbz...)");
61 STATISTIC(NumImmRangeRejs
, "Number of ccmps rejected (Imm out of range)");
62 STATISTIC(NumLiveDstRejs
, "Number of ccmps rejected (Cmp dest live)");
63 STATISTIC(NumMultNZCVUses
, "Number of ccmps rejected (NZCV used)");
64 STATISTIC(NumUnknNZCVDefs
, "Number of ccmps rejected (NZCV def unknown)");
66 STATISTIC(NumSpeculateRejs
, "Number of ccmps rejected (Can't speculate)");
68 STATISTIC(NumConverted
, "Number of ccmp instructions created");
69 STATISTIC(NumCompBranches
, "Number of cbz/cbnz branches converted");
71 //===----------------------------------------------------------------------===//
73 //===----------------------------------------------------------------------===//
75 // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
76 // after determining if it is possible. The class contains no heuristics;
77 // external code should be used to determine when ccmp-conversion is a good
80 // CCmp-formation works on a CFG representing chained conditions, typically
81 // from C's short-circuit || and && operators:
83 // From: Head To: Head
93 // The Head block is terminated by a br.cond instruction, and the CmpBB block
94 // contains compare + br.cond. Tail must be a successor of both.
96 // The cmp-conversion turns the compare instruction in CmpBB into a conditional
97 // compare, and merges CmpBB into Head, speculatively executing its
98 // instructions. The AArch64 conditional compare instructions have an immediate
99 // operand that specifies the NZCV flag values when the condition is false and
100 // the compare isn't executed. This makes it possible to chain compares with
101 // different condition codes.
105 // if (a == 5 || b == 17)
122 // ccmp w1, #17, 4, ne ; 4 = nZcv
128 // The ccmp condition code is the one that would cause the Head terminator to
131 // FIXME: It should also be possible to speculate a block on the critical edge
132 // between Head and Tail, just like if-converting a diamond.
134 // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
139 const TargetInstrInfo
*TII
;
140 const TargetRegisterInfo
*TRI
;
141 MachineRegisterInfo
*MRI
;
142 const MachineBranchProbabilityInfo
*MBPI
;
145 /// The first block containing a conditional branch, dominating everything
147 MachineBasicBlock
*Head
;
149 /// The block containing cmp+br.cond with a successor shared with Head.
150 MachineBasicBlock
*CmpBB
;
152 /// The common successor for Head and CmpBB.
153 MachineBasicBlock
*Tail
;
155 /// The compare instruction in CmpBB that can be converted to a ccmp.
159 /// The branch condition in Head as determined by AnalyzeBranch.
160 SmallVector
<MachineOperand
, 4> HeadCond
;
162 /// The condition code that makes Head branch to CmpBB.
163 AArch64CC::CondCode HeadCmpBBCC
;
165 /// The branch condition in CmpBB.
166 SmallVector
<MachineOperand
, 4> CmpBBCond
;
168 /// The condition code that makes CmpBB branch to Tail.
169 AArch64CC::CondCode CmpBBTailCC
;
171 /// Check if the Tail PHIs are trivially convertible.
172 bool trivialTailPHIs();
174 /// Remove CmpBB from the Tail PHIs.
175 void updateTailPHIs();
177 /// Check if an operand defining DstReg is dead.
178 bool isDeadDef(unsigned DstReg
);
180 /// Find the compare instruction in MBB that controls the conditional branch.
181 /// Return NULL if a convertible instruction can't be found.
182 MachineInstr
*findConvertibleCompare(MachineBasicBlock
*MBB
);
184 /// Return true if all non-terminator instructions in MBB can be safely
186 bool canSpeculateInstrs(MachineBasicBlock
*MBB
, const MachineInstr
*CmpMI
);
189 /// runOnMachineFunction - Initialize per-function data structures.
190 void runOnMachineFunction(MachineFunction
&MF
,
191 const MachineBranchProbabilityInfo
*MBPI
) {
194 TII
= MF
.getSubtarget().getInstrInfo();
195 TRI
= MF
.getSubtarget().getRegisterInfo();
196 MRI
= &MF
.getRegInfo();
199 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
200 /// internal state, and return true.
201 bool canConvert(MachineBasicBlock
*MBB
);
203 /// Cmo-convert the last block passed to canConvertCmp(), assuming
204 /// it is possible. Add any erased blocks to RemovedBlocks.
205 void convert(SmallVectorImpl
<MachineBasicBlock
*> &RemovedBlocks
);
207 /// Return the expected code size delta if the conversion into a
208 /// conditional compare is performed.
209 int expectedCodeSizeDelta() const;
211 } // end anonymous namespace
213 // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
214 // This means that no if-conversion is required when merging CmpBB into Head.
215 bool SSACCmpConv::trivialTailPHIs() {
216 for (auto &I
: *Tail
) {
219 unsigned HeadReg
= 0, CmpBBReg
= 0;
220 // PHI operands come in (VReg, MBB) pairs.
221 for (unsigned oi
= 1, oe
= I
.getNumOperands(); oi
!= oe
; oi
+= 2) {
222 MachineBasicBlock
*MBB
= I
.getOperand(oi
+ 1).getMBB();
223 unsigned Reg
= I
.getOperand(oi
).getReg();
225 assert((!HeadReg
|| HeadReg
== Reg
) && "Inconsistent PHI operands");
229 assert((!CmpBBReg
|| CmpBBReg
== Reg
) && "Inconsistent PHI operands");
233 if (HeadReg
!= CmpBBReg
)
239 // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
240 // removing the CmpBB operands. The Head operands will be identical.
241 void SSACCmpConv::updateTailPHIs() {
242 for (auto &I
: *Tail
) {
245 // I is a PHI. It can have multiple entries for CmpBB.
246 for (unsigned oi
= I
.getNumOperands(); oi
> 2; oi
-= 2) {
247 // PHI operands are (Reg, MBB) at (oi-2, oi-1).
248 if (I
.getOperand(oi
- 1).getMBB() == CmpBB
) {
249 I
.RemoveOperand(oi
- 1);
250 I
.RemoveOperand(oi
- 2);
256 // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
257 // are still writing virtual registers without any uses.
258 bool SSACCmpConv::isDeadDef(unsigned DstReg
) {
259 // Writes to the zero register are dead.
260 if (DstReg
== AArch64::WZR
|| DstReg
== AArch64::XZR
)
262 if (!TargetRegisterInfo::isVirtualRegister(DstReg
))
264 // A virtual register def without any uses will be marked dead later, and
265 // eventually replaced by the zero register.
266 return MRI
->use_nodbg_empty(DstReg
);
269 // Parse a condition code returned by AnalyzeBranch, and compute the CondCode
270 // corresponding to TBB.
272 static bool parseCond(ArrayRef
<MachineOperand
> Cond
, AArch64CC::CondCode
&CC
) {
273 // A normal br.cond simply has the condition code.
274 if (Cond
[0].getImm() != -1) {
275 assert(Cond
.size() == 1 && "Unknown Cond array format");
276 CC
= (AArch64CC::CondCode
)(int)Cond
[0].getImm();
279 // For tbz and cbz instruction, the opcode is next.
280 switch (Cond
[1].getImm()) {
282 // This includes tbz / tbnz branches which can't be converted to
287 assert(Cond
.size() == 3 && "Unknown Cond array format");
292 assert(Cond
.size() == 3 && "Unknown Cond array format");
298 MachineInstr
*SSACCmpConv::findConvertibleCompare(MachineBasicBlock
*MBB
) {
299 MachineBasicBlock::iterator I
= MBB
->getFirstTerminator();
302 // The terminator must be controlled by the flags.
303 if (!I
->readsRegister(AArch64::NZCV
)) {
304 switch (I
->getOpcode()) {
309 // These can be converted into a ccmp against #0.
313 LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I
);
317 // Now find the instruction controlling the terminator.
318 for (MachineBasicBlock::iterator B
= MBB
->begin(); I
!= B
;) {
320 assert(!I
->isTerminator() && "Spurious terminator");
321 switch (I
->getOpcode()) {
322 // cmp is an alias for subs with a dead destination register.
323 case AArch64::SUBSWri
:
324 case AArch64::SUBSXri
:
325 // cmn is an alias for adds with a dead destination register.
326 case AArch64::ADDSWri
:
327 case AArch64::ADDSXri
:
328 // Check that the immediate operand is within range, ccmp wants a uimm5.
329 // Rd = SUBSri Rn, imm, shift
330 if (I
->getOperand(3).getImm() || !isUInt
<5>(I
->getOperand(2).getImm())) {
331 LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I
);
336 case AArch64::SUBSWrr
:
337 case AArch64::SUBSXrr
:
338 case AArch64::ADDSWrr
:
339 case AArch64::ADDSXrr
:
340 if (isDeadDef(I
->getOperand(0).getReg()))
342 LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
346 case AArch64::FCMPSrr
:
347 case AArch64::FCMPDrr
:
348 case AArch64::FCMPESrr
:
349 case AArch64::FCMPEDrr
:
353 // Check for flag reads and clobbers.
354 MIOperands::PhysRegInfo PRI
=
355 MIOperands(*I
).analyzePhysReg(AArch64::NZCV
, TRI
);
358 // The ccmp doesn't produce exactly the same flags as the original
359 // compare, so reject the transform if there are uses of the flags
360 // besides the terminators.
361 LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I
);
366 if (PRI
.Defined
|| PRI
.Clobbered
) {
367 LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I
);
372 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB
)
377 /// Determine if all the instructions in MBB can safely
378 /// be speculated. The terminators are not considered.
380 /// Only CmpMI is allowed to clobber the flags.
382 bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock
*MBB
,
383 const MachineInstr
*CmpMI
) {
384 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
386 if (!MBB
->livein_empty()) {
387 LLVM_DEBUG(dbgs() << printMBBReference(*MBB
) << " has live-ins.\n");
391 unsigned InstrCount
= 0;
393 // Check all instructions, except the terminators. It is assumed that
394 // terminators never have side effects or define any used register values.
395 for (auto &I
: make_range(MBB
->begin(), MBB
->getFirstTerminator())) {
396 if (I
.isDebugInstr())
399 if (++InstrCount
> BlockInstrLimit
&& !Stress
) {
400 LLVM_DEBUG(dbgs() << printMBBReference(*MBB
) << " has more than "
401 << BlockInstrLimit
<< " instructions.\n");
405 // There shouldn't normally be any phis in a single-predecessor block.
407 LLVM_DEBUG(dbgs() << "Can't hoist: " << I
);
411 // Don't speculate loads. Note that it may be possible and desirable to
412 // speculate GOT or constant pool loads that are guaranteed not to trap,
413 // but we don't support that for now.
415 LLVM_DEBUG(dbgs() << "Won't speculate load: " << I
);
419 // We never speculate stores, so an AA pointer isn't necessary.
420 bool DontMoveAcrossStore
= true;
421 if (!I
.isSafeToMove(nullptr, DontMoveAcrossStore
)) {
422 LLVM_DEBUG(dbgs() << "Can't speculate: " << I
);
426 // Only CmpMI is allowed to clobber the flags.
427 if (&I
!= CmpMI
&& I
.modifiesRegister(AArch64::NZCV
, TRI
)) {
428 LLVM_DEBUG(dbgs() << "Clobbers flags: " << I
);
435 /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
436 /// candidate for cmp-conversion. Fill out the internal state.
438 bool SSACCmpConv::canConvert(MachineBasicBlock
*MBB
) {
440 Tail
= CmpBB
= nullptr;
442 if (Head
->succ_size() != 2)
444 MachineBasicBlock
*Succ0
= Head
->succ_begin()[0];
445 MachineBasicBlock
*Succ1
= Head
->succ_begin()[1];
447 // CmpBB can only have a single predecessor. Tail is allowed many.
448 if (Succ0
->pred_size() != 1)
449 std::swap(Succ0
, Succ1
);
451 // Succ0 is our candidate for CmpBB.
452 if (Succ0
->pred_size() != 1 || Succ0
->succ_size() != 2)
458 if (!CmpBB
->isSuccessor(Tail
))
461 // The CFG topology checks out.
462 LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head
) << " -> "
463 << printMBBReference(*CmpBB
) << " -> "
464 << printMBBReference(*Tail
) << '\n');
467 // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
469 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
470 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
471 // always be safe to sink the ccmp down to immediately before the CmpBB
473 if (!trivialTailPHIs()) {
474 LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
479 if (!Tail
->livein_empty()) {
480 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
485 // CmpBB should never have PHIs since Head is its only predecessor.
486 // FIXME: Clean them up if it happens.
487 if (!CmpBB
->empty() && CmpBB
->front().isPHI()) {
488 LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
493 if (!CmpBB
->livein_empty()) {
494 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
499 // The branch we're looking to eliminate must be analyzable.
501 MachineBasicBlock
*TBB
= nullptr, *FBB
= nullptr;
502 if (TII
->analyzeBranch(*Head
, TBB
, FBB
, HeadCond
)) {
503 LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
508 // This is weird, probably some sort of degenerate CFG, or an edge to a
510 if (!TBB
|| HeadCond
.empty()) {
512 dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
517 if (!parseCond(HeadCond
, HeadCmpBBCC
)) {
518 LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
523 // Make sure the branch direction is right.
525 assert(TBB
== Tail
&& "Unexpected TBB");
526 HeadCmpBBCC
= AArch64CC::getInvertedCondCode(HeadCmpBBCC
);
531 if (TII
->analyzeBranch(*CmpBB
, TBB
, FBB
, CmpBBCond
)) {
532 LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
537 if (!TBB
|| CmpBBCond
.empty()) {
539 dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
544 if (!parseCond(CmpBBCond
, CmpBBTailCC
)) {
545 LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
551 CmpBBTailCC
= AArch64CC::getInvertedCondCode(CmpBBTailCC
);
553 LLVM_DEBUG(dbgs() << "Head->CmpBB on "
554 << AArch64CC::getCondCodeName(HeadCmpBBCC
)
555 << ", CmpBB->Tail on "
556 << AArch64CC::getCondCodeName(CmpBBTailCC
) << '\n');
558 CmpMI
= findConvertibleCompare(CmpBB
);
562 if (!canSpeculateInstrs(CmpBB
, CmpMI
)) {
569 void SSACCmpConv::convert(SmallVectorImpl
<MachineBasicBlock
*> &RemovedBlocks
) {
570 LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB
) << " into "
571 << printMBBReference(*Head
) << ":\n"
574 // All CmpBB instructions are moved into Head, and CmpBB is deleted.
575 // Update the CFG first.
578 // Save successor probabilties before removing CmpBB and Tail from their
580 BranchProbability Head2CmpBB
= MBPI
->getEdgeProbability(Head
, CmpBB
);
581 BranchProbability CmpBB2Tail
= MBPI
->getEdgeProbability(CmpBB
, Tail
);
583 Head
->removeSuccessor(CmpBB
);
584 CmpBB
->removeSuccessor(Tail
);
586 // If Head and CmpBB had successor probabilties, udpate the probabilities to
587 // reflect the ccmp-conversion.
588 if (Head
->hasSuccessorProbabilities() && CmpBB
->hasSuccessorProbabilities()) {
590 // Head is allowed two successors. We've removed CmpBB, so the remaining
591 // successor is Tail. We need to increase the successor probability for
592 // Tail to account for the CmpBB path we removed.
594 // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
595 assert(*Head
->succ_begin() == Tail
&& "Head successor is not Tail");
596 BranchProbability Head2Tail
= MBPI
->getEdgeProbability(Head
, Tail
);
597 Head
->setSuccProbability(Head
->succ_begin(),
598 Head2Tail
+ Head2CmpBB
* CmpBB2Tail
);
600 // We will transfer successors of CmpBB to Head in a moment without
601 // normalizing the successor probabilities. Set the successor probabilites
604 // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
605 for (auto I
= CmpBB
->succ_begin(), E
= CmpBB
->succ_end(); I
!= E
; ++I
) {
606 BranchProbability CmpBB2I
= MBPI
->getEdgeProbability(CmpBB
, *I
);
607 CmpBB
->setSuccProbability(I
, Head2CmpBB
* CmpBB2I
);
611 Head
->transferSuccessorsAndUpdatePHIs(CmpBB
);
612 DebugLoc TermDL
= Head
->getFirstTerminator()->getDebugLoc();
613 TII
->removeBranch(*Head
);
615 // If the Head terminator was one of the cbz / tbz branches with built-in
616 // compare, we need to insert an explicit compare instruction in its place.
617 if (HeadCond
[0].getImm() == -1) {
620 switch (HeadCond
[1].getImm()) {
623 Opc
= AArch64::SUBSWri
;
627 Opc
= AArch64::SUBSXri
;
630 llvm_unreachable("Cannot convert Head branch");
632 const MCInstrDesc
&MCID
= TII
->get(Opc
);
633 // Create a dummy virtual register for the SUBS def.
635 MRI
->createVirtualRegister(TII
->getRegClass(MCID
, 0, TRI
, *MF
));
636 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
637 BuildMI(*Head
, Head
->end(), TermDL
, MCID
)
638 .addReg(DestReg
, RegState::Define
| RegState::Dead
)
642 // SUBS uses the GPR*sp register classes.
643 MRI
->constrainRegClass(HeadCond
[2].getReg(),
644 TII
->getRegClass(MCID
, 1, TRI
, *MF
));
647 Head
->splice(Head
->end(), CmpBB
, CmpBB
->begin(), CmpBB
->end());
649 // Now replace CmpMI with a ccmp instruction that also considers the incoming
652 unsigned FirstOp
= 1; // First CmpMI operand to copy.
653 bool isZBranch
= false; // CmpMI is a cbz/cbnz instruction.
654 switch (CmpMI
->getOpcode()) {
656 llvm_unreachable("Unknown compare opcode");
657 case AArch64::SUBSWri
: Opc
= AArch64::CCMPWi
; break;
658 case AArch64::SUBSWrr
: Opc
= AArch64::CCMPWr
; break;
659 case AArch64::SUBSXri
: Opc
= AArch64::CCMPXi
; break;
660 case AArch64::SUBSXrr
: Opc
= AArch64::CCMPXr
; break;
661 case AArch64::ADDSWri
: Opc
= AArch64::CCMNWi
; break;
662 case AArch64::ADDSWrr
: Opc
= AArch64::CCMNWr
; break;
663 case AArch64::ADDSXri
: Opc
= AArch64::CCMNXi
; break;
664 case AArch64::ADDSXrr
: Opc
= AArch64::CCMNXr
; break;
665 case AArch64::FCMPSrr
: Opc
= AArch64::FCCMPSrr
; FirstOp
= 0; break;
666 case AArch64::FCMPDrr
: Opc
= AArch64::FCCMPDrr
; FirstOp
= 0; break;
667 case AArch64::FCMPESrr
: Opc
= AArch64::FCCMPESrr
; FirstOp
= 0; break;
668 case AArch64::FCMPEDrr
: Opc
= AArch64::FCCMPEDrr
; FirstOp
= 0; break;
671 Opc
= AArch64::CCMPWi
;
677 Opc
= AArch64::CCMPXi
;
683 // The ccmp instruction should set the flags according to the comparison when
684 // Head would have branched to CmpBB.
685 // The NZCV immediate operand should provide flags for the case where Head
686 // would have branched to Tail. These flags should cause the new Head
687 // terminator to branch to tail.
688 unsigned NZCV
= AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC
);
689 const MCInstrDesc
&MCID
= TII
->get(Opc
);
690 MRI
->constrainRegClass(CmpMI
->getOperand(FirstOp
).getReg(),
691 TII
->getRegClass(MCID
, 0, TRI
, *MF
));
692 if (CmpMI
->getOperand(FirstOp
+ 1).isReg())
693 MRI
->constrainRegClass(CmpMI
->getOperand(FirstOp
+ 1).getReg(),
694 TII
->getRegClass(MCID
, 1, TRI
, *MF
));
695 MachineInstrBuilder MIB
= BuildMI(*Head
, CmpMI
, CmpMI
->getDebugLoc(), MCID
)
696 .add(CmpMI
->getOperand(FirstOp
)); // Register Rn
698 MIB
.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
700 MIB
.add(CmpMI
->getOperand(FirstOp
+ 1)); // Register Rm / Immediate
701 MIB
.addImm(NZCV
).addImm(HeadCmpBBCC
);
703 // If CmpMI was a terminator, we need a new conditional branch to replace it.
704 // This now becomes a Head terminator.
706 bool isNZ
= CmpMI
->getOpcode() == AArch64::CBNZW
||
707 CmpMI
->getOpcode() == AArch64::CBNZX
;
708 BuildMI(*Head
, CmpMI
, CmpMI
->getDebugLoc(), TII
->get(AArch64::Bcc
))
709 .addImm(isNZ
? AArch64CC::NE
: AArch64CC::EQ
)
710 .add(CmpMI
->getOperand(1)); // Branch target.
712 CmpMI
->eraseFromParent();
713 Head
->updateTerminator();
715 RemovedBlocks
.push_back(CmpBB
);
716 CmpBB
->eraseFromParent();
717 LLVM_DEBUG(dbgs() << "Result:\n" << *Head
);
721 int SSACCmpConv::expectedCodeSizeDelta() const {
723 // If the Head terminator was one of the cbz / tbz branches with built-in
724 // compare, we need to insert an explicit compare instruction in its place
725 // plus a branch instruction.
726 if (HeadCond
[0].getImm() == -1) {
727 switch (HeadCond
[1].getImm()) {
732 // Therefore delta += 1
736 llvm_unreachable("Cannot convert Head branch");
739 // If the Cmp terminator was one of the cbz / tbz branches with
740 // built-in compare, it will be turned into a compare instruction
741 // into Head, but we do not save any instruction.
742 // Otherwise, we save the branch instruction.
743 switch (CmpMI
->getOpcode()) {
756 //===----------------------------------------------------------------------===//
757 // AArch64ConditionalCompares Pass
758 //===----------------------------------------------------------------------===//
761 class AArch64ConditionalCompares
: public MachineFunctionPass
{
762 const MachineBranchProbabilityInfo
*MBPI
;
763 const TargetInstrInfo
*TII
;
764 const TargetRegisterInfo
*TRI
;
765 MCSchedModel SchedModel
;
766 // Does the proceeded function has Oz attribute.
768 MachineRegisterInfo
*MRI
;
769 MachineDominatorTree
*DomTree
;
770 MachineLoopInfo
*Loops
;
771 MachineTraceMetrics
*Traces
;
772 MachineTraceMetrics::Ensemble
*MinInstr
;
777 AArch64ConditionalCompares() : MachineFunctionPass(ID
) {
778 initializeAArch64ConditionalComparesPass(*PassRegistry::getPassRegistry());
780 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
781 bool runOnMachineFunction(MachineFunction
&MF
) override
;
782 StringRef
getPassName() const override
{
783 return "AArch64 Conditional Compares";
787 bool tryConvert(MachineBasicBlock
*);
788 void updateDomTree(ArrayRef
<MachineBasicBlock
*> Removed
);
789 void updateLoops(ArrayRef
<MachineBasicBlock
*> Removed
);
790 void invalidateTraces();
791 bool shouldConvert();
793 } // end anonymous namespace
795 char AArch64ConditionalCompares::ID
= 0;
797 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares
, "aarch64-ccmp",
798 "AArch64 CCMP Pass", false, false)
799 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo
)
800 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
801 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics
)
802 INITIALIZE_PASS_END(AArch64ConditionalCompares
, "aarch64-ccmp",
803 "AArch64 CCMP Pass", false, false)
805 FunctionPass
*llvm::createAArch64ConditionalCompares() {
806 return new AArch64ConditionalCompares();
809 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage
&AU
) const {
810 AU
.addRequired
<MachineBranchProbabilityInfo
>();
811 AU
.addRequired
<MachineDominatorTree
>();
812 AU
.addPreserved
<MachineDominatorTree
>();
813 AU
.addRequired
<MachineLoopInfo
>();
814 AU
.addPreserved
<MachineLoopInfo
>();
815 AU
.addRequired
<MachineTraceMetrics
>();
816 AU
.addPreserved
<MachineTraceMetrics
>();
817 MachineFunctionPass::getAnalysisUsage(AU
);
820 /// Update the dominator tree after if-conversion erased some blocks.
821 void AArch64ConditionalCompares::updateDomTree(
822 ArrayRef
<MachineBasicBlock
*> Removed
) {
823 // convert() removes CmpBB which was previously dominated by Head.
824 // CmpBB children should be transferred to Head.
825 MachineDomTreeNode
*HeadNode
= DomTree
->getNode(CmpConv
.Head
);
826 for (MachineBasicBlock
*RemovedMBB
: Removed
) {
827 MachineDomTreeNode
*Node
= DomTree
->getNode(RemovedMBB
);
828 assert(Node
!= HeadNode
&& "Cannot erase the head node");
829 assert(Node
->getIDom() == HeadNode
&& "CmpBB should be dominated by Head");
830 while (Node
->getNumChildren())
831 DomTree
->changeImmediateDominator(Node
->getChildren().back(), HeadNode
);
832 DomTree
->eraseNode(RemovedMBB
);
836 /// Update LoopInfo after if-conversion.
838 AArch64ConditionalCompares::updateLoops(ArrayRef
<MachineBasicBlock
*> Removed
) {
841 for (MachineBasicBlock
*RemovedMBB
: Removed
)
842 Loops
->removeBlock(RemovedMBB
);
845 /// Invalidate MachineTraceMetrics before if-conversion.
846 void AArch64ConditionalCompares::invalidateTraces() {
847 Traces
->invalidate(CmpConv
.Head
);
848 Traces
->invalidate(CmpConv
.CmpBB
);
851 /// Apply cost model and heuristics to the if-conversion in IfConv.
852 /// Return true if the conversion is a good idea.
854 bool AArch64ConditionalCompares::shouldConvert() {
855 // Stress testing mode disables all cost considerations.
859 MinInstr
= Traces
->getEnsemble(MachineTraceMetrics::TS_MinInstrCount
);
861 // Head dominates CmpBB, so it is always included in its trace.
862 MachineTraceMetrics::Trace Trace
= MinInstr
->getTrace(CmpConv
.CmpBB
);
864 // If code size is the main concern
866 int CodeSizeDelta
= CmpConv
.expectedCodeSizeDelta();
867 LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta
<< '\n');
868 // If we are minimizing the code size, do the conversion whatever
870 if (CodeSizeDelta
< 0)
872 if (CodeSizeDelta
> 0) {
873 LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
876 // CodeSizeDelta == 0, continue with the regular heuristics
879 // Heuristic: The compare conversion delays the execution of the branch
880 // instruction because we must wait for the inputs to the second compare as
881 // well. The branch has no dependent instructions, but delaying it increases
882 // the cost of a misprediction.
884 // Set a limit on the delay we will accept.
885 unsigned DelayLimit
= SchedModel
.MispredictPenalty
* 3 / 4;
887 // Instruction depths can be computed for all trace instructions above CmpBB.
889 Trace
.getInstrCycles(*CmpConv
.Head
->getFirstTerminator()).Depth
;
890 unsigned CmpBBDepth
=
891 Trace
.getInstrCycles(*CmpConv
.CmpBB
->getFirstTerminator()).Depth
;
892 LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
893 << "\nCmpBB depth: " << CmpBBDepth
<< '\n');
894 if (CmpBBDepth
> HeadDepth
+ DelayLimit
) {
895 LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
900 // Check the resource depth at the bottom of CmpBB - these instructions will
902 unsigned ResDepth
= Trace
.getResourceDepth(true);
903 LLVM_DEBUG(dbgs() << "Resources: " << ResDepth
<< '\n');
905 // Heuristic: The speculatively executed instructions must all be able to
906 // merge into the Head block. The Head critical path should dominate the
907 // resource cost of the speculated instructions.
908 if (ResDepth
> HeadDepth
) {
909 LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
915 bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock
*MBB
) {
916 bool Changed
= false;
917 while (CmpConv
.canConvert(MBB
) && shouldConvert()) {
919 SmallVector
<MachineBasicBlock
*, 4> RemovedBlocks
;
920 CmpConv
.convert(RemovedBlocks
);
922 updateDomTree(RemovedBlocks
);
923 updateLoops(RemovedBlocks
);
928 bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction
&MF
) {
929 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
930 << "********** Function: " << MF
.getName() << '\n');
931 if (skipFunction(MF
.getFunction()))
934 TII
= MF
.getSubtarget().getInstrInfo();
935 TRI
= MF
.getSubtarget().getRegisterInfo();
936 SchedModel
= MF
.getSubtarget().getSchedModel();
937 MRI
= &MF
.getRegInfo();
938 DomTree
= &getAnalysis
<MachineDominatorTree
>();
939 Loops
= getAnalysisIfAvailable
<MachineLoopInfo
>();
940 MBPI
= &getAnalysis
<MachineBranchProbabilityInfo
>();
941 Traces
= &getAnalysis
<MachineTraceMetrics
>();
943 MinSize
= MF
.getFunction().hasMinSize();
945 bool Changed
= false;
946 CmpConv
.runOnMachineFunction(MF
, MBPI
);
948 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
949 // cmp-conversions from the same head block.
950 // Note that updateDomTree() modifies the children of the DomTree node
951 // currently being visited. The df_iterator supports that; it doesn't look at
952 // child_begin() / child_end() until after a node has been visited.
953 for (auto *I
: depth_first(DomTree
))
954 if (tryConvert(I
->getBlock()))