1 //===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions
11 /// into a conditional branch (B.cond), when the NZCV flags can be set for
12 /// "free". This is preferred on targets that have more flexibility when
13 /// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming
14 /// all other variables are equal). This can also reduce register pressure.
18 /// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
19 /// cbz w8, .LBB0_2 -> b.eq .LBB0_2
21 /// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
22 /// cbz w8, .LBB1_2 -> b.eq .LBB1_2
24 /// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
25 /// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
27 //===----------------------------------------------------------------------===//
30 #include "AArch64Subtarget.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineFunctionPass.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/CodeGen/TargetInstrInfo.h"
37 #include "llvm/CodeGen/TargetRegisterInfo.h"
38 #include "llvm/CodeGen/TargetSubtargetInfo.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/raw_ostream.h"
// Tag string for this file; the same literal is passed to INITIALIZE_PASS as
// the pass argument. Presumably also what the LLVM_DEBUG statements in this
// file key on -- confirm against llvm/Support/Debug.h.
44 #define DEBUG_TYPE "aarch64-cond-br-tuning"
// Human-readable pass name: returned by getPassName() and passed to
// INITIALIZE_PASS.
45 #define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning"
// Machine-function pass that performs the CBZ/CBNZ/TBZ/TBNZ -> B.cond rewrite
// described in the file header.
// NOTE(review): this listing shows no access specifiers, no in-class
// declaration of `ID`, and no closing braces for the constructor or the class
// itself -- lines appear to have been dropped; confirm against the full source.
48 class AArch64CondBrTuning
: public MachineFunctionPass
{
// Target instruction info; assigned at the start of runOnMachineFunction.
49 const AArch64InstrInfo
*TII
;
// Target register info; assigned at the start of runOnMachineFunction and
// passed to the NZCV modifiesRegister/readsRegister queries.
50 const TargetRegisterInfo
*TRI
;
// Register info of the function being processed; used for virtual-register
// definition and use queries.
52 MachineRegisterInfo
*MRI
;
// Passes ID to the base-class constructor and runs the pass initializer
// against the global pass registry.
56 AArch64CondBrTuning() : MachineFunctionPass(ID
) {
57 initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry());
// MachineFunctionPass overrides (defined out of line below).
59 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
60 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Returns the human-readable pass name macro.
61 StringRef
getPassName() const override
{ return AARCH64_CONDBR_TUNING_NAME
; }
// Helpers (defined out of line below).
// getOperandDef: looks up the defining instruction of a register operand.
64 MachineInstr
*getOperandDef(const MachineOperand
&MO
);
// convertToFlagSetting: produces the NZCV-setting form of an instruction.
65 MachineInstr
*convertToFlagSetting(MachineInstr
&MI
, bool IsFlagSetting
);
// convertToCondBr: builds a Bcc in place of a compare-and-branch.
66 MachineInstr
*convertToCondBr(MachineInstr
&MI
);
// tryToTuneBranch: attempts the rewrite for one (branch, definition) pair.
67 bool tryToTuneBranch(MachineInstr
&MI
, MachineInstr
&DefMI
);
69 } // end anonymous namespace
// Out-of-class definition of the pass's ID member, which the constructor
// hands to MachineFunctionPass.
71 char AArch64CondBrTuning::ID
= 0;
// Registers the pass under the argument string "aarch64-cond-br-tuning" with
// the human-readable name AARCH64_CONDBR_TUNING_NAME.
// NOTE(review): the two trailing `false` arguments are presumably the
// cfg-only and is-analysis flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
73 INITIALIZE_PASS(AArch64CondBrTuning
, "aarch64-cond-br-tuning",
74 AARCH64_CONDBR_TUNING_NAME
, false, false)
// Declares this pass's analysis requirements. The only visible statement
// forwards to the MachineFunctionPass implementation.
// NOTE(review): the listing skips a line before the forwarding call and omits
// the closing brace -- confirm against the full source whether anything else
// is recorded on AU.
76 void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage
&AU
) const {
78 MachineFunctionPass::getAnalysisUsage(AU
);
// Returns the instruction that defines the register held by MO, obtained from
// MRI->getUniqueVRegDef.
// NOTE(review): the statement guarded by the virtual-register check is missing
// from this listing; presumably non-virtual registers bail out early (the
// caller null-checks the result) -- confirm against the full source.
81 MachineInstr
*AArch64CondBrTuning::getOperandDef(const MachineOperand
&MO
) {
// Guard on registers that are not virtual.
82 if (!TargetRegisterInfo::isVirtualRegister(MO
.getReg()))
84 return MRI
->getUniqueVRegDef(MO
.getReg());
// Produces the NZCV-setting counterpart of MI.
//
// Two paths are visible:
//  * MI is already a flag-setting instruction: its implicit operands are
//    scanned for a dead NZCV register operand (see the existing comment).
//  * Otherwise a new instruction is built in front of MI using the opcode from
//    TII->convertToFlagSettingOpc, and MI's operands 1..N are copied onto it.
//
// NOTE(review): this listing is missing the end of the signature (the class
// declaration shows a second parameter `bool IsFlagSetting`), the definition
// of Is64Bit, the loop increment, the statement executed when a dead NZCV
// operand is found, and both return statements -- confirm against the full
// source.
87 MachineInstr
*AArch64CondBrTuning::convertToFlagSetting(MachineInstr
&MI
,
89 // If this is already the flag setting version of the instruction (e.g., SUBS)
90 // just make sure the implicit-def of NZCV isn't marked dead.
// Starts at getNumExplicitOperands(), so only implicit operands are visited.
92 for (unsigned I
= MI
.getNumExplicitOperands(), E
= MI
.getNumOperands();
94 MachineOperand
&MO
= MI
.getOperand(I
);
95 if (MO
.isReg() && MO
.isDead() && MO
.getReg() == AArch64::NZCV
)
// Map the opcode to its flag-setting form.
101 unsigned NewOpc
= TII
->convertToFlagSettingOpc(MI
.getOpcode(), Is64Bit
);
// Keep the original destination by default.
102 unsigned NewDestReg
= MI
.getOperand(0).getReg();
// With a single non-debug use of the result, write to XZR/WZR instead, so the
// value itself is dropped (cf. example 1 in the file header, where ADD becomes
// CMN). Presumably that single use is the branch being replaced -- confirm.
103 if (MRI
->hasOneNonDBGUse(MI
.getOperand(0).getReg()))
104 NewDestReg
= Is64Bit
? AArch64::XZR
: AArch64::WZR
;
// Insert the new instruction before MI, reusing MI's debug location.
106 MachineInstrBuilder MIB
= BuildMI(*MI
.getParent(), MI
, MI
.getDebugLoc(),
107 TII
->get(NewOpc
), NewDestReg
);
// Operand 0 (the destination) was supplied above; copy the remaining operands.
108 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
109 MIB
.add(MI
.getOperand(I
));
// Builds an AArch64::Bcc instruction in front of MI, using MI's debug
// location. The branch target is read from MI via TII->getBranchDestBlock and
// the condition code CC is chosen by a switch on MI's opcode.
// NOTE(review): the switch's case labels and the operands appended to the
// BuildMI result are missing from this listing; only the unreachable default
// is visible. The file header examples pair cbz with b.eq and a sign-bit tbz
// with b.pl -- confirm the full opcode-to-condition mapping against the full
// source.
114 MachineInstr
*AArch64CondBrTuning::convertToCondBr(MachineInstr
&MI
) {
115 AArch64CC::CondCode CC
;
116 MachineBasicBlock
*TargetMBB
= TII
->getBranchDestBlock(MI
);
117 switch (MI
.getOpcode()) {
// Reached only for an opcode the switch does not handle.
119 llvm_unreachable("Unexpected opcode!");
138 return BuildMI(*MI
.getParent(), MI
, MI
.getDebugLoc(), TII
->get(AArch64::Bcc
))
// Attempts to replace the branch MI, together with DefMI (the instruction
// defining the branch's register operand), by a flag-setting instruction and
// a conditional branch.
//
// Visible preconditions:
//  * MI and DefMI are compared by parent block (see the existing comment).
//  * DefMI's opcode must be one of the listed ADD/AND/BIC/SUB forms or their
//    flag-setting ADDS/ANDS/BICS/SUBS forms.
//  * A TBZ/TBNZ branch is checked against bit 31 (W forms) or bit 63 (X forms).
//  * The instructions strictly between DefMI and MI are checked for
//    modifying or reading NZCV.
//
// On the rewrite path it calls convertToFlagSetting(DefMI, IsFlagSetting) and
// convertToCondBr(MI), then erases the replaced instructions.
//
// NOTE(review): this listing is missing the statements guarded by each check
// (presumably early `return false`), the inner switches on MIOpc with their
// case labels, the `break`s, the condition guarding DefMI.eraseFromParent(),
// and the final return -- confirm against the full source.
143 bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr
&MI
,
144 MachineInstr
&DefMI
) {
145 // We don't want NZCV bits live across blocks.
146 if (MI
.getParent() != DefMI
.getParent())
// Assume DefMI already sets flags; cleared below for the non-flag-setting
// opcodes.
149 bool IsFlagSetting
= true;
150 unsigned MIOpc
= MI
.getOpcode();
151 MachineInstr
*NewCmp
= nullptr, *NewBr
= nullptr;
152 switch (DefMI
.getOpcode()) {
// W-register opcodes that do not set flags.
155 case AArch64::ADDWri
:
156 case AArch64::ADDWrr
:
157 case AArch64::ADDWrs
:
158 case AArch64::ADDWrx
:
159 case AArch64::ANDWri
:
160 case AArch64::ANDWrr
:
161 case AArch64::ANDWrs
:
162 case AArch64::BICWrr
:
163 case AArch64::BICWrs
:
164 case AArch64::SUBWri
:
165 case AArch64::SUBWrr
:
166 case AArch64::SUBWrs
:
167 case AArch64::SUBWrx
:
168 IsFlagSetting
= false;
// W-register opcodes that are already flag-setting (the S-suffixed forms).
170 case AArch64::ADDSWri
:
171 case AArch64::ADDSWrr
:
172 case AArch64::ADDSWrs
:
173 case AArch64::ADDSWrx
:
174 case AArch64::ANDSWri
:
175 case AArch64::ANDSWrr
:
176 case AArch64::ANDSWrs
:
177 case AArch64::BICSWrr
:
178 case AArch64::BICSWrs
:
179 case AArch64::SUBSWri
:
180 case AArch64::SUBSWrr
:
181 case AArch64::SUBSWrs
:
182 case AArch64::SUBSWrx
:
185 llvm_unreachable("Unexpected opcode!");
191 // Check to see if the TBZ/TBNZ is checking the sign bit.
192 if ((MIOpc
== AArch64::TBZW
|| MIOpc
== AArch64::TBNZW
) &&
193 MI
.getOperand(1).getImm() != 31)
196 // There must not be any instruction between DefMI and MI that clobbers or
// reads NZCV.
198 MachineBasicBlock::iterator
I(DefMI
), E(MI
);
// Scan starts at the instruction after DefMI and stops before MI.
199 for (I
= std::next(I
); I
!= E
; ++I
) {
200 if (I
->modifiesRegister(AArch64::NZCV
, TRI
) ||
201 I
->readsRegister(AArch64::NZCV
, TRI
))
204 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
205 LLVM_DEBUG(DefMI
.print(dbgs()));
206 LLVM_DEBUG(dbgs() << " ");
207 LLVM_DEBUG(MI
.print(dbgs()));
// Perform the rewrite for the W-register case.
209 NewCmp
= convertToFlagSetting(DefMI
, IsFlagSetting
);
210 NewBr
= convertToCondBr(MI
);
// X-register opcodes that do not set flags.
215 case AArch64::ADDXri
:
216 case AArch64::ADDXrr
:
217 case AArch64::ADDXrs
:
218 case AArch64::ADDXrx
:
219 case AArch64::ANDXri
:
220 case AArch64::ANDXrr
:
221 case AArch64::ANDXrs
:
222 case AArch64::BICXrr
:
223 case AArch64::BICXrs
:
224 case AArch64::SUBXri
:
225 case AArch64::SUBXrr
:
226 case AArch64::SUBXrs
:
227 case AArch64::SUBXrx
:
228 IsFlagSetting
= false;
// X-register opcodes that are already flag-setting (the S-suffixed forms).
230 case AArch64::ADDSXri
:
231 case AArch64::ADDSXrr
:
232 case AArch64::ADDSXrs
:
233 case AArch64::ADDSXrx
:
234 case AArch64::ANDSXri
:
235 case AArch64::ANDSXrr
:
236 case AArch64::ANDSXrs
:
237 case AArch64::BICSXrr
:
238 case AArch64::BICSXrs
:
239 case AArch64::SUBSXri
:
240 case AArch64::SUBSXrr
:
241 case AArch64::SUBSXrs
:
242 case AArch64::SUBSXrx
:
245 llvm_unreachable("Unexpected opcode!");
250 case AArch64::TBNZX
: {
251 // Check to see if the TBZ/TBNZ is checking the sign bit.
252 if ((MIOpc
== AArch64::TBZX
|| MIOpc
== AArch64::TBNZX
) &&
253 MI
.getOperand(1).getImm() != 63)
255 // There must not be any instruction between DefMI and MI that clobbers or
// reads NZCV.
257 MachineBasicBlock::iterator
I(DefMI
), E(MI
);
// Scan starts at the instruction after DefMI and stops before MI.
258 for (I
= std::next(I
); I
!= E
; ++I
) {
259 if (I
->modifiesRegister(AArch64::NZCV
, TRI
) ||
260 I
->readsRegister(AArch64::NZCV
, TRI
))
263 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
264 LLVM_DEBUG(DefMI
.print(dbgs()));
265 LLVM_DEBUG(dbgs() << " ");
266 LLVM_DEBUG(MI
.print(dbgs()));
// Perform the rewrite for the X-register case.
268 NewCmp
= convertToFlagSetting(DefMI
, IsFlagSetting
);
269 NewBr
= convertToCondBr(MI
);
// The void casts keep NewCmp/NewBr referenced even if the assert and
// LLVM_DEBUG statements below expand to nothing.
275 (void)NewCmp
; (void)NewBr
;
276 assert(NewCmp
&& NewBr
&& "Expected new instructions.");
278 LLVM_DEBUG(dbgs() << " with instruction:\n ");
279 LLVM_DEBUG(NewCmp
->print(dbgs()));
280 LLVM_DEBUG(dbgs() << " ");
281 LLVM_DEBUG(NewBr
->print(dbgs()));
283 // If this was a flag setting version of the instruction, we use the original
284 // instruction by just clearing the dead marker on the implicit-def of NZCV.
285 // Therefore, we should not erase this instruction.
287 DefMI
.eraseFromParent();
// The original branch is always removed; the new Bcc takes its place.
288 MI
.eraseFromParent();
// Pass entry point. Caches TII/TRI/MRI for MF, then walks every basic block
// starting from its first terminator, looks up the definition of each
// candidate branch's operand 0 and hands the pair to tryToTuneBranch.
// NOTE(review): this listing is missing the statement guarded by skipFunction,
// the LLVM_DEBUG( opener for the banner, the tail of the inner loop header,
// the switch's case labels, the use of LocalChange/Changed after the existing
// comment, and the final return -- confirm against the full source.
292 bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction
&MF
) {
293 if (skipFunction(MF
.getFunction()))
297 dbgs() << "********** AArch64 Conditional Branch Tuning **********\n"
298 << "********** Function: " << MF
.getName() << '\n');
// Cache per-function target and register info in the pass members.
300 TII
= static_cast<const AArch64InstrInfo
*>(MF
.getSubtarget().getInstrInfo());
301 TRI
= MF
.getSubtarget().getRegisterInfo();
302 MRI
= &MF
.getRegInfo();
304 bool Changed
= false;
305 for (MachineBasicBlock
&MBB
: MF
) {
306 bool LocalChange
= false;
// Iteration begins at the block's first terminator.
307 for (MachineBasicBlock::iterator I
= MBB
.getFirstTerminator(),
310 MachineInstr
&MI
= *I
;
311 switch (MI
.getOpcode()) {
// Operand 0 of the branch is the register being tested; a null DefMI means
// no definition was found and the branch is left alone.
322 MachineInstr
*DefMI
= getOperandDef(MI
.getOperand(0));
323 LocalChange
= (DefMI
&& tryToTuneBranch(MI
, *DefMI
));
326 // If the optimization was successful, we can't optimize any other
327 // branches because doing so would clobber the NZCV flags.
// Factory: returns a newly heap-allocated AArch64CondBrTuning pass as a
// FunctionPass pointer.
// NOTE(review): presumably the caller (the pass manager) takes ownership --
// confirm. The closing brace is missing from this listing.
337 FunctionPass
*llvm::createAArch64CondBrTuning() {
338 return new AArch64CondBrTuning();