1 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
11 // multiply and add / sub instructions) when special VMLx hazards are detected.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "mlx-expansion"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMSubtarget.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
// Hidden command-line flag "expand-all-fp-mlx"; its initial value is false.
// NOTE(review): the line carrying this declaration's type and the flag's use
// site are not visible in this excerpt.
32 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden
);
// Hidden unsigned command-line option "expand-limit"; its initial value is
// ~0U (all bits set). FindMLxHazard compares NumExpand against it.
33 static cl::opt
<unsigned>
34 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden
);
// Statistic counter NumExpand, described by the string below.
36 STATISTIC(NumExpand
, "Number of fp MLA / MLS instructions expanded");
// Machine function pass declared by this file; getPassName() reports it as
// "ARM MLA / MLS expansion pass".
// NOTE(review): some members used elsewhere in this file (ID, isA9, MIIdx,
// clearStack) are declared on lines that are not visible in this excerpt.
39 struct MLxExpansion
: public MachineFunctionPass
{
41 MLxExpansion() : MachineFunctionPass(ID
) {}
// Pass entry point; defined out of line further down in this file.
43 virtual bool runOnMachineFunction(MachineFunction
&Fn
);
45 virtual const char *getPassName() const {
46 return "ARM MLA / MLS expansion pass";
// Target instruction info, target register info and machine register info.
// All three are assigned at the start of runOnMachineFunction.
50 const ARMBaseInstrInfo
*TII
;
51 const TargetRegisterInfo
*TRI
;
52 MachineRegisterInfo
*MRI
;
// Four-slot array of instruction pointers. clearStack() nulls every slot and
// FindMLxHazard reads slots by an index computed modulo 4.
56 MachineInstr
* LastMIs
[4];
// Set of instructions; FindMLxHazard inserts into it and tests membership.
57 SmallPtrSet
<MachineInstr
*, 4> IgnoreStall
;
// Helper declarations; each is defined out of line later in this file.
60 void pushStack(MachineInstr
*MI
);
61 MachineInstr
*getAccDefMI(MachineInstr
*MI
) const;
62 unsigned getDefReg(MachineInstr
*MI
) const;
63 bool hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const;
64 bool FindMLxHazard(MachineInstr
*MI
);
// Expands a single instruction MI using the given multiply and add/sub opcodes.
65 void ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
66 unsigned MulOpc
, unsigned AddSubOpc
,
67 bool NegAcc
, bool HasLane
);
// Processes one basic block; called once per block by runOnMachineFunction.
68 bool ExpandFPMLxInstructions(MachineBasicBlock
&MBB
);
// Definition of the static ID member that the MLxExpansion constructor passes
// to the MachineFunctionPass base class.
70 char MLxExpansion::ID
= 0;
// Reset all four LastMIs slots to a null MachineInstr pointer.
73 void MLxExpansion::clearStack() {
74 std::fill(LastMIs
, LastMIs
+ 4, (MachineInstr
*)0);
// NOTE(review): the body of pushStack is not visible in this excerpt.
// Presumably it records MI in the LastMIs array -- confirm against the full
// source before relying on this.
78 void MLxExpansion::pushStack(MachineInstr
*MI
) {
// Find the instruction that defines MI's operand 1 (named the accumulator by
// this function), looking through copy-like and INSERT_SUBREG definitions.
// NOTE(review): the return statements and any enclosing loop are on lines that
// are not visible in this excerpt; the comments below describe only the
// visible steps.
84 MachineInstr
*MLxExpansion::getAccDefMI(MachineInstr
*MI
) const {
85 // Look past COPY and INSERT_SUBREG instructions to find the
86 // real definition MI. This is important for _sfp instructions.
87 unsigned Reg
= MI
->getOperand(1).getReg();
// Physical registers are special-cased (the branch body is not visible here).
88 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
91 MachineBasicBlock
*MBB
= MI
->getParent();
92 MachineInstr
*DefMI
= MRI
->getVRegDef(Reg
);
// A definition in a different basic block than MI is special-cased (the
// branch body is not visible here).
94 if (DefMI
->getParent() != MBB
)
// Copy-like definition: continue from its operand 1 when that register is
// virtual.
96 if (DefMI
->isCopyLike()) {
97 Reg
= DefMI
->getOperand(1).getReg();
98 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
99 DefMI
= MRI
->getVRegDef(Reg
);
// INSERT_SUBREG definition: continue from its operand 2 when that register is
// virtual.
102 } else if (DefMI
->isInsertSubreg()) {
103 Reg
= DefMI
->getOperand(2).getReg();
104 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
105 DefMI
= MRI
->getVRegDef(Reg
);
// Starting from MI's operand 0 register, follow the register's single
// non-debug use through COPY and INSERT_SUBREG instructions.
// NOTE(review): the return statements are on lines that are not visible in
// this excerpt, so the value returned in each case cannot be stated here.
114 unsigned MLxExpansion::getDefReg(MachineInstr
*MI
) const {
115 unsigned Reg
= MI
->getOperand(0).getReg();
// Stop following when the register is physical or does not have exactly one
// non-debug use.
116 if (TargetRegisterInfo::isPhysicalRegister(Reg
) ||
117 !MRI
->hasOneNonDBGUse(Reg
))
120 MachineBasicBlock
*MBB
= MI
->getParent();
121 MachineInstr
*UseMI
= &*MRI
->use_nodbg_begin(Reg
);
// A use located in a different basic block than MI is special-cased.
122 if (UseMI
->getParent() != MBB
)
// While the user is a COPY or INSERT_SUBREG, move on to the register it
// defines (its operand 0) and apply the same checks again.
125 while (UseMI
->isCopy() || UseMI
->isInsertSubreg()) {
126 Reg
= UseMI
->getOperand(0).getReg();
127 if (TargetRegisterInfo::isPhysicalRegister(Reg
) ||
128 !MRI
->hasOneNonDBGUse(Reg
))
130 UseMI
= &*MRI
->use_nodbg_begin(Reg
);
131 if (UseMI
->getParent() != MBB
)
// Decide whether MI has a read-after-write hazard on Reg. For instructions
// whose domain bits include VFP or NEON, the answer is whether MI reads Reg.
// NOTE(review): the results for the VMOVRS / VMOVRRD case and for the
// fall-through case are on lines that are not visible in this excerpt.
138 bool MLxExpansion::hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const {
139 // FIXME: Detect integer instructions properly.
140 const MCInstrDesc
&MCID
= MI
->getDesc();
// Extract the execution-domain bits from the instruction's target flags.
141 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
144 unsigned Opcode
= MCID
.getOpcode();
// VMOVRS and VMOVRRD are special-cased before the domain test below.
145 if (Opcode
== ARM::VMOVRS
|| Opcode
== ARM::VMOVRRD
)
147 if ((Domain
& ARMII::DomainVFP
) || (Domain
& ARMII::DomainNEON
))
148 return MI
->readsRegister(Reg
, TRI
);
// Inspect MI against the instructions recorded in LastMIs to look for VMLx
// stalls and RAW hazards.
// NOTE(review): every return statement of this function is on a line that is
// not visible in this excerpt, so the result of each check cannot be stated
// here.
153 bool MLxExpansion::FindMLxHazard(MachineInstr
*MI
) {
// The expansion count is checked against the "expand-limit" option first.
154 if (NumExpand
>= ExpandLimit
)
// Look at the instruction that defines MI's accumulator operand.
160 MachineInstr
*DefMI
= getAccDefMI(MI
);
161 if (TII
->isFpMLxInstruction(DefMI
->getOpcode())) {
163 // r3 = vmla r0, r1, r2
164 // takes 16 - 17 cycles
169 // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
170 IgnoreStall
.insert(DefMI
);
// Instructions previously inserted into IgnoreStall are special-cased.
174 if (IgnoreStall
.count(MI
))
177 // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
178 // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
179 // preserves the in-order retirement of the instructions.
180 // Look at the next few instructions, if *most* of them can cause hazards,
181 // then the scheduler can't *fix* this, we'd better break up the VMLA.
// Both limits are 1 when isA9 is set and 4 otherwise.
182 unsigned Limit1
= isA9
? 1 : 4;
183 unsigned Limit2
= isA9
? 1 : 4;
184 for (unsigned i
= 1; i
<= 4; ++i
) {
// Step i slots back from MIIdx in LastMIs, wrapping around modulo 4.
185 int Idx
= ((int)MIIdx
- i
+ 4) % 4;
186 MachineInstr
*NextMI
= LastMIs
[Idx
];
190 if (TII
->canCauseFpMLxStall(NextMI
->getOpcode())) {
195 // Look for VMLx RAW hazard.
196 if (i
<= Limit2
&& hasRAWHazard(getDefReg(MI
), NextMI
))
203 /// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
204 /// of MUL + ADD / SUB instructions.
///
/// MBB is the block the new instructions are built in and MI is the
/// instruction being replaced (it is erased at the end). MulOpc and
/// AddSubOpc are the opcodes of the two new instructions. HasLane says
/// whether MI carries a lane immediate at operand 4.
/// NOTE(review): the lines that consume NegAcc and LaneImm are not visible in
/// this excerpt.
206 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
207 unsigned MulOpc
, unsigned AddSubOpc
,
208 bool NegAcc
, bool HasLane
) {
// Operand layout read from MI: 0 = destination, 1 = accumulator,
// 2 and 3 = sources, then an optional lane immediate, then the predicate
// condition code and the predicate register.
209 unsigned DstReg
= MI
->getOperand(0).getReg();
210 bool DstDead
= MI
->getOperand(0).isDead();
211 unsigned AccReg
= MI
->getOperand(1).getReg();
212 unsigned Src1Reg
= MI
->getOperand(2).getReg();
213 unsigned Src2Reg
= MI
->getOperand(3).getReg();
214 bool Src1Kill
= MI
->getOperand(2).isKill();
215 bool Src2Kill
= MI
->getOperand(3).isKill();
216 unsigned LaneImm
= HasLane
? MI
->getOperand(4).getImm() : 0;
// The predicate operands start at index 5 with a lane immediate, else at 4.
217 unsigned NextOp
= HasLane
? 5 : 4;
218 ARMCC::CondCodes Pred
= (ARMCC::CondCodes
)MI
->getOperand(NextOp
).getImm();
219 unsigned PredReg
= MI
->getOperand(++NextOp
).getReg();
221 const MCInstrDesc
&MCID1
= TII
->get(MulOpc
);
222 const MCInstrDesc
&MCID2
= TII
->get(AddSubOpc
);
// Fresh virtual register for the multiply result, using the register class of
// the multiply's operand 0.
223 unsigned TmpReg
= MRI
->createVirtualRegister(TII
->getRegClass(MCID1
, 0, TRI
));
// Build the multiply: TmpReg is its def, the two sources keep their kill flags.
225 MachineInstrBuilder MIB
= BuildMI(MBB
, *MI
, MI
->getDebugLoc(), MCID1
, TmpReg
)
226 .addReg(Src1Reg
, getKillRegState(Src1Kill
))
227 .addReg(Src2Reg
, getKillRegState(Src2Kill
));
230 MIB
.addImm(Pred
).addReg(PredReg
);
// Build the add/sub: DstReg is its def and keeps MI's dead flag.
232 MIB
= BuildMI(MBB
, *MI
, MI
->getDebugLoc(), MCID2
)
233 .addReg(DstReg
, getDefRegState(true) | getDeadRegState(DstDead
));
// Two operand orders follow: (TmpReg, AccReg) and (AccReg, TmpReg).
// NOTE(review): the condition selecting between them is not visible here;
// presumably it is NegAcc -- confirm. TmpReg is marked killed in both orders.
// AccReg is marked killed only in the first, and only when it has exactly one
// non-debug use.
236 bool AccKill
= MRI
->hasOneNonDBGUse(AccReg
);
237 MIB
.addReg(TmpReg
, getKillRegState(true))
238 .addReg(AccReg
, getKillRegState(AccKill
));
240 MIB
.addReg(AccReg
).addReg(TmpReg
, getKillRegState(true));
242 MIB
.addImm(Pred
).addReg(PredReg
);
// Debug output: print MI, then the two instructions immediately before it
// (presumably the two instructions built above).
245 dbgs() << "Expanding: " << *MI
;
247 MachineBasicBlock::iterator MII
= MI
;
248 MII
= llvm::prior(MII
);
249 MachineInstr
&MI2
= *MII
;
250 MII
= llvm::prior(MII
);
251 MachineInstr
&MI1
= *MII
;
252 dbgs() << " " << MI1
;
253 dbgs() << " " << MI2
;
// Remove the original instruction from its block.
256 MI
->eraseFromParent();
// Walk the instructions of MBB using reverse iterators and expand fp MLx
// instructions via ExpandFPMLxInstruction.
// NOTE(review): the loop header, several branch bodies, part of the expansion
// condition and the return statement are on lines that are not visible in
// this excerpt.
260 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock
&MBB
) {
261 bool Changed
= false;
267 MachineBasicBlock::reverse_iterator MII
= MBB
.rbegin(), E
= MBB
.rend();
269 MachineInstr
*MI
= &*MII
;
// Labels, implicit defs and copies get their own branch.
271 if (MI
->isLabel() || MI
->isImplicitDef() || MI
->isCopy()) {
276 const MCInstrDesc
&MCID
= MI
->getDesc();
// Barrier instructions get their own branch.
277 if (MCID
.isBarrier()) {
// Instructions in the general (non-VFP / NEON) domain get their own branch.
284 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
285 if (Domain
== ARMII::DomainGeneral
) {
287 // Assume dual issues of non-VFP / NEON instructions.
// isFpMLxInstruction is passed MulOpc, AddSubOpc, NegAcc and HasLane, which
// are then forwarded to ExpandFPMLxInstruction.
292 unsigned MulOpc
, AddSubOpc
;
293 bool NegAcc
, HasLane
;
294 if (!TII
->isFpMLxInstruction(MCID
.getOpcode(),
295 MulOpc
, AddSubOpc
, NegAcc
, HasLane
) ||
299 ExpandFPMLxInstruction(MBB
, MI
, MulOpc
, AddSubOpc
, NegAcc
, HasLane
);
300 E
= MBB
.rend(); // May have changed if MI was the 1st instruction.
// Pass entry point: cache the target hooks for Fn, then run
// ExpandFPMLxInstructions on the function's basic blocks, OR-ing the
// per-block results into Modified.
// NOTE(review): the loop increment and the return statement are on lines that
// are not visible in this excerpt.
312 bool MLxExpansion::runOnMachineFunction(MachineFunction
&Fn
) {
313 TII
= static_cast<const ARMBaseInstrInfo
*>(Fn
.getTarget().getInstrInfo());
314 TRI
= Fn
.getTarget().getRegisterInfo();
315 MRI
= &Fn
.getRegInfo();
// isA9 records whether the subtarget is Cortex-A9; FindMLxHazard uses it to
// pick its limits.
316 const ARMSubtarget
*STI
= &Fn
.getTarget().getSubtarget
<ARMSubtarget
>();
317 isA9
= STI
->isCortexA9();
319 bool Modified
= false;
320 for (MachineFunction::iterator MFI
= Fn
.begin(), E
= Fn
.end(); MFI
!= E
;
322 MachineBasicBlock
&MBB
= *MFI
;
323 Modified
|= ExpandFPMLxInstructions(MBB
);
// Factory function: returns a newly allocated MLxExpansion pass as a
// FunctionPass pointer.
329 FunctionPass
*llvm::createMLxExpansionPass() {
330 return new MLxExpansion();