1 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
10 // multiply and add / sub instructions) when special VMLx hazards are detected.
12 //===----------------------------------------------------------------------===//
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMSubtarget.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstr.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/TargetRegisterInfo.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
29 #define DEBUG_TYPE "mlx-expansion"
// Hidden command-line flag "expand-all-fp-mlx"; defaults to false.
// NOTE(review): the identifier is spelled "ForceExapnd" (sic); renaming it
// would require updating every use, which are not all visible here.
32 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden
);
// Hidden command-line option "expand-limit"; defaults to ~0U. FindMLxHazard
// compares NumExpand against this value.
33 static cl::opt
<unsigned>
34 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden
);
// Statistic counter for the number of fp MLA / MLS instructions expanded.
36 STATISTIC(NumExpand
, "Number of fp MLA / MLS instructions expanded");
// MLxExpansion - MachineFunctionPass whose runOnMachineFunction calls
// ExpandFPMLxInstructions on every basic block of the function.
39 struct MLxExpansion
: public MachineFunctionPass
{
41 MLxExpansion() : MachineFunctionPass(ID
) {}
// Pass entry point; defined out of line below.
43 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
45 StringRef
getPassName() const override
{
46 return "ARM MLA / MLS expansion pass";
// Target instruction info, register info and machine register info; all three
// are assigned in runOnMachineFunction.
50 const ARMBaseInstrInfo
*TII
;
51 const TargetRegisterInfo
*TRI
;
52 MachineRegisterInfo
*MRI
;
// Four instruction slots: reset to nullptr by clearStack and indexed modulo 4
// by FindMLxHazard.
57 MachineInstr
* LastMIs
[4];
// Set of instructions that FindMLxHazard inserts into (IgnoreStall.insert) and
// later tests with IgnoreStall.count.
58 SmallPtrSet
<MachineInstr
*, 4> IgnoreStall
;
// Helper declarations; each is defined out of line below.
61 void pushStack(MachineInstr
*MI
);
62 MachineInstr
*getAccDefMI(MachineInstr
*MI
) const;
63 unsigned getDefReg(MachineInstr
*MI
) const;
64 bool hasLoopHazard(MachineInstr
*MI
) const;
65 bool hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const;
66 bool FindMLxHazard(MachineInstr
*MI
);
67 void ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
68 unsigned MulOpc
, unsigned AddSubOpc
,
69 bool NegAcc
, bool HasLane
);
70 bool ExpandFPMLxInstructions(MachineBasicBlock
&MBB
);
// Definition of the pass ID member passed to the MachineFunctionPass
// constructor.
72 char MLxExpansion::ID
= 0;
// clearStack - Reset all four LastMIs slots to nullptr.
// NOTE(review): the tail of this function is not visible in this view; it may
// reset further state - confirm against the full file.
75 void MLxExpansion::clearStack() {
76 std::fill(LastMIs
, LastMIs
+ 4, nullptr);
// pushStack - Takes the instruction MI. Only the signature is visible in this
// view.
// NOTE(review): presumably records MI in one of the LastMIs slots - confirm
// against the full file.
80 void MLxExpansion::pushStack(MachineInstr
*MI
) {
// getAccDefMI - Find the instruction that defines operand 1 of MI, looking
// through copy-like and INSERT_SUBREG definitions (see the comment below).
// Operand 1 is the register that ExpandFPMLxInstruction reads as AccReg.
// NOTE(review): the loop structure and the return statements are not visible
// in this view; the notes below describe only the visible checks.
86 MachineInstr
*MLxExpansion::getAccDefMI(MachineInstr
*MI
) const {
87 // Look past COPY and INSERT_SUBREG instructions to find the
88 // real definition MI. This is important for _sfp instructions.
89 unsigned Reg
= MI
->getOperand(1).getReg();
// Physical registers are special-cased before MRI->getVRegDef is consulted.
90 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
93 MachineBasicBlock
*MBB
= MI
->getParent();
94 MachineInstr
*DefMI
= MRI
->getVRegDef(Reg
);
// A definition located in a different basic block than MI is special-cased.
96 if (DefMI
->getParent() != MBB
)
// Copy-like definition: continue from its operand 1 when that is a virtual
// register.
98 if (DefMI
->isCopyLike()) {
99 Reg
= DefMI
->getOperand(1).getReg();
100 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
101 DefMI
= MRI
->getVRegDef(Reg
);
// INSERT_SUBREG definition: continue from its operand 2 when that is a
// virtual register.
104 } else if (DefMI
->isInsertSubreg()) {
105 Reg
= DefMI
->getOperand(2).getReg();
106 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
107 DefMI
= MRI
->getVRegDef(Reg
);
// getDefReg - Start from the register in operand 0 of MI and follow its single
// non-debug use through COPY / INSERT_SUBREG instructions, updating Reg to the
// register in operand 0 of each such user.
// NOTE(review): the return statements are not visible in this view; the notes
// below describe only the visible checks.
116 unsigned MLxExpansion::getDefReg(MachineInstr
*MI
) const {
117 unsigned Reg
= MI
->getOperand(0).getReg();
// Special case: Reg is physical or does not have exactly one non-debug use.
118 if (TargetRegisterInfo::isPhysicalRegister(Reg
) ||
119 !MRI
->hasOneNonDBGUse(Reg
))
122 MachineBasicBlock
*MBB
= MI
->getParent();
123 MachineInstr
*UseMI
= &*MRI
->use_instr_nodbg_begin(Reg
);
// Special case: the user lives in a different basic block than MI.
124 if (UseMI
->getParent() != MBB
)
// While the user is a COPY or INSERT_SUBREG, repeat the same checks on the
// register it defines and move on to that register's user.
127 while (UseMI
->isCopy() || UseMI
->isInsertSubreg()) {
128 Reg
= UseMI
->getOperand(0).getReg();
129 if (TargetRegisterInfo::isPhysicalRegister(Reg
) ||
130 !MRI
->hasOneNonDBGUse(Reg
))
132 UseMI
= &*MRI
->use_instr_nodbg_begin(Reg
);
133 if (UseMI
->getParent() != MBB
)
140 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across
141 /// a single-MBB loop.
// The walk starts at the definition of operand 1 of MI and looks through PHI,
// copy-like and INSERT_SUBREG definitions.
// NOTE(review): the loop structure and the return statements are not visible
// in this view; the notes below describe only the visible checks.
142 bool MLxExpansion::hasLoopHazard(MachineInstr
*MI
) const {
143 unsigned Reg
= MI
->getOperand(1).getReg();
// Physical registers are special-cased before MRI->getVRegDef is consulted.
144 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
147 MachineBasicBlock
*MBB
= MI
->getParent();
148 MachineInstr
*DefMI
= MRI
->getVRegDef(Reg
);
// A definition located in a different basic block than MI is special-cased.
151 if (DefMI
->getParent() != MBB
)
// PHI definition: operands are visited in pairs starting at index 1, with
// operand i read as a register and operand i + 1 as a basic block. Only the
// incoming value whose block is MBB itself is followed.
154 if (DefMI
->isPHI()) {
155 for (unsigned i
= 1, e
= DefMI
->getNumOperands(); i
< e
; i
+= 2) {
156 if (DefMI
->getOperand(i
+ 1).getMBB() == MBB
) {
157 unsigned SrcReg
= DefMI
->getOperand(i
).getReg();
158 if (TargetRegisterInfo::isVirtualRegister(SrcReg
)) {
159 DefMI
= MRI
->getVRegDef(SrcReg
);
// Copy-like definition: continue from its operand 1 when that is a virtual
// register.
164 } else if (DefMI
->isCopyLike()) {
165 Reg
= DefMI
->getOperand(1).getReg();
166 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
167 DefMI
= MRI
->getVRegDef(Reg
);
// INSERT_SUBREG definition: continue from its operand 2 when that is a
// virtual register.
170 } else if (DefMI
->isInsertSubreg()) {
171 Reg
= DefMI
->getOperand(2).getReg();
172 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
173 DefMI
= MRI
->getVRegDef(Reg
);
// hasRAWHazard - Check whether MI reads Reg, taking MI's execution domain into
// account. For an instruction whose domain bits include VFP or NEON the result
// is MI->readsRegister(Reg, TRI).
// NOTE(review): the results for the VMOVRS / VMOVRRD case and for the
// remaining domains are not visible in this view.
184 bool MLxExpansion::hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const {
185 // FIXME: Detect integer instructions properly.
186 const MCInstrDesc
&MCID
= MI
->getDesc();
// Extract the execution-domain bits from the instruction's TSFlags.
187 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
190 unsigned Opcode
= MCID
.getOpcode();
// VMOVRS and VMOVRRD are special-cased by opcode before the domain test.
191 if (Opcode
== ARM::VMOVRS
|| Opcode
== ARM::VMOVRRD
)
193 if ((Domain
& ARMII::DomainVFP
) || (Domain
& ARMII::DomainNEON
))
194 return MI
->readsRegister(Reg
, TRI
);
// isFpMulInstruction - File-local predicate on an opcode. FindMLxHazard calls
// it with the opcode of the accumulator's defining instruction.
// NOTE(review): the body (the list of accepted opcodes) is not visible in this
// view.
198 static bool isFpMulInstruction(unsigned Opcode
) {
// FindMLxHazard - Decide whether the MLx instruction MI is involved in a
// hazard. The visible checks are, in order: the expansion limit, the
// instruction defining MI's accumulator, membership in IgnoreStall, and the
// instructions recorded in the four LastMIs slots.
// NOTE(review): most branch outcomes (return statements) and the guards around
// the Swift-specific return are not visible in this view.
212 bool MLxExpansion::FindMLxHazard(MachineInstr
*MI
) {
// Special case once NumExpand has reached the "expand-limit" option value.
213 if (NumExpand
>= ExpandLimit
)
219 MachineInstr
*DefMI
= getAccDefMI(MI
);
// The accumulator is itself produced by an fp MLx instruction; DefMI is
// recorded in IgnoreStall.
220 if (TII
->isFpMLxInstruction(DefMI
->getOpcode())) {
222 // r3 = vmla r0, r1, r2
223 // takes 16 - 17 cycles
228 // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
229 IgnoreStall
.insert(DefMI
);
233 // On Swift, we mostly care about hazards from multiplication instructions
234 // writing the accumulator and the pipelining of loop iterations by out-of-
237 return isFpMulInstruction(DefMI
->getOpcode()) || hasLoopHazard(MI
);
// Special case: MI was recorded in IgnoreStall by an earlier call.
239 if (IgnoreStall
.count(MI
))
242 // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
243 // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
244 // preserves the in-order retirement of the instructions.
245 // Look at the next few instructions, if *most* of them can cause hazards,
246 // then the scheduler can't *fix* this, we'd better break up the VMLA.
// Both limits are 1 when isLikeA9 is set and 4 otherwise. Limit2 bounds the
// RAW-hazard check below; the use of Limit1 is not visible in this view.
247 unsigned Limit1
= isLikeA9
? 1 : 4;
248 unsigned Limit2
= isLikeA9
? 1 : 4;
// Visit the four LastMIs slots, stepping backwards from MIIdx and wrapping
// modulo 4.
249 for (unsigned i
= 1; i
<= 4; ++i
) {
250 int Idx
= ((int)MIIdx
- i
+ 4) % 4;
251 MachineInstr
*NextMI
= LastMIs
[Idx
];
255 if (TII
->canCauseFpMLxStall(NextMI
->getOpcode())) {
260 // Look for VMLx RAW hazard.
261 if (i
<= Limit2
&& hasRAWHazard(getDefReg(MI
), NextMI
))
268 /// ExpandFPMLxInstruction - Expand a MLA / MLS instruction into a pair
269 /// of MUL + ADD / SUB instructions.
// MBB is the block the new instructions are inserted into, before MI. MulOpc
// and AddSubOpc are the opcodes of the two replacement instructions. HasLane
// selects whether MI carries a lane immediate in operand 4.
// NOTE(review): the conditions guarding several statements below (including
// the one that chooses between the two add / sub operand orders, presumably
// NegAcc) are not visible in this view.
271 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
272 unsigned MulOpc
, unsigned AddSubOpc
,
273 bool NegAcc
, bool HasLane
) {
// Operand layout read from MI: 0 = destination, 1 = accumulator,
// 2 and 3 = multiply sources, optional 4 = lane immediate, then the predicate
// condition code immediate followed by the predicate register.
274 unsigned DstReg
= MI
->getOperand(0).getReg();
275 bool DstDead
= MI
->getOperand(0).isDead();
276 unsigned AccReg
= MI
->getOperand(1).getReg();
277 unsigned Src1Reg
= MI
->getOperand(2).getReg();
278 unsigned Src2Reg
= MI
->getOperand(3).getReg();
279 bool Src1Kill
= MI
->getOperand(2).isKill();
280 bool Src2Kill
= MI
->getOperand(3).isKill();
281 unsigned LaneImm
= HasLane
? MI
->getOperand(4).getImm() : 0;
// Index of the predicate condition code; the predicate register follows it.
282 unsigned NextOp
= HasLane
? 5 : 4;
283 ARMCC::CondCodes Pred
= (ARMCC::CondCodes
)MI
->getOperand(NextOp
).getImm();
284 unsigned PredReg
= MI
->getOperand(++NextOp
).getReg();
286 const MCInstrDesc
&MCID1
= TII
->get(MulOpc
);
287 const MCInstrDesc
&MCID2
= TII
->get(AddSubOpc
);
288 const MachineFunction
&MF
= *MI
->getParent()->getParent();
// Fresh virtual register for the multiply result, in the register class of
// the multiply's operand 0.
289 unsigned TmpReg
= MRI
->createVirtualRegister(
290 TII
->getRegClass(MCID1
, 0, TRI
, MF
));
// First instruction: TmpReg = MulOpc Src1Reg, Src2Reg, with MI's kill flags
// and predicate carried over.
292 MachineInstrBuilder MIB
= BuildMI(MBB
, MI
, MI
->getDebugLoc(), MCID1
, TmpReg
)
293 .addReg(Src1Reg
, getKillRegState(Src1Kill
))
294 .addReg(Src2Reg
, getKillRegState(Src2Kill
));
297 MIB
.addImm(Pred
).addReg(PredReg
);
// Second instruction: AddSubOpc defining DstReg, preserving MI's dead flag.
299 MIB
= BuildMI(MBB
, MI
, MI
->getDebugLoc(), MCID2
)
300 .addReg(DstReg
, getDefRegState(true) | getDeadRegState(DstDead
));
// Operand order (TmpReg, AccReg): AccReg is marked killed only when it has a
// single non-debug use.
303 bool AccKill
= MRI
->hasOneNonDBGUse(AccReg
);
304 MIB
.addReg(TmpReg
, getKillRegState(true))
305 .addReg(AccReg
, getKillRegState(AccKill
));
// Operand order (AccReg, TmpReg): AccReg carries no kill flag here.
307 MIB
.addReg(AccReg
).addReg(TmpReg
, getKillRegState(true));
309 MIB
.addImm(Pred
).addReg(PredReg
);
// Debug output: the original instruction followed by the two instructions
// that immediately precede it in the block (the ones just built).
312 dbgs() << "Expanding: " << *MI
;
314 MachineBasicBlock::iterator MII
= MI
;
315 MII
= std::prev(MII
);
316 MachineInstr
&MI2
= *MII
;
317 MII
= std::prev(MII
);
318 MachineInstr
&MI1
= *MII
;
319 dbgs() << " " << MI1
;
320 dbgs() << " " << MI2
;
// Remove the original MLx instruction from its block.
323 MI
->eraseFromParent();
// ExpandFPMLxInstructions - Walk the instructions of MBB in reverse order and
// call ExpandFPMLxInstruction on MLx instructions selected by the visible
// checks below.
// NOTE(review): the loop header, the bodies of the filtering branches, the
// rest of the expansion condition and the return statement are not visible in
// this view.
327 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock
&MBB
) {
328 bool Changed
= false;
334 MachineBasicBlock::reverse_iterator MII
= MBB
.rbegin(), E
= MBB
.rend();
// Take the current instruction and advance the reverse iterator.
336 MachineInstr
*MI
= &*MII
++;
// Position markers, IMPLICIT_DEFs and COPYs are special-cased.
338 if (MI
->isPosition() || MI
->isImplicitDef() || MI
->isCopy())
341 const MCInstrDesc
&MCID
= MI
->getDesc();
// Barrier instructions are special-cased.
342 if (MI
->isBarrier()) {
// Instructions in the general execution domain are special-cased.
348 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
349 if (Domain
== ARMII::DomainGeneral
) {
351 // Assume dual issues of non-VFP / NEON instructions.
// isFpMLxInstruction fills in the opcodes and flags that are then passed on to
// ExpandFPMLxInstruction.
356 unsigned MulOpc
, AddSubOpc
;
357 bool NegAcc
, HasLane
;
358 if (!TII
->isFpMLxInstruction(MCID
.getOpcode(),
359 MulOpc
, AddSubOpc
, NegAcc
, HasLane
) ||
363 ExpandFPMLxInstruction(MBB
, MI
, MulOpc
, AddSubOpc
, NegAcc
, HasLane
);
// runOnMachineFunction - Pass entry point. Caches the subtarget's instruction
// and register info, sets the CPU flags used by the hazard checks, and runs
// ExpandFPMLxInstructions on every basic block of Fn, OR-ing the results into
// Modified.
// NOTE(review): the return statements, including those of the two early
// checks, are not visible in this view.
372 bool MLxExpansion::runOnMachineFunction(MachineFunction
&Fn
) {
// Early check: skipFunction on Fn's IR function.
373 if (skipFunction(Fn
.getFunction()))
376 TII
= static_cast<const ARMBaseInstrInfo
*>(Fn
.getSubtarget().getInstrInfo());
377 TRI
= Fn
.getSubtarget().getRegisterInfo();
378 MRI
= &Fn
.getRegInfo();
379 const ARMSubtarget
*STI
= &Fn
.getSubtarget
<ARMSubtarget
>();
// Early check: the subtarget reports whether MLx expansion applies.
380 if (!STI
->expandMLx())
// Note that isLikeA9 is also set for Swift subtargets.
382 isLikeA9
= STI
->isLikeA9() || STI
->isSwift();
383 isSwift
= STI
->isSwift();
385 bool Modified
= false;
386 for (MachineBasicBlock
&MBB
: Fn
)
387 Modified
|= ExpandFPMLxInstructions(MBB
);
// createMLxExpansionPass - Factory that returns a newly allocated MLxExpansion
// pass as a FunctionPass pointer.
392 FunctionPass
*llvm::createMLxExpansionPass() {
393 return new MLxExpansion();