1 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
10 // multiply and add / sub instructions) when special VMLx hazards are detected.
12 //===----------------------------------------------------------------------===//
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMSubtarget.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstr.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/TargetRegisterInfo.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
29 #define DEBUG_TYPE "mlx-expansion"
32 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden
);
// Hidden unsigned command-line option "expand-limit", defaulting to ~0U.
// FindMLxHazard compares NumExpand against it (NumExpand >= ExpandLimit);
// presumably this caps how many MLx instructions get expanded -- the statement
// guarded by that comparison is not visible here, so confirm.
33 static cl::opt
<unsigned>
34 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden
);
// Statistic counter whose description string is "Number of fp MLA / MLS
// instructions expanded". FindMLxHazard tests it against ExpandLimit.
36 STATISTIC(NumExpand
, "Number of fp MLA / MLS instructions expanded");
39 struct MLxExpansion
: public MachineFunctionPass
{
41 MLxExpansion() : MachineFunctionPass(ID
) {}
43 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
45 StringRef
getPassName() const override
{
46 return "ARM MLA / MLS expansion pass";
50 const ARMBaseInstrInfo
*TII
;
51 const TargetRegisterInfo
*TRI
;
52 MachineRegisterInfo
*MRI
;
57 MachineInstr
* LastMIs
[4];
58 SmallPtrSet
<MachineInstr
*, 4> IgnoreStall
;
61 void pushStack(MachineInstr
*MI
);
62 MachineInstr
*getAccDefMI(MachineInstr
*MI
) const;
63 unsigned getDefReg(MachineInstr
*MI
) const;
64 bool hasLoopHazard(MachineInstr
*MI
) const;
65 bool hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const;
66 bool FindMLxHazard(MachineInstr
*MI
);
67 void ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
68 unsigned MulOpc
, unsigned AddSubOpc
,
69 bool NegAcc
, bool HasLane
);
70 bool ExpandFPMLxInstructions(MachineBasicBlock
&MBB
);
// Out-of-class definition of the pass's static ID member, initialized to 0.
// The MLxExpansion constructor passes ID to the MachineFunctionPass base.
72 char MLxExpansion::ID
= 0;
75 void MLxExpansion::clearStack() {
76 std::fill(LastMIs
, LastMIs
+ 4, nullptr);
80 void MLxExpansion::pushStack(MachineInstr
*MI
) {
86 MachineInstr
*MLxExpansion::getAccDefMI(MachineInstr
*MI
) const {
87 // Look past COPY and INSERT_SUBREG instructions to find the
88 // real definition MI. This is important for _sfp instructions.
89 unsigned Reg
= MI
->getOperand(1).getReg();
90 if (Register::isPhysicalRegister(Reg
))
93 MachineBasicBlock
*MBB
= MI
->getParent();
94 MachineInstr
*DefMI
= MRI
->getVRegDef(Reg
);
96 if (DefMI
->getParent() != MBB
)
98 if (DefMI
->isCopyLike()) {
99 Reg
= DefMI
->getOperand(1).getReg();
100 if (Register::isVirtualRegister(Reg
)) {
101 DefMI
= MRI
->getVRegDef(Reg
);
104 } else if (DefMI
->isInsertSubreg()) {
105 Reg
= DefMI
->getOperand(2).getReg();
106 if (Register::isVirtualRegister(Reg
)) {
107 DefMI
= MRI
->getVRegDef(Reg
);
116 unsigned MLxExpansion::getDefReg(MachineInstr
*MI
) const {
117 unsigned Reg
= MI
->getOperand(0).getReg();
118 if (Register::isPhysicalRegister(Reg
) || !MRI
->hasOneNonDBGUse(Reg
))
121 MachineBasicBlock
*MBB
= MI
->getParent();
122 MachineInstr
*UseMI
= &*MRI
->use_instr_nodbg_begin(Reg
);
123 if (UseMI
->getParent() != MBB
)
126 while (UseMI
->isCopy() || UseMI
->isInsertSubreg()) {
127 Reg
= UseMI
->getOperand(0).getReg();
128 if (Register::isPhysicalRegister(Reg
) || !MRI
->hasOneNonDBGUse(Reg
))
130 UseMI
= &*MRI
->use_instr_nodbg_begin(Reg
);
131 if (UseMI
->getParent() != MBB
)
138 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across
139 /// a single-MBB loop.
140 bool MLxExpansion::hasLoopHazard(MachineInstr
*MI
) const {
141 unsigned Reg
= MI
->getOperand(1).getReg();
142 if (Register::isPhysicalRegister(Reg
))
145 MachineBasicBlock
*MBB
= MI
->getParent();
146 MachineInstr
*DefMI
= MRI
->getVRegDef(Reg
);
149 if (DefMI
->getParent() != MBB
)
152 if (DefMI
->isPHI()) {
153 for (unsigned i
= 1, e
= DefMI
->getNumOperands(); i
< e
; i
+= 2) {
154 if (DefMI
->getOperand(i
+ 1).getMBB() == MBB
) {
155 unsigned SrcReg
= DefMI
->getOperand(i
).getReg();
156 if (Register::isVirtualRegister(SrcReg
)) {
157 DefMI
= MRI
->getVRegDef(SrcReg
);
162 } else if (DefMI
->isCopyLike()) {
163 Reg
= DefMI
->getOperand(1).getReg();
164 if (Register::isVirtualRegister(Reg
)) {
165 DefMI
= MRI
->getVRegDef(Reg
);
168 } else if (DefMI
->isInsertSubreg()) {
169 Reg
= DefMI
->getOperand(2).getReg();
170 if (Register::isVirtualRegister(Reg
)) {
171 DefMI
= MRI
->getVRegDef(Reg
);
182 bool MLxExpansion::hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const {
183 // FIXME: Detect integer instructions properly.
184 const MCInstrDesc
&MCID
= MI
->getDesc();
185 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
188 unsigned Opcode
= MCID
.getOpcode();
189 if (Opcode
== ARM::VMOVRS
|| Opcode
== ARM::VMOVRRD
)
191 if ((Domain
& ARMII::DomainVFP
) || (Domain
& ARMII::DomainNEON
))
192 return MI
->readsRegister(Reg
, TRI
);
196 static bool isFpMulInstruction(unsigned Opcode
) {
210 bool MLxExpansion::FindMLxHazard(MachineInstr
*MI
) {
211 if (NumExpand
>= ExpandLimit
)
217 MachineInstr
*DefMI
= getAccDefMI(MI
);
218 if (TII
->isFpMLxInstruction(DefMI
->getOpcode())) {
220 // r3 = vmla r0, r1, r2
221 // takes 16 - 17 cycles
226 // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
227 IgnoreStall
.insert(DefMI
);
231 // On Swift, we mostly care about hazards from multiplication instructions
232 // writing the accumulator and the pipelining of loop iterations by out-of-
235 return isFpMulInstruction(DefMI
->getOpcode()) || hasLoopHazard(MI
);
237 if (IgnoreStall
.count(MI
))
240 // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
241 // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
242 // preserves the in-order retirement of the instructions.
243 // Look at the next few instructions, if *most* of them can cause hazards,
244 // then the scheduler can't *fix* this, we'd better break up the VMLA.
245 unsigned Limit1
= isLikeA9
? 1 : 4;
246 unsigned Limit2
= isLikeA9
? 1 : 4;
247 for (unsigned i
= 1; i
<= 4; ++i
) {
248 int Idx
= ((int)MIIdx
- i
+ 4) % 4;
249 MachineInstr
*NextMI
= LastMIs
[Idx
];
253 if (TII
->canCauseFpMLxStall(NextMI
->getOpcode())) {
258 // Look for VMLx RAW hazard.
259 if (i
<= Limit2
&& hasRAWHazard(getDefReg(MI
), NextMI
))
266 /// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
267 /// of MUL + ADD / SUB instructions.
269 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
270 unsigned MulOpc
, unsigned AddSubOpc
,
271 bool NegAcc
, bool HasLane
) {
272 unsigned DstReg
= MI
->getOperand(0).getReg();
273 bool DstDead
= MI
->getOperand(0).isDead();
274 unsigned AccReg
= MI
->getOperand(1).getReg();
275 unsigned Src1Reg
= MI
->getOperand(2).getReg();
276 unsigned Src2Reg
= MI
->getOperand(3).getReg();
277 bool Src1Kill
= MI
->getOperand(2).isKill();
278 bool Src2Kill
= MI
->getOperand(3).isKill();
279 unsigned LaneImm
= HasLane
? MI
->getOperand(4).getImm() : 0;
280 unsigned NextOp
= HasLane
? 5 : 4;
281 ARMCC::CondCodes Pred
= (ARMCC::CondCodes
)MI
->getOperand(NextOp
).getImm();
282 unsigned PredReg
= MI
->getOperand(++NextOp
).getReg();
284 const MCInstrDesc
&MCID1
= TII
->get(MulOpc
);
285 const MCInstrDesc
&MCID2
= TII
->get(AddSubOpc
);
286 const MachineFunction
&MF
= *MI
->getParent()->getParent();
287 unsigned TmpReg
= MRI
->createVirtualRegister(
288 TII
->getRegClass(MCID1
, 0, TRI
, MF
));
290 MachineInstrBuilder MIB
= BuildMI(MBB
, MI
, MI
->getDebugLoc(), MCID1
, TmpReg
)
291 .addReg(Src1Reg
, getKillRegState(Src1Kill
))
292 .addReg(Src2Reg
, getKillRegState(Src2Kill
));
295 MIB
.addImm(Pred
).addReg(PredReg
);
297 MIB
= BuildMI(MBB
, MI
, MI
->getDebugLoc(), MCID2
)
298 .addReg(DstReg
, getDefRegState(true) | getDeadRegState(DstDead
));
301 bool AccKill
= MRI
->hasOneNonDBGUse(AccReg
);
302 MIB
.addReg(TmpReg
, getKillRegState(true))
303 .addReg(AccReg
, getKillRegState(AccKill
));
305 MIB
.addReg(AccReg
).addReg(TmpReg
, getKillRegState(true));
307 MIB
.addImm(Pred
).addReg(PredReg
);
310 dbgs() << "Expanding: " << *MI
;
312 MachineBasicBlock::iterator MII
= MI
;
313 MII
= std::prev(MII
);
314 MachineInstr
&MI2
= *MII
;
315 MII
= std::prev(MII
);
316 MachineInstr
&MI1
= *MII
;
317 dbgs() << " " << MI1
;
318 dbgs() << " " << MI2
;
321 MI
->eraseFromParent();
325 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock
&MBB
) {
326 bool Changed
= false;
332 MachineBasicBlock::reverse_iterator MII
= MBB
.rbegin(), E
= MBB
.rend();
334 MachineInstr
*MI
= &*MII
++;
336 if (MI
->isPosition() || MI
->isImplicitDef() || MI
->isCopy())
339 const MCInstrDesc
&MCID
= MI
->getDesc();
340 if (MI
->isBarrier()) {
346 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
347 if (Domain
== ARMII::DomainGeneral
) {
349 // Assume dual issues of non-VFP / NEON instructions.
354 unsigned MulOpc
, AddSubOpc
;
355 bool NegAcc
, HasLane
;
356 if (!TII
->isFpMLxInstruction(MCID
.getOpcode(),
357 MulOpc
, AddSubOpc
, NegAcc
, HasLane
) ||
361 ExpandFPMLxInstruction(MBB
, MI
, MulOpc
, AddSubOpc
, NegAcc
, HasLane
);
370 bool MLxExpansion::runOnMachineFunction(MachineFunction
&Fn
) {
371 if (skipFunction(Fn
.getFunction()))
374 TII
= static_cast<const ARMBaseInstrInfo
*>(Fn
.getSubtarget().getInstrInfo());
375 TRI
= Fn
.getSubtarget().getRegisterInfo();
376 MRI
= &Fn
.getRegInfo();
377 const ARMSubtarget
*STI
= &Fn
.getSubtarget
<ARMSubtarget
>();
378 if (!STI
->expandMLx())
380 isLikeA9
= STI
->isLikeA9() || STI
->isSwift();
381 isSwift
= STI
->isSwift();
383 bool Modified
= false;
384 for (MachineBasicBlock
&MBB
: Fn
)
385 Modified
|= ExpandFPMLxInstructions(MBB
);
390 FunctionPass
*llvm::createMLxExpansionPass() {
391 return new MLxExpansion();