1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheader's only predecessor.
15 /// - t2LoopDec - placed within the loop body.
16 /// - t2LoopEnd - the loop latch terminator.
18 //===----------------------------------------------------------------------===//
21 #include "ARMBaseInstrInfo.h"
22 #include "ARMBaseRegisterInfo.h"
23 #include "ARMBasicBlockInfo.h"
24 #include "ARMSubtarget.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineLoopInfo.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
// Pass identifiers. DEBUG_TYPE is passed to INITIALIZE_PASS further down in
// this file, and ARM_LOW_OVERHEAD_LOOPS_NAME is the string returned by
// ARMLowOverheadLoops::getPassName().
31 #define DEBUG_TYPE "arm-low-overhead-loops"
32 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
/// Machine function pass that handles the low-overhead loop pseudo
/// instructions (t2DoLoopStart / t2WhileLoopStart, t2LoopDec, t2LoopEnd).
/// Loops are handed to Expand(), which either emits the real loop
/// instructions or falls back to the Revert* helpers.
/// NOTE(review): this listing appears to be missing lines of the class
/// (access specifiers, the declaration of ID, closing braces) - confirm
/// against the complete file before relying on it.
36 class ARMLowOverheadLoops
: public MachineFunctionPass
{
// Instruction info for the current function; assigned in
// runOnMachineFunction().
37 const ARMBaseInstrInfo
*TII
= nullptr;
// Register info for the current function; assigned in
// runOnMachineFunction().
38 MachineRegisterInfo
*MRI
= nullptr;
// Block size/offset helper; rebuilt in runOnMachineFunction() and used by
// ProcessLoop() for its branch range checks.
39 std::unique_ptr
<ARMBasicBlockUtils
> BBUtils
= nullptr;
44 ARMLowOverheadLoops() : MachineFunctionPass(ID
) { }
// Requires MachineLoopInfo, then defers to the base class.
46 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
48 AU
.addRequired
<MachineLoopInfo
>();
49 MachineFunctionPass::getAnalysisUsage(AU
);
// Pass entry point; defined out of line.
52 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Handles a single loop, processing its inner loops first.
54 bool ProcessLoop(MachineLoop
*ML
);
// Deals with loop pseudos that remain in MF after the loops were processed.
56 bool RevertNonLoops(MachineFunction
&MF
);
// Replaces a while-loop start pseudo with a t2CMPri followed by a t2Bcc (EQ).
58 void RevertWhile(MachineInstr
*MI
) const;
// Replaces a t2LoopDec pseudo with a t2SUBri.
60 void RevertLoopDec(MachineInstr
*MI
) const;
// Replaces a t2LoopEnd pseudo with a t2CMPri followed by a t2Bcc (NE).
62 void RevertLoopEnd(MachineInstr
*MI
) const;
// Converts the Start/Dec/End pseudos of ML; Revert selects the fallback path.
64 void Expand(MachineLoop
*ML
, MachineInstr
*Start
,
65 MachineInstr
*Dec
, MachineInstr
*End
, bool Revert
);
// Declares NoVRegs as a required machine function property.
67 MachineFunctionProperties
getRequiredProperties() const override
{
68 return MachineFunctionProperties().set(
69 MachineFunctionProperties::Property::NoVRegs
);
// Returns the human-readable pass name.
72 StringRef
getPassName() const override
{
73 return ARM_LOW_OVERHEAD_LOOPS_NAME
;
// Definition of the pass ID member (passed to the MachineFunctionPass
// constructor by ARMLowOverheadLoops()), followed by the pass registration.
// NOTE(review): the INITIALIZE_PASS argument list is cut off in this listing.
78 char ARMLowOverheadLoops::ID
= 0;
80 INITIALIZE_PASS(ARMLowOverheadLoops
, DEBUG_TYPE
, ARM_LOW_OVERHEAD_LOOPS_NAME
,
// Pass entry point. Checks the subtarget for hasLOB(), caches the register
// and instruction info, rebuilds BBUtils and computes block sizes/offsets,
// then calls ProcessLoop() on loops that have no parent loop and finally
// RevertNonLoops() on the whole function. The results are OR-ed into Changed.
// NOTE(review): the statement guarded by the hasLOB() check, the declaration
// of Changed, the loop header that introduces ML and the final return are not
// visible in this listing - confirm against the complete file.
83 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction
&MF
) {
84 if (!static_cast<const ARMSubtarget
&>(MF
.getSubtarget()).hasLOB())
87 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF
.getName() << " ------------- \n");
89 auto &MLI
= getAnalysis
<MachineLoopInfo
>();
90 MRI
= &MF
.getRegInfo();
91 TII
= static_cast<const ARMBaseInstrInfo
*>(
92 MF
.getSubtarget().getInstrInfo());
// Fresh block-size/offset information for this function; ProcessLoop() uses
// it for its offset and range checks.
93 BBUtils
= std::unique_ptr
<ARMBasicBlockUtils
>(new ARMBasicBlockUtils(MF
));
94 BBUtils
->computeAllBlockSizes();
95 BBUtils
->adjustBBOffsetsAfter(&MF
.front());
// Only loops without a parent are passed in here; ProcessLoop() recurses into
// the inner loops itself.
99 if (!ML
->getParentLoop())
100 Changed
|= ProcessLoop(ML
);
102 Changed
|= RevertNonLoops(MF
);
// Returns true if MI is one of the two loop start pseudos: t2DoLoopStart or
// t2WhileLoopStart.
// NOTE(review): the closing brace is not visible in this listing.
106 static bool IsLoopStart(MachineInstr
&MI
) {
107 return MI
.getOpcode() == ARM::t2DoLoopStart
||
108 MI
.getOpcode() == ARM::t2WhileLoopStart
;
// Processes one loop, recursing into its sub-loops first. It looks for a loop
// start pseudo through the preheader or, when there is no preheader, through
// the header's out-of-loop predecessors. It then scans the loop's blocks for
// t2LoopDec / t2LoopEnd, calls, and uses/defs of LR. After checking that
// LoopEnd targets the header and that the LE / WLS offsets are in range, it
// passes Start, Dec, End and the Revert flag to Expand().
// NOTE(review): many lines of this function (returns, assignments to
// Start/Dec/End/Revert, closing braces) are not visible in this listing; the
// summary above covers only what is shown.
111 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop
*ML
) {
113 bool Changed
= false;
115 // Process inner loops first.
116 for (auto I
= ML
->begin(), E
= ML
->end(); I
!= E
; ++I
)
117 Changed
|= ProcessLoop(*I
);
119 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML
);
121 // Search the given block for a loop start instruction. If one isn't found,
122 // and there's only one predecessor block, search that one too.
123 std::function
<MachineInstr
*(MachineBasicBlock
*)> SearchForStart
=
124 [&SearchForStart
](MachineBasicBlock
*MBB
) -> MachineInstr
* {
125 for (auto &MI
: *MBB
) {
// Recurse into the sole predecessor when the block has exactly one.
129 if (MBB
->pred_size() == 1)
130 return SearchForStart(*MBB
->pred_begin());
134 MachineInstr
*Start
= nullptr;
135 MachineInstr
*Dec
= nullptr;
136 MachineInstr
*End
= nullptr;
139 // Search the preheader for the start intrinsic, or look through the
140 // predecessors of the header to find exactly one set.iterations intrinsic.
141 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
142 // with potentially multiple set.loop.iterations, so we need to enable this.
143 if (auto *Preheader
= ML
->getLoopPreheader()) {
144 Start
= SearchForStart(Preheader
);
146 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
147 << " - Performing manual predecessor search.\n");
148 MachineBasicBlock
*Pred
= nullptr;
149 for (auto *MBB
: ML
->getHeader()->predecessors()) {
// Only predecessors outside the loop are considered.
150 if (!ML
->contains(MBB
)) {
152 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
157 Start
= SearchForStart(MBB
);
162 // Find the low-overhead loop components and decide whether or not to fall
163 // back to a normal loop.
164 for (auto *MBB
: reverse(ML
->getBlocks())) {
165 for (auto &MI
: *MBB
) {
166 if (MI
.getOpcode() == ARM::t2LoopDec
)
168 else if (MI
.getOpcode() == ARM::t2LoopEnd
)
170 else if (IsLoopStart(MI
))
172 else if (MI
.getDesc().isCall())
173 // TODO: Though the call will require LE to execute again, does this
174 // mean we should revert? Always executing LE hopefully should be
175 // faster than performing a sub,cmp,br or even subs,br.
181 // If we find that LR has been written or read between LoopDec and
182 // LoopEnd, expect that the decremented value is being used elsewhere.
183 // Because this value isn't actually going to be produced until the
184 // latch, by LE, we would need to generate a real sub. The value is also
185 // likely to be copied/reloaded for use of LoopEnd - in which case
186 // we'd need to perform an add because it gets subtracted again by LE!
187 // The other option is to then generate the other form of LE which doesn't
189 for (auto &MO
: MI
.operands()) {
190 if (MI
.getOpcode() != ARM::t2LoopDec
&& MO
.isReg() &&
191 MO
.getReg() == ARM::LR
) {
192 LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI
);
199 if (Dec
&& End
&& Revert
)
203 LLVM_DEBUG(if (Start
) dbgs() << "ARM Loops: Found Loop Start: " << *Start
;
204 if (Dec
) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec
;
205 if (End
) dbgs() << "ARM Loops: Found Loop End: " << *End
;);
// Distinguish "no pseudos at all" from "only some of the pseudos were found".
207 if (!Start
&& !Dec
&& !End
) {
208 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
210 } else if (!(Start
&& Dec
&& End
)) {
211 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
215 if (!End
->getOperand(1).isMBB())
216 report_fatal_error("Expected LoopEnd to target basic block");
218 // TODO Maybe there's cases where the target doesn't have to be the header,
219 // but for now be safe and revert.
220 if (End
->getOperand(1).getMBB() != ML
->getHeader()) {
221 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
225 // The WLS and LE instructions have 12-bits for the label offset. WLS
226 // requires a positive offset, while LE uses negative.
227 if (BBUtils
->getOffsetOf(End
) < BBUtils
->getOffsetOf(ML
->getHeader()) ||
228 !BBUtils
->isBBInRange(End
, ML
->getHeader(), 4094)) {
229 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
232 if (Start
->getOpcode() == ARM::t2WhileLoopStart
&&
233 (BBUtils
->getOffsetOf(Start
) >
234 BBUtils
->getOffsetOf(Start
->getOperand(1).getMBB()) ||
235 !BBUtils
->isBBInRange(Start
, Start
->getOperand(1).getMBB(), 4094))) {
236 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
240 Expand(ML
, Start
, Dec
, End
, Revert
);
244 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
245 // beq that branches to the exit branch.
246 // FIXME: Need to check that we're not trashing the CPSR when generating the
247 // cmp. We could also try to generate a cbz if the value in LR is also in
248 // another low register.
// Both new instructions are inserted in front of MI in its parent block, and
// MI is erased afterwards.
// NOTE(review): at least one operand line of the compare and the closing
// brace are not visible in this listing.
249 void ARMLowOverheadLoops::RevertWhile(MachineInstr
*MI
) const {
250 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI
);
251 MachineBasicBlock
*MBB
= MI
->getParent();
// Compare: takes operand 0 of MI and is predicated with AL / NoRegister.
252 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
253 TII
->get(ARM::t2CMPri
));
254 MIB
.add(MI
->getOperand(0));
256 MIB
.addImm(ARMCC::AL
);
257 MIB
.addReg(ARM::NoRegister
);
259 // TODO: Try to use tBcc instead
// Conditional branch to operand 1 of MI, taken on EQ and reading CPSR.
260 MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(ARM::t2Bcc
));
261 MIB
.add(MI
->getOperand(1)); // branch target
262 MIB
.addImm(ARMCC::EQ
); // condition code
263 MIB
.addReg(ARM::CPSR
);
264 MI
->eraseFromParent();
267 // TODO: Check flags so that we can possibly generate a tSubs or tSub.
// Replaces the t2LoopDec pseudo MI with a t2SUBri built in front of it. The
// visible code forwards operands 1 and 2 of MI and an AL predicate, then
// erases MI.
// NOTE(review): some operand lines of the new instruction and the closing
// brace are not visible in this listing.
268 void ARMLowOverheadLoops::RevertLoopDec(MachineInstr
*MI
) const {
269 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI
);
270 MachineBasicBlock
*MBB
= MI
->getParent();
271 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
272 TII
->get(ARM::t2SUBri
));
274 MIB
.add(MI
->getOperand(1));
275 MIB
.add(MI
->getOperand(2));
276 MIB
.addImm(ARMCC::AL
);
279 MI
->eraseFromParent();
282 // Generate a subs, or sub and cmp, and a branch instead of an LE.
283 // FIXME: Need to check that we're not trashing the CPSR when generating
// The visible code builds a t2CMPri and a t2Bcc in front of MI and then
// erases MI.
// NOTE(review): the remainder of the FIXME above, the compare's
// register/immediate operands and the closing brace are not visible in this
// listing.
285 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr
*MI
) const {
286 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI
);
289 MachineBasicBlock
*MBB
= MI
->getParent();
// Compare, predicated with AL / NoRegister.
290 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
291 TII
->get(ARM::t2CMPri
));
294 MIB
.addImm(ARMCC::AL
);
295 MIB
.addReg(ARM::NoRegister
);
297 // TODO Try to use tBcc instead.
// Conditional branch to operand 1 of MI, taken on NE and reading CPSR.
299 MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(ARM::t2Bcc
));
300 MIB
.add(MI
->getOperand(1)); // branch target
301 MIB
.addImm(ARMCC::NE
); // condition code
302 MIB
.addReg(ARM::CPSR
);
303 MI
->eraseFromParent();
// Replaces the loop pseudos of ML with real instructions. The visible code
// defines three local helpers - ExpandLoopStart (emits t2DLS or t2WLS),
// ExpandLoopEnd (emits t2LEUpdate and erases LoopEnd and LoopDec) and
// RemoveDeadBranch - and applies them to Start and End.
// NOTE(review): the end of the parameter list, the lambdas' return
// statements, most of the Revert handling and several closing braces are not
// visible in this listing - confirm against the complete file.
306 void ARMLowOverheadLoops::Expand(MachineLoop
*ML
, MachineInstr
*Start
,
307 MachineInstr
*Dec
, MachineInstr
*End
,
310 auto ExpandLoopStart
= [this](MachineLoop
*ML
, MachineInstr
*Start
) {
311 // The trip count should already be held in LR since the instructions
312 // within the loop can only read and write to LR. So, there should be a
313 // mov to setup the count. WLS/DLS perform this move, so find the original
314 // and delete it - inserting WLS/DLS in its place.
315 MachineBasicBlock
*MBB
= Start
->getParent();
// Defaults to Start itself; it is compared against Start again before the
// erase below.
316 MachineInstr
*InsertPt
= Start
;
317 for (auto &I
: MRI
->def_instructions(ARM::LR
)) {
318 if (I
.getParent() != MBB
)
322 if (!I
.getOperand(2).isImm() || I
.getOperand(2).getImm() != ARMCC::AL
)
325 // Only handle move reg, if the trip count it will need moving into a reg
326 // before the setup instruction anyway.
327 if (!I
.getDesc().isMoveReg() ||
328 !I
.getOperand(1).isIdenticalTo(Start
->getOperand(0)))
// t2DoLoopStart maps to t2DLS; anything else here maps to t2WLS.
334 unsigned Opc
= Start
->getOpcode() == ARM::t2DoLoopStart
?
335 ARM::t2DLS
: ARM::t2WLS
;
336 MachineInstrBuilder MIB
=
337 BuildMI(*MBB
, InsertPt
, InsertPt
->getDebugLoc(), TII
->get(Opc
));
340 MIB
.add(Start
->getOperand(0));
// Only the t2WLS form also takes operand 1 of Start.
341 if (Opc
== ARM::t2WLS
)
342 MIB
.add(Start
->getOperand(1));
344 if (InsertPt
!= Start
)
345 InsertPt
->eraseFromParent();
346 Start
->eraseFromParent();
347 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB
);
351 // Combine the LoopDec and LoopEnd instructions into LE(TP).
352 auto ExpandLoopEnd
= [this](MachineLoop
*ML
, MachineInstr
*Dec
,
354 MachineBasicBlock
*MBB
= End
->getParent();
355 MachineInstrBuilder MIB
= BuildMI(*MBB
, End
, End
->getDebugLoc(),
356 TII
->get(ARM::t2LEUpdate
));
358 MIB
.add(End
->getOperand(0));
359 MIB
.add(End
->getOperand(1));
360 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB
);
362 End
->eraseFromParent();
363 Dec
->eraseFromParent();
367 // TODO: We should be able to automatically remove these branches before we
368 // get here - probably by teaching analyzeBranch about the pseudo
370 // If there is an unconditional branch, after I, that just branches to the
371 // next block, remove it.
372 auto RemoveDeadBranch
= [](MachineInstr
*I
) {
373 MachineBasicBlock
*BB
= I
->getParent();
374 MachineInstr
*Terminator
= &BB
->instr_back();
375 if (Terminator
->isUnconditionalBranch() && I
!= Terminator
) {
376 MachineBasicBlock
*Succ
= Terminator
->getOperand(0).getMBB();
377 if (BB
->isLayoutSuccessor(Succ
)) {
378 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator
);
379 Terminator
->eraseFromParent();
// NOTE(review): presumably this is the Revert path; the enclosing condition
// and the calls it makes are not visible here - confirm.
385 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
388 Start
->eraseFromParent();
// Start and End are reassigned to the helpers' results before
// RemoveDeadBranch is called on them.
392 Start
= ExpandLoopStart(ML
, Start
);
393 RemoveDeadBranch(Start
);
394 End
= ExpandLoopEnd(ML
, Dec
, End
);
395 RemoveDeadBranch(End
);
// Walks every block of MF, gathers instructions into the per-block lists
// Starts, Decs and Ends, and then iterates over each list.
// NOTE(review): most of the per-instruction actions are missing from this
// listing. Going by the debug message below, they presumably call the
// Revert* helpers - confirm against the complete file.
399 bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction
&MF
) {
400 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
401 bool Changed
= false;
403 for (auto &MBB
: MF
) {
404 SmallVector
<MachineInstr
*, 4> Starts
;
405 SmallVector
<MachineInstr
*, 4> Decs
;
406 SmallVector
<MachineInstr
*, 4> Ends
;
// Classify the instructions of this block by opcode.
408 for (auto &I
: MBB
) {
410 Starts
.push_back(&I
);
411 else if (I
.getOpcode() == ARM::t2LoopDec
)
413 else if (I
.getOpcode() == ARM::t2LoopEnd
)
// Checks whether this block contained any of the pseudos.
417 if (Starts
.empty() && Decs
.empty() && Ends
.empty())
422 for (auto *Start
: Starts
) {
423 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
426 Start
->eraseFromParent();
428 for (auto *Dec
: Decs
)
431 for (auto *End
: Ends
)
// Factory function: returns a newly allocated ARMLowOverheadLoops pass.
// NOTE(review): the closing brace is not visible in this listing.
437 FunctionPass
*llvm::createARMLowOverheadLoopsPass() {
438 return new ARMLowOverheadLoops();