1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
16 /// - t2LoopDec - placed within in the loop body.
17 /// - t2LoopEnd - the loop latch terminator.
19 //===----------------------------------------------------------------------===//
22 #include "ARMBaseInstrInfo.h"
23 #include "ARMBaseRegisterInfo.h"
24 #include "ARMBasicBlockInfo.h"
25 #include "ARMSubtarget.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #define DEBUG_TYPE "arm-low-overhead-loops"
33 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
37 class ARMLowOverheadLoops
: public MachineFunctionPass
{
38 const ARMBaseInstrInfo
*TII
= nullptr;
39 MachineRegisterInfo
*MRI
= nullptr;
40 std::unique_ptr
<ARMBasicBlockUtils
> BBUtils
= nullptr;
45 ARMLowOverheadLoops() : MachineFunctionPass(ID
) { }
47 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
49 AU
.addRequired
<MachineLoopInfo
>();
50 MachineFunctionPass::getAnalysisUsage(AU
);
53 bool runOnMachineFunction(MachineFunction
&MF
) override
;
55 bool ProcessLoop(MachineLoop
*ML
);
57 bool RevertNonLoops(MachineFunction
&MF
);
59 void RevertWhile(MachineInstr
*MI
) const;
61 void RevertLoopDec(MachineInstr
*MI
) const;
63 void RevertLoopEnd(MachineInstr
*MI
) const;
65 void Expand(MachineLoop
*ML
, MachineInstr
*Start
,
66 MachineInstr
*Dec
, MachineInstr
*End
, bool Revert
);
68 MachineFunctionProperties
getRequiredProperties() const override
{
69 return MachineFunctionProperties().set(
70 MachineFunctionProperties::Property::NoVRegs
);
73 StringRef
getPassName() const override
{
74 return ARM_LOW_OVERHEAD_LOOPS_NAME
;
79 char ARMLowOverheadLoops::ID
= 0;
81 INITIALIZE_PASS(ARMLowOverheadLoops
, DEBUG_TYPE
, ARM_LOW_OVERHEAD_LOOPS_NAME
,
84 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction
&MF
) {
85 if (!static_cast<const ARMSubtarget
&>(MF
.getSubtarget()).hasLOB())
88 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF
.getName() << " ------------- \n");
90 auto &MLI
= getAnalysis
<MachineLoopInfo
>();
91 MRI
= &MF
.getRegInfo();
92 TII
= static_cast<const ARMBaseInstrInfo
*>(
93 MF
.getSubtarget().getInstrInfo());
94 BBUtils
= std::unique_ptr
<ARMBasicBlockUtils
>(new ARMBasicBlockUtils(MF
));
95 BBUtils
->computeAllBlockSizes();
96 BBUtils
->adjustBBOffsetsAfter(&MF
.front());
100 if (!ML
->getParentLoop())
101 Changed
|= ProcessLoop(ML
);
103 Changed
|= RevertNonLoops(MF
);
107 static bool IsLoopStart(MachineInstr
&MI
) {
108 return MI
.getOpcode() == ARM::t2DoLoopStart
||
109 MI
.getOpcode() == ARM::t2WhileLoopStart
;
112 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop
*ML
) {
114 bool Changed
= false;
116 // Process inner loops first.
117 for (auto I
= ML
->begin(), E
= ML
->end(); I
!= E
; ++I
)
118 Changed
|= ProcessLoop(*I
);
120 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML
);
122 // Search the given block for a loop start instruction. If one isn't found,
123 // and there's only one predecessor block, search that one too.
124 std::function
<MachineInstr
*(MachineBasicBlock
*)> SearchForStart
=
125 [&SearchForStart
](MachineBasicBlock
*MBB
) -> MachineInstr
* {
126 for (auto &MI
: *MBB
) {
130 if (MBB
->pred_size() == 1)
131 return SearchForStart(*MBB
->pred_begin());
135 MachineInstr
*Start
= nullptr;
136 MachineInstr
*Dec
= nullptr;
137 MachineInstr
*End
= nullptr;
140 // Search the preheader for the start intrinsic, or look through the
141 // predecessors of the header to find exactly one set.iterations intrinsic.
142 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
143 // with potentially multiple set.loop.iterations, so we need to enable this.
144 if (auto *Preheader
= ML
->getLoopPreheader()) {
145 Start
= SearchForStart(Preheader
);
147 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
148 << " - Performing manual predecessor search.\n");
149 MachineBasicBlock
*Pred
= nullptr;
150 for (auto *MBB
: ML
->getHeader()->predecessors()) {
151 if (!ML
->contains(MBB
)) {
153 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
158 Start
= SearchForStart(MBB
);
163 // Find the low-overhead loop components and decide whether or not to fall
164 // back to a normal loop.
165 for (auto *MBB
: reverse(ML
->getBlocks())) {
166 for (auto &MI
: *MBB
) {
167 if (MI
.getOpcode() == ARM::t2LoopDec
)
169 else if (MI
.getOpcode() == ARM::t2LoopEnd
)
171 else if (IsLoopStart(MI
))
173 else if (MI
.getDesc().isCall())
174 // TODO: Though the call will require LE to execute again, does this
175 // mean we should revert? Always executing LE hopefully should be
176 // faster than performing a sub,cmp,br or even subs,br.
182 // If we find that we load/store LR between LoopDec and LoopEnd, expect
183 // that the decremented value has been spilled to the stack. Because
184 // this value isn't actually going to be produced until the latch, by LE,
185 // we would need to generate a real sub. The value is also likely to be
186 // reloaded for use of LoopEnd - in which in case we'd need to perform
187 // an add because it gets negated again by LE! The other option is to
188 // then generate the other form of LE which doesn't perform the sub.
189 if (MI
.mayLoad() || MI
.mayStore())
191 MI
.getOperand(0).isReg() && MI
.getOperand(0).getReg() == ARM::LR
;
194 if (Dec
&& End
&& Revert
)
198 LLVM_DEBUG(if (Start
) dbgs() << "ARM Loops: Found Loop Start: " << *Start
;
199 if (Dec
) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec
;
200 if (End
) dbgs() << "ARM Loops: Found Loop End: " << *End
;);
202 if (!Start
&& !Dec
&& !End
) {
203 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
205 } else if (!(Start
&& Dec
&& End
)) {
206 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
210 if (!End
->getOperand(1).isMBB() ||
211 End
->getOperand(1).getMBB() != ML
->getHeader())
212 report_fatal_error("Expected LoopEnd to target Loop Header");
214 // The WLS and LE instructions have 12-bits for the label offset. WLS
215 // requires a positive offset, while LE uses negative.
216 if (BBUtils
->getOffsetOf(End
) < BBUtils
->getOffsetOf(ML
->getHeader()) ||
217 !BBUtils
->isBBInRange(End
, ML
->getHeader(), 4094)) {
218 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
221 if (Start
->getOpcode() == ARM::t2WhileLoopStart
&&
222 (BBUtils
->getOffsetOf(Start
) >
223 BBUtils
->getOffsetOf(Start
->getOperand(1).getMBB()) ||
224 !BBUtils
->isBBInRange(Start
, Start
->getOperand(1).getMBB(), 4094))) {
225 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
229 Expand(ML
, Start
, Dec
, End
, Revert
);
233 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
234 // beq that branches to the exit branch.
235 // FIXME: Need to check that we're not trashing the CPSR when generating the
236 // cmp. We could also try to generate a cbz if the value in LR is also in
237 // another low register.
238 void ARMLowOverheadLoops::RevertWhile(MachineInstr
*MI
) const {
239 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI
);
240 MachineBasicBlock
*MBB
= MI
->getParent();
241 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
242 TII
->get(ARM::t2CMPri
));
245 MIB
.addImm(ARMCC::AL
);
246 MIB
.addReg(ARM::CPSR
);
248 // TODO: Try to use tBcc instead
249 MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(ARM::t2Bcc
));
250 MIB
.add(MI
->getOperand(1)); // branch target
251 MIB
.addImm(ARMCC::EQ
); // condition code
252 MIB
.addReg(ARM::CPSR
);
253 MI
->eraseFromParent();
256 // TODO: Check flags so that we can possibly generate a tSubs or tSub.
257 void ARMLowOverheadLoops::RevertLoopDec(MachineInstr
*MI
) const {
258 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI
);
259 MachineBasicBlock
*MBB
= MI
->getParent();
260 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
261 TII
->get(ARM::t2SUBri
));
263 MIB
.add(MI
->getOperand(1));
264 MIB
.add(MI
->getOperand(2));
265 MIB
.addImm(ARMCC::AL
);
268 MI
->eraseFromParent();
271 // Generate a subs, or sub and cmp, and a branch instead of an LE.
272 // FIXME: Need to check that we're not trashing the CPSR when generating
274 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr
*MI
) const {
275 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI
);
278 MachineBasicBlock
*MBB
= MI
->getParent();
279 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
280 TII
->get(ARM::t2CMPri
));
283 MIB
.addImm(ARMCC::AL
);
284 MIB
.addReg(ARM::CPSR
);
286 // TODO Try to use tBcc instead.
288 MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(ARM::t2Bcc
));
289 MIB
.add(MI
->getOperand(1)); // branch target
290 MIB
.addImm(ARMCC::NE
); // condition code
291 MIB
.addReg(ARM::CPSR
);
292 MI
->eraseFromParent();
295 void ARMLowOverheadLoops::Expand(MachineLoop
*ML
, MachineInstr
*Start
,
296 MachineInstr
*Dec
, MachineInstr
*End
,
299 auto ExpandLoopStart
= [this](MachineLoop
*ML
, MachineInstr
*Start
) {
300 // The trip count should already been held in LR since the instructions
301 // within the loop can only read and write to LR. So, there should be a
302 // mov to setup the count. WLS/DLS perform this move, so find the original
303 // and delete it - inserting WLS/DLS in its place.
304 MachineBasicBlock
*MBB
= Start
->getParent();
305 MachineInstr
*InsertPt
= Start
;
306 for (auto &I
: MRI
->def_instructions(ARM::LR
)) {
307 if (I
.getParent() != MBB
)
311 if (!I
.getOperand(2).isImm() || I
.getOperand(2).getImm() != ARMCC::AL
)
314 // Only handle move reg, if the trip count it will need moving into a reg
315 // before the setup instruction anyway.
316 if (!I
.getDesc().isMoveReg() ||
317 !I
.getOperand(1).isIdenticalTo(Start
->getOperand(0)))
323 unsigned Opc
= Start
->getOpcode() == ARM::t2DoLoopStart
?
324 ARM::t2DLS
: ARM::t2WLS
;
325 MachineInstrBuilder MIB
=
326 BuildMI(*MBB
, InsertPt
, InsertPt
->getDebugLoc(), TII
->get(Opc
));
329 MIB
.add(Start
->getOperand(0));
330 if (Opc
== ARM::t2WLS
)
331 MIB
.add(Start
->getOperand(1));
333 if (InsertPt
!= Start
)
334 InsertPt
->eraseFromParent();
335 Start
->eraseFromParent();
336 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB
);
340 // Combine the LoopDec and LoopEnd instructions into LE(TP).
341 auto ExpandLoopEnd
= [this](MachineLoop
*ML
, MachineInstr
*Dec
,
343 MachineBasicBlock
*MBB
= End
->getParent();
344 MachineInstrBuilder MIB
= BuildMI(*MBB
, End
, End
->getDebugLoc(),
345 TII
->get(ARM::t2LEUpdate
));
347 MIB
.add(End
->getOperand(0));
348 MIB
.add(End
->getOperand(1));
349 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB
);
351 End
->eraseFromParent();
352 Dec
->eraseFromParent();
356 // TODO: We should be able to automatically remove these branches before we
357 // get here - probably by teaching analyzeBranch about the pseudo
359 // If there is an unconditional branch, after I, that just branches to the
360 // next block, remove it.
361 auto RemoveDeadBranch
= [](MachineInstr
*I
) {
362 MachineBasicBlock
*BB
= I
->getParent();
363 MachineInstr
*Terminator
= &BB
->instr_back();
364 if (Terminator
->isUnconditionalBranch() && I
!= Terminator
) {
365 MachineBasicBlock
*Succ
= Terminator
->getOperand(0).getMBB();
366 if (BB
->isLayoutSuccessor(Succ
)) {
367 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator
);
368 Terminator
->eraseFromParent();
374 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
377 Start
->eraseFromParent();
381 Start
= ExpandLoopStart(ML
, Start
);
382 RemoveDeadBranch(Start
);
383 End
= ExpandLoopEnd(ML
, Dec
, End
);
384 RemoveDeadBranch(End
);
388 bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction
&MF
) {
389 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
390 bool Changed
= false;
392 for (auto &MBB
: MF
) {
393 SmallVector
<MachineInstr
*, 4> Starts
;
394 SmallVector
<MachineInstr
*, 4> Decs
;
395 SmallVector
<MachineInstr
*, 4> Ends
;
397 for (auto &I
: MBB
) {
399 Starts
.push_back(&I
);
400 else if (I
.getOpcode() == ARM::t2LoopDec
)
402 else if (I
.getOpcode() == ARM::t2LoopEnd
)
406 if (Starts
.empty() && Decs
.empty() && Ends
.empty())
411 for (auto *Start
: Starts
) {
412 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
415 Start
->eraseFromParent();
417 for (auto *Dec
: Decs
)
420 for (auto *End
: Ends
)
426 FunctionPass
*llvm::createARMLowOverheadLoopsPass() {
427 return new ARMLowOverheadLoops();