1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
16 /// - t2LoopDec - placed within in the loop body.
17 /// - t2LoopEnd - the loop latch terminator.
19 //===----------------------------------------------------------------------===//
22 #include "ARMBaseInstrInfo.h"
23 #include "ARMBaseRegisterInfo.h"
24 #include "ARMBasicBlockInfo.h"
25 #include "ARMSubtarget.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #define DEBUG_TYPE "arm-low-overhead-loops"
33 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
37 class ARMLowOverheadLoops
: public MachineFunctionPass
{
38 const ARMBaseInstrInfo
*TII
= nullptr;
39 MachineRegisterInfo
*MRI
= nullptr;
40 std::unique_ptr
<ARMBasicBlockUtils
> BBUtils
= nullptr;
45 ARMLowOverheadLoops() : MachineFunctionPass(ID
) { }
47 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
49 AU
.addRequired
<MachineLoopInfo
>();
50 MachineFunctionPass::getAnalysisUsage(AU
);
53 bool runOnMachineFunction(MachineFunction
&MF
) override
;
55 bool ProcessLoop(MachineLoop
*ML
);
57 bool RevertNonLoops(MachineFunction
&MF
);
59 void RevertWhile(MachineInstr
*MI
) const;
61 void RevertLoopDec(MachineInstr
*MI
) const;
63 void RevertLoopEnd(MachineInstr
*MI
) const;
65 void Expand(MachineLoop
*ML
, MachineInstr
*Start
,
66 MachineInstr
*Dec
, MachineInstr
*End
, bool Revert
);
68 MachineFunctionProperties
getRequiredProperties() const override
{
69 return MachineFunctionProperties().set(
70 MachineFunctionProperties::Property::NoVRegs
);
73 StringRef
getPassName() const override
{
74 return ARM_LOW_OVERHEAD_LOOPS_NAME
;
79 char ARMLowOverheadLoops::ID
= 0;
81 INITIALIZE_PASS(ARMLowOverheadLoops
, DEBUG_TYPE
, ARM_LOW_OVERHEAD_LOOPS_NAME
,
84 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction
&MF
) {
85 if (!static_cast<const ARMSubtarget
&>(MF
.getSubtarget()).hasLOB())
88 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF
.getName() << " ------------- \n");
90 auto &MLI
= getAnalysis
<MachineLoopInfo
>();
91 MRI
= &MF
.getRegInfo();
92 TII
= static_cast<const ARMBaseInstrInfo
*>(
93 MF
.getSubtarget().getInstrInfo());
94 BBUtils
= std::unique_ptr
<ARMBasicBlockUtils
>(new ARMBasicBlockUtils(MF
));
95 BBUtils
->computeAllBlockSizes();
96 BBUtils
->adjustBBOffsetsAfter(&MF
.front());
100 if (!ML
->getParentLoop())
101 Changed
|= ProcessLoop(ML
);
103 Changed
|= RevertNonLoops(MF
);
107 static bool IsLoopStart(MachineInstr
&MI
) {
108 return MI
.getOpcode() == ARM::t2DoLoopStart
||
109 MI
.getOpcode() == ARM::t2WhileLoopStart
;
112 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop
*ML
) {
114 bool Changed
= false;
116 // Process inner loops first.
117 for (auto I
= ML
->begin(), E
= ML
->end(); I
!= E
; ++I
)
118 Changed
|= ProcessLoop(*I
);
120 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML
);
122 // Search the given block for a loop start instruction. If one isn't found,
123 // and there's only one predecessor block, search that one too.
124 std::function
<MachineInstr
*(MachineBasicBlock
*)> SearchForStart
=
125 [&SearchForStart
](MachineBasicBlock
*MBB
) -> MachineInstr
* {
126 for (auto &MI
: *MBB
) {
130 if (MBB
->pred_size() == 1)
131 return SearchForStart(*MBB
->pred_begin());
135 MachineInstr
*Start
= nullptr;
136 MachineInstr
*Dec
= nullptr;
137 MachineInstr
*End
= nullptr;
140 // Search the preheader for the start intrinsic, or look through the
141 // predecessors of the header to find exactly one set.iterations intrinsic.
142 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
143 // with potentially multiple set.loop.iterations, so we need to enable this.
144 if (auto *Preheader
= ML
->getLoopPreheader()) {
145 Start
= SearchForStart(Preheader
);
147 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
148 << " - Performing manual predecessor search.\n");
149 MachineBasicBlock
*Pred
= nullptr;
150 for (auto *MBB
: ML
->getHeader()->predecessors()) {
151 if (!ML
->contains(MBB
)) {
153 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
158 Start
= SearchForStart(MBB
);
163 // Find the low-overhead loop components and decide whether or not to fall
164 // back to a normal loop.
165 for (auto *MBB
: reverse(ML
->getBlocks())) {
166 for (auto &MI
: *MBB
) {
167 if (MI
.getOpcode() == ARM::t2LoopDec
)
169 else if (MI
.getOpcode() == ARM::t2LoopEnd
)
171 else if (IsLoopStart(MI
))
173 else if (MI
.getDesc().isCall())
174 // TODO: Though the call will require LE to execute again, does this
175 // mean we should revert? Always executing LE hopefully should be
176 // faster than performing a sub,cmp,br or even subs,br.
182 // If we find that we load/store LR between LoopDec and LoopEnd, expect
183 // that the decremented value has been spilled to the stack. Because
184 // this value isn't actually going to be produced until the latch, by LE,
185 // we would need to generate a real sub. The value is also likely to be
186 // reloaded for use of LoopEnd - in which in case we'd need to perform
187 // an add because it gets negated again by LE! The other option is to
188 // then generate the other form of LE which doesn't perform the sub.
189 if (MI
.mayLoad() || MI
.mayStore())
191 MI
.getOperand(0).isReg() && MI
.getOperand(0).getReg() == ARM::LR
;
194 if (Dec
&& End
&& Revert
)
198 LLVM_DEBUG(if (Start
) dbgs() << "ARM Loops: Found Loop Start: " << *Start
;
199 if (Dec
) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec
;
200 if (End
) dbgs() << "ARM Loops: Found Loop End: " << *End
;);
202 if (!Start
&& !Dec
&& !End
) {
203 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
205 } else if (!(Start
&& Dec
&& End
)) {
206 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
210 if (!End
->getOperand(1).isMBB())
211 report_fatal_error("Expected LoopEnd to target basic block");
213 // TODO Maybe there's cases where the target doesn't have to be the header,
214 // but for now be safe and revert.
215 if (End
->getOperand(1).getMBB() != ML
->getHeader()) {
216 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
220 // The WLS and LE instructions have 12-bits for the label offset. WLS
221 // requires a positive offset, while LE uses negative.
222 if (BBUtils
->getOffsetOf(End
) < BBUtils
->getOffsetOf(ML
->getHeader()) ||
223 !BBUtils
->isBBInRange(End
, ML
->getHeader(), 4094)) {
224 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
227 if (Start
->getOpcode() == ARM::t2WhileLoopStart
&&
228 (BBUtils
->getOffsetOf(Start
) >
229 BBUtils
->getOffsetOf(Start
->getOperand(1).getMBB()) ||
230 !BBUtils
->isBBInRange(Start
, Start
->getOperand(1).getMBB(), 4094))) {
231 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
235 Expand(ML
, Start
, Dec
, End
, Revert
);
239 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
240 // beq that branches to the exit branch.
241 // FIXME: Need to check that we're not trashing the CPSR when generating the
242 // cmp. We could also try to generate a cbz if the value in LR is also in
243 // another low register.
244 void ARMLowOverheadLoops::RevertWhile(MachineInstr
*MI
) const {
245 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI
);
246 MachineBasicBlock
*MBB
= MI
->getParent();
247 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
248 TII
->get(ARM::t2CMPri
));
251 MIB
.addImm(ARMCC::AL
);
252 MIB
.addReg(ARM::CPSR
);
254 // TODO: Try to use tBcc instead
255 MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(ARM::t2Bcc
));
256 MIB
.add(MI
->getOperand(1)); // branch target
257 MIB
.addImm(ARMCC::EQ
); // condition code
258 MIB
.addReg(ARM::CPSR
);
259 MI
->eraseFromParent();
262 // TODO: Check flags so that we can possibly generate a tSubs or tSub.
263 void ARMLowOverheadLoops::RevertLoopDec(MachineInstr
*MI
) const {
264 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI
);
265 MachineBasicBlock
*MBB
= MI
->getParent();
266 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
267 TII
->get(ARM::t2SUBri
));
269 MIB
.add(MI
->getOperand(1));
270 MIB
.add(MI
->getOperand(2));
271 MIB
.addImm(ARMCC::AL
);
274 MI
->eraseFromParent();
277 // Generate a subs, or sub and cmp, and a branch instead of an LE.
278 // FIXME: Need to check that we're not trashing the CPSR when generating
280 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr
*MI
) const {
281 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI
);
284 MachineBasicBlock
*MBB
= MI
->getParent();
285 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
286 TII
->get(ARM::t2CMPri
));
289 MIB
.addImm(ARMCC::AL
);
290 MIB
.addReg(ARM::CPSR
);
292 // TODO Try to use tBcc instead.
294 MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(ARM::t2Bcc
));
295 MIB
.add(MI
->getOperand(1)); // branch target
296 MIB
.addImm(ARMCC::NE
); // condition code
297 MIB
.addReg(ARM::CPSR
);
298 MI
->eraseFromParent();
301 void ARMLowOverheadLoops::Expand(MachineLoop
*ML
, MachineInstr
*Start
,
302 MachineInstr
*Dec
, MachineInstr
*End
,
305 auto ExpandLoopStart
= [this](MachineLoop
*ML
, MachineInstr
*Start
) {
306 // The trip count should already been held in LR since the instructions
307 // within the loop can only read and write to LR. So, there should be a
308 // mov to setup the count. WLS/DLS perform this move, so find the original
309 // and delete it - inserting WLS/DLS in its place.
310 MachineBasicBlock
*MBB
= Start
->getParent();
311 MachineInstr
*InsertPt
= Start
;
312 for (auto &I
: MRI
->def_instructions(ARM::LR
)) {
313 if (I
.getParent() != MBB
)
317 if (!I
.getOperand(2).isImm() || I
.getOperand(2).getImm() != ARMCC::AL
)
320 // Only handle move reg, if the trip count it will need moving into a reg
321 // before the setup instruction anyway.
322 if (!I
.getDesc().isMoveReg() ||
323 !I
.getOperand(1).isIdenticalTo(Start
->getOperand(0)))
329 unsigned Opc
= Start
->getOpcode() == ARM::t2DoLoopStart
?
330 ARM::t2DLS
: ARM::t2WLS
;
331 MachineInstrBuilder MIB
=
332 BuildMI(*MBB
, InsertPt
, InsertPt
->getDebugLoc(), TII
->get(Opc
));
335 MIB
.add(Start
->getOperand(0));
336 if (Opc
== ARM::t2WLS
)
337 MIB
.add(Start
->getOperand(1));
339 if (InsertPt
!= Start
)
340 InsertPt
->eraseFromParent();
341 Start
->eraseFromParent();
342 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB
);
346 // Combine the LoopDec and LoopEnd instructions into LE(TP).
347 auto ExpandLoopEnd
= [this](MachineLoop
*ML
, MachineInstr
*Dec
,
349 MachineBasicBlock
*MBB
= End
->getParent();
350 MachineInstrBuilder MIB
= BuildMI(*MBB
, End
, End
->getDebugLoc(),
351 TII
->get(ARM::t2LEUpdate
));
353 MIB
.add(End
->getOperand(0));
354 MIB
.add(End
->getOperand(1));
355 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB
);
357 End
->eraseFromParent();
358 Dec
->eraseFromParent();
362 // TODO: We should be able to automatically remove these branches before we
363 // get here - probably by teaching analyzeBranch about the pseudo
365 // If there is an unconditional branch, after I, that just branches to the
366 // next block, remove it.
367 auto RemoveDeadBranch
= [](MachineInstr
*I
) {
368 MachineBasicBlock
*BB
= I
->getParent();
369 MachineInstr
*Terminator
= &BB
->instr_back();
370 if (Terminator
->isUnconditionalBranch() && I
!= Terminator
) {
371 MachineBasicBlock
*Succ
= Terminator
->getOperand(0).getMBB();
372 if (BB
->isLayoutSuccessor(Succ
)) {
373 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator
);
374 Terminator
->eraseFromParent();
380 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
383 Start
->eraseFromParent();
387 Start
= ExpandLoopStart(ML
, Start
);
388 RemoveDeadBranch(Start
);
389 End
= ExpandLoopEnd(ML
, Dec
, End
);
390 RemoveDeadBranch(End
);
394 bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction
&MF
) {
395 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
396 bool Changed
= false;
398 for (auto &MBB
: MF
) {
399 SmallVector
<MachineInstr
*, 4> Starts
;
400 SmallVector
<MachineInstr
*, 4> Decs
;
401 SmallVector
<MachineInstr
*, 4> Ends
;
403 for (auto &I
: MBB
) {
405 Starts
.push_back(&I
);
406 else if (I
.getOpcode() == ARM::t2LoopDec
)
408 else if (I
.getOpcode() == ARM::t2LoopEnd
)
412 if (Starts
.empty() && Decs
.empty() && Ends
.empty())
417 for (auto *Start
: Starts
) {
418 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
421 Start
->eraseFromParent();
423 for (auto *Dec
: Decs
)
426 for (auto *End
: Ends
)
432 FunctionPass
*llvm::createARMLowOverheadLoopsPass() {
433 return new ARMLowOverheadLoops();