1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
16 /// - t2LoopDec - placed within in the loop body.
17 /// - t2LoopEnd - the loop latch terminator.
19 //===----------------------------------------------------------------------===//
22 #include "ARMBaseInstrInfo.h"
23 #include "ARMBaseRegisterInfo.h"
24 #include "ARMBasicBlockInfo.h"
25 #include "ARMSubtarget.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #define DEBUG_TYPE "arm-low-overhead-loops"
33 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
37 class ARMLowOverheadLoops
: public MachineFunctionPass
{
38 const ARMBaseInstrInfo
*TII
= nullptr;
39 MachineRegisterInfo
*MRI
= nullptr;
40 std::unique_ptr
<ARMBasicBlockUtils
> BBUtils
= nullptr;
45 ARMLowOverheadLoops() : MachineFunctionPass(ID
) { }
47 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
49 AU
.addRequired
<MachineLoopInfo
>();
50 MachineFunctionPass::getAnalysisUsage(AU
);
53 bool runOnMachineFunction(MachineFunction
&MF
) override
;
55 bool ProcessLoop(MachineLoop
*ML
);
57 void Expand(MachineLoop
*ML
, MachineInstr
*Start
,
58 MachineInstr
*Dec
, MachineInstr
*End
, bool Revert
);
60 MachineFunctionProperties
getRequiredProperties() const override
{
61 return MachineFunctionProperties().set(
62 MachineFunctionProperties::Property::NoVRegs
);
65 StringRef
getPassName() const override
{
66 return ARM_LOW_OVERHEAD_LOOPS_NAME
;
71 char ARMLowOverheadLoops::ID
= 0;
73 INITIALIZE_PASS(ARMLowOverheadLoops
, DEBUG_TYPE
, ARM_LOW_OVERHEAD_LOOPS_NAME
,
76 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction
&MF
) {
77 //if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
80 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF
.getName() << " ------------- \n");
82 auto &MLI
= getAnalysis
<MachineLoopInfo
>();
83 MRI
= &MF
.getRegInfo();
84 TII
= static_cast<const ARMBaseInstrInfo
*>(
85 MF
.getSubtarget().getInstrInfo());
86 BBUtils
= std::unique_ptr
<ARMBasicBlockUtils
>(new ARMBasicBlockUtils(MF
));
87 BBUtils
->computeAllBlockSizes();
91 if (!ML
->getParentLoop())
92 Changed
|= ProcessLoop(ML
);
97 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop
*ML
) {
101 // Process inner loops first.
102 for (auto I
= ML
->begin(), E
= ML
->end(); I
!= E
; ++I
)
103 Changed
|= ProcessLoop(*I
);
105 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML
);
107 auto IsLoopStart
= [](MachineInstr
&MI
) {
108 return MI
.getOpcode() == ARM::t2DoLoopStart
;
111 auto SearchForStart
=
112 [&IsLoopStart
](MachineBasicBlock
*MBB
) -> MachineInstr
* {
113 for (auto &MI
: *MBB
) {
120 MachineInstr
*Start
= nullptr;
121 MachineInstr
*Dec
= nullptr;
122 MachineInstr
*End
= nullptr;
125 if (auto *Preheader
= ML
->getLoopPreheader())
126 Start
= SearchForStart(Preheader
);
128 // Find the low-overhead loop components and decide whether or not to fall
129 // back to a normal loop.
130 for (auto *MBB
: reverse(ML
->getBlocks())) {
131 for (auto &MI
: *MBB
) {
132 if (MI
.getOpcode() == ARM::t2LoopDec
)
134 else if (MI
.getOpcode() == ARM::t2LoopEnd
)
140 // TODO: Though the call will require LE to execute again, does this
141 // mean we should revert? Always executing LE hopefully should be faster
142 // than performing a sub,cmp,br or even subs,br.
143 if (MI
.getDesc().isCall())
146 // If we find that we load/store LR between LoopDec and LoopEnd, expect
147 // that the decremented value has been spilled to the stack. Because
148 // this value isn't actually going to be produced until the latch, by LE,
149 // we would need to generate a real sub. The value is also likely to be
150 // reloaded for use of LoopEnd - in which in case we'd need to perform
151 // an add because it gets negated again by LE! The other option is to
152 // then generate the other form of LE which doesn't perform the sub.
153 if (MI
.mayLoad() || MI
.mayStore())
155 MI
.getOperand(0).isReg() && MI
.getOperand(0).getReg() == ARM::LR
;
158 if (Dec
&& End
&& Revert
)
162 if (Start
|| Dec
|| End
) {
163 if (!Start
|| !Dec
|| !End
)
164 report_fatal_error("Failed to find all loop components");
166 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
170 if (!End
->getOperand(1).isMBB() ||
171 End
->getOperand(1).getMBB() != ML
->getHeader())
172 report_fatal_error("Expected LoopEnd to target Loop Header");
174 // The LE instructions has 12-bits for the label offset.
175 if (!BBUtils
->isBBInRange(End
, ML
->getHeader(), 4096)) {
176 LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
180 LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
181 << " - Found Loop Dec: " << *Dec
182 << " - Found Loop End: " << *End
);
184 Expand(ML
, Start
, Dec
, End
, Revert
);
188 void ARMLowOverheadLoops::Expand(MachineLoop
*ML
, MachineInstr
*Start
,
189 MachineInstr
*Dec
, MachineInstr
*End
,
192 auto ExpandLoopStart
= [this](MachineLoop
*ML
, MachineInstr
*Start
) {
193 // The trip count should already been held in LR since the instructions
194 // within the loop can only read and write to LR. So, there should be a
195 // mov to setup the count. WLS/DLS perform this move, so find the original
196 // and delete it - inserting WLS/DLS in its place.
197 MachineBasicBlock
*MBB
= Start
->getParent();
198 MachineInstr
*InsertPt
= Start
;
199 for (auto &I
: MRI
->def_instructions(ARM::LR
)) {
200 if (I
.getParent() != MBB
)
204 if (!I
.getOperand(2).isImm() || I
.getOperand(2).getImm() != ARMCC::AL
)
207 // Only handle move reg, if the trip count it will need moving into a reg
208 // before the setup instruction anyway.
209 if (!I
.getDesc().isMoveReg() ||
210 !I
.getOperand(1).isIdenticalTo(Start
->getOperand(0)))
216 MachineInstrBuilder MIB
=
217 BuildMI(*MBB
, InsertPt
, InsertPt
->getDebugLoc(), TII
->get(ARM::t2DLS
));
218 if (InsertPt
!= Start
)
219 InsertPt
->eraseFromParent();
222 MIB
.add(Start
->getOperand(0));
223 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted DLS: " << *MIB
);
224 Start
->eraseFromParent();
227 // Combine the LoopDec and LoopEnd instructions into LE(TP).
228 auto ExpandLoopEnd
= [this](MachineLoop
*ML
, MachineInstr
*Dec
,
230 MachineBasicBlock
*MBB
= End
->getParent();
231 MachineInstrBuilder MIB
= BuildMI(*MBB
, End
, End
->getDebugLoc(),
232 TII
->get(ARM::t2LEUpdate
));
234 MIB
.add(End
->getOperand(0));
235 MIB
.add(End
->getOperand(1));
236 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB
);
238 // If there is a branch after loop end, which branches to the fallthrough
239 // block, remove the branch.
240 MachineBasicBlock
*Latch
= End
->getParent();
241 MachineInstr
*Terminator
= &Latch
->instr_back();
242 if (End
!= Terminator
) {
243 MachineBasicBlock
*Exit
= ML
->getExitBlock();
244 if (Latch
->isLayoutSuccessor(Exit
)) {
245 LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop exit branch: "
247 Terminator
->eraseFromParent();
250 End
->eraseFromParent();
251 Dec
->eraseFromParent();
254 // Generate a subs, or sub and cmp, and a branch instead of an LE.
255 // TODO: Check flags so that we can possibly generate a subs.
256 auto ExpandBranch
= [this](MachineInstr
*Dec
, MachineInstr
*End
) {
257 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n");
259 MachineBasicBlock
*MBB
= Dec
->getParent();
260 MachineInstrBuilder MIB
= BuildMI(*MBB
, Dec
, Dec
->getDebugLoc(),
261 TII
->get(ARM::t2SUBri
));
263 MIB
.add(Dec
->getOperand(1));
264 MIB
.add(Dec
->getOperand(2));
265 MIB
.addImm(ARMCC::AL
);
270 MBB
= End
->getParent();
271 MIB
= BuildMI(*MBB
, End
, End
->getDebugLoc(), TII
->get(ARM::t2CMPri
));
274 MIB
.addImm(ARMCC::AL
);
277 MIB
= BuildMI(*MBB
, End
, End
->getDebugLoc(), TII
->get(ARM::t2Bcc
));
278 MIB
.add(End
->getOperand(1)); // branch target
279 MIB
.addImm(ARMCC::NE
); // condition code
280 End
->eraseFromParent();
281 Dec
->eraseFromParent();
285 Start
->eraseFromParent();
286 ExpandBranch(Dec
, End
);
288 ExpandLoopStart(ML
, Start
);
289 ExpandLoopEnd(ML
, Dec
, End
);
293 FunctionPass
*llvm::createARMLowOverheadLoopsPass() {
294 return new ARMLowOverheadLoops();