[ARM] DLS/LE low-overhead loop code generation
[llvm-core.git] / lib / Target / ARM / ARMLowOverheadLoops.cpp
blobb7f3e5bd350227f2a8020980e6057c5b5954ac04
1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
15 /// pre-preheader?
16 /// - t2LoopDec - placed within in the loop body.
17 /// - t2LoopEnd - the loop latch terminator.
18 ///
19 //===----------------------------------------------------------------------===//
21 #include "ARM.h"
22 #include "ARMBaseInstrInfo.h"
23 #include "ARMBaseRegisterInfo.h"
24 #include "ARMBasicBlockInfo.h"
25 #include "ARMSubtarget.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 using namespace llvm;
32 #define DEBUG_TYPE "arm-low-overhead-loops"
33 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
35 namespace {
37 class ARMLowOverheadLoops : public MachineFunctionPass {
38 const ARMBaseInstrInfo *TII = nullptr;
39 MachineRegisterInfo *MRI = nullptr;
40 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
42 public:
43 static char ID;
45 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.setPreservesCFG();
49 AU.addRequired<MachineLoopInfo>();
50 MachineFunctionPass::getAnalysisUsage(AU);
53 bool runOnMachineFunction(MachineFunction &MF) override;
55 bool ProcessLoop(MachineLoop *ML);
57 void Expand(MachineLoop *ML, MachineInstr *Start,
58 MachineInstr *Dec, MachineInstr *End, bool Revert);
60 MachineFunctionProperties getRequiredProperties() const override {
61 return MachineFunctionProperties().set(
62 MachineFunctionProperties::Property::NoVRegs);
65 StringRef getPassName() const override {
66 return ARM_LOW_OVERHEAD_LOOPS_NAME;
71 char ARMLowOverheadLoops::ID = 0;
73 INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
74 false, false)
76 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
77 //if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
78 //return false;
80 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
82 auto &MLI = getAnalysis<MachineLoopInfo>();
83 MRI = &MF.getRegInfo();
84 TII = static_cast<const ARMBaseInstrInfo*>(
85 MF.getSubtarget().getInstrInfo());
86 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
87 BBUtils->computeAllBlockSizes();
89 bool Changed = false;
90 for (auto ML : MLI) {
91 if (!ML->getParentLoop())
92 Changed |= ProcessLoop(ML);
94 return Changed;
97 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
99 bool Changed = false;
101 // Process inner loops first.
102 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
103 Changed |= ProcessLoop(*I);
105 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
107 auto IsLoopStart = [](MachineInstr &MI) {
108 return MI.getOpcode() == ARM::t2DoLoopStart;
111 auto SearchForStart =
112 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr* {
113 for (auto &MI : *MBB) {
114 if (IsLoopStart(MI))
115 return &MI;
117 return nullptr;
120 MachineInstr *Start = nullptr;
121 MachineInstr *Dec = nullptr;
122 MachineInstr *End = nullptr;
123 bool Revert = false;
125 if (auto *Preheader = ML->getLoopPreheader())
126 Start = SearchForStart(Preheader);
128 // Find the low-overhead loop components and decide whether or not to fall
129 // back to a normal loop.
130 for (auto *MBB : reverse(ML->getBlocks())) {
131 for (auto &MI : *MBB) {
132 if (MI.getOpcode() == ARM::t2LoopDec)
133 Dec = &MI;
134 else if (MI.getOpcode() == ARM::t2LoopEnd)
135 End = &MI;
137 if (!Dec)
138 continue;
140 // TODO: Though the call will require LE to execute again, does this
141 // mean we should revert? Always executing LE hopefully should be faster
142 // than performing a sub,cmp,br or even subs,br.
143 if (MI.getDesc().isCall())
144 Revert = true;
146 // If we find that we load/store LR between LoopDec and LoopEnd, expect
147 // that the decremented value has been spilled to the stack. Because
148 // this value isn't actually going to be produced until the latch, by LE,
149 // we would need to generate a real sub. The value is also likely to be
150 // reloaded for use of LoopEnd - in which in case we'd need to perform
151 // an add because it gets negated again by LE! The other option is to
152 // then generate the other form of LE which doesn't perform the sub.
153 if (MI.mayLoad() || MI.mayStore())
154 Revert =
155 MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
158 if (Dec && End && Revert)
159 break;
162 if (Start || Dec || End) {
163 if (!Start || !Dec || !End)
164 report_fatal_error("Failed to find all loop components");
165 } else {
166 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
167 return Changed;
170 if (!End->getOperand(1).isMBB() ||
171 End->getOperand(1).getMBB() != ML->getHeader())
172 report_fatal_error("Expected LoopEnd to target Loop Header");
174 // The LE instructions has 12-bits for the label offset.
175 if (!BBUtils->isBBInRange(End, ML->getHeader(), 4096)) {
176 LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
177 Revert = true;
180 LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
181 << " - Found Loop Dec: " << *Dec
182 << " - Found Loop End: " << *End);
184 Expand(ML, Start, Dec, End, Revert);
185 return true;
188 void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
189 MachineInstr *Dec, MachineInstr *End,
190 bool Revert) {
192 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
193 // The trip count should already been held in LR since the instructions
194 // within the loop can only read and write to LR. So, there should be a
195 // mov to setup the count. WLS/DLS perform this move, so find the original
196 // and delete it - inserting WLS/DLS in its place.
197 MachineBasicBlock *MBB = Start->getParent();
198 MachineInstr *InsertPt = Start;
199 for (auto &I : MRI->def_instructions(ARM::LR)) {
200 if (I.getParent() != MBB)
201 continue;
203 // Always execute.
204 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
205 continue;
207 // Only handle move reg, if the trip count it will need moving into a reg
208 // before the setup instruction anyway.
209 if (!I.getDesc().isMoveReg() ||
210 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
211 continue;
212 InsertPt = &I;
213 break;
216 MachineInstrBuilder MIB =
217 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(ARM::t2DLS));
218 if (InsertPt != Start)
219 InsertPt->eraseFromParent();
221 MIB.addDef(ARM::LR);
222 MIB.add(Start->getOperand(0));
223 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted DLS: " << *MIB);
224 Start->eraseFromParent();
227 // Combine the LoopDec and LoopEnd instructions into LE(TP).
228 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
229 MachineInstr *End) {
230 MachineBasicBlock *MBB = End->getParent();
231 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
232 TII->get(ARM::t2LEUpdate));
233 MIB.addDef(ARM::LR);
234 MIB.add(End->getOperand(0));
235 MIB.add(End->getOperand(1));
236 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
238 // If there is a branch after loop end, which branches to the fallthrough
239 // block, remove the branch.
240 MachineBasicBlock *Latch = End->getParent();
241 MachineInstr *Terminator = &Latch->instr_back();
242 if (End != Terminator) {
243 MachineBasicBlock *Exit = ML->getExitBlock();
244 if (Latch->isLayoutSuccessor(Exit)) {
245 LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop exit branch: "
246 << *Terminator);
247 Terminator->eraseFromParent();
250 End->eraseFromParent();
251 Dec->eraseFromParent();
254 // Generate a subs, or sub and cmp, and a branch instead of an LE.
255 // TODO: Check flags so that we can possibly generate a subs.
256 auto ExpandBranch = [this](MachineInstr *Dec, MachineInstr *End) {
257 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n");
258 // Create sub
259 MachineBasicBlock *MBB = Dec->getParent();
260 MachineInstrBuilder MIB = BuildMI(*MBB, Dec, Dec->getDebugLoc(),
261 TII->get(ARM::t2SUBri));
262 MIB.addDef(ARM::LR);
263 MIB.add(Dec->getOperand(1));
264 MIB.add(Dec->getOperand(2));
265 MIB.addImm(ARMCC::AL);
266 MIB.addReg(0);
267 MIB.addReg(0);
269 // Create cmp
270 MBB = End->getParent();
271 MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2CMPri));
272 MIB.addReg(ARM::LR);
273 MIB.addImm(0);
274 MIB.addImm(ARMCC::AL);
276 // Create bne
277 MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2Bcc));
278 MIB.add(End->getOperand(1)); // branch target
279 MIB.addImm(ARMCC::NE); // condition code
280 End->eraseFromParent();
281 Dec->eraseFromParent();
284 if (Revert) {
285 Start->eraseFromParent();
286 ExpandBranch(Dec, End);
287 } else {
288 ExpandLoopStart(ML, Start);
289 ExpandLoopEnd(ML, Dec, End);
293 FunctionPass *llvm::createARMLowOverheadLoopsPass() {
294 return new ARMLowOverheadLoops();