[ARM] MVE big endian bitcasts
[llvm-complete.git] / lib / Target / ARM / ARMLowOverheadLoops.cpp
blob303986a0b49f21a23e8a77b5b991c57187d74f92
1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
15 /// pre-preheader?
16 /// - t2LoopDec - placed within in the loop body.
17 /// - t2LoopEnd - the loop latch terminator.
18 ///
19 //===----------------------------------------------------------------------===//
21 #include "ARM.h"
22 #include "ARMBaseInstrInfo.h"
23 #include "ARMBaseRegisterInfo.h"
24 #include "ARMBasicBlockInfo.h"
25 #include "ARMSubtarget.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 using namespace llvm;
32 #define DEBUG_TYPE "arm-low-overhead-loops"
33 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
35 namespace {
37 class ARMLowOverheadLoops : public MachineFunctionPass {
38 const ARMBaseInstrInfo *TII = nullptr;
39 MachineRegisterInfo *MRI = nullptr;
40 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
42 public:
43 static char ID;
45 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.setPreservesCFG();
49 AU.addRequired<MachineLoopInfo>();
50 MachineFunctionPass::getAnalysisUsage(AU);
53 bool runOnMachineFunction(MachineFunction &MF) override;
55 bool ProcessLoop(MachineLoop *ML);
57 bool RevertNonLoops(MachineFunction &MF);
59 void RevertWhile(MachineInstr *MI) const;
61 void RevertLoopDec(MachineInstr *MI) const;
63 void RevertLoopEnd(MachineInstr *MI) const;
65 void Expand(MachineLoop *ML, MachineInstr *Start,
66 MachineInstr *Dec, MachineInstr *End, bool Revert);
68 MachineFunctionProperties getRequiredProperties() const override {
69 return MachineFunctionProperties().set(
70 MachineFunctionProperties::Property::NoVRegs);
73 StringRef getPassName() const override {
74 return ARM_LOW_OVERHEAD_LOOPS_NAME;
79 char ARMLowOverheadLoops::ID = 0;
81 INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
82 false, false)
84 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
85 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
86 return false;
88 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
90 auto &MLI = getAnalysis<MachineLoopInfo>();
91 MRI = &MF.getRegInfo();
92 TII = static_cast<const ARMBaseInstrInfo*>(
93 MF.getSubtarget().getInstrInfo());
94 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
95 BBUtils->computeAllBlockSizes();
96 BBUtils->adjustBBOffsetsAfter(&MF.front());
98 bool Changed = false;
99 for (auto ML : MLI) {
100 if (!ML->getParentLoop())
101 Changed |= ProcessLoop(ML);
103 Changed |= RevertNonLoops(MF);
104 return Changed;
107 static bool IsLoopStart(MachineInstr &MI) {
108 return MI.getOpcode() == ARM::t2DoLoopStart ||
109 MI.getOpcode() == ARM::t2WhileLoopStart;
112 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
114 bool Changed = false;
116 // Process inner loops first.
117 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
118 Changed |= ProcessLoop(*I);
120 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
122 // Search the given block for a loop start instruction. If one isn't found,
123 // and there's only one predecessor block, search that one too.
124 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
125 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
126 for (auto &MI : *MBB) {
127 if (IsLoopStart(MI))
128 return &MI;
130 if (MBB->pred_size() == 1)
131 return SearchForStart(*MBB->pred_begin());
132 return nullptr;
135 MachineInstr *Start = nullptr;
136 MachineInstr *Dec = nullptr;
137 MachineInstr *End = nullptr;
138 bool Revert = false;
140 // Search the preheader for the start intrinsic, or look through the
141 // predecessors of the header to find exactly one set.iterations intrinsic.
142 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
143 // with potentially multiple set.loop.iterations, so we need to enable this.
144 if (auto *Preheader = ML->getLoopPreheader()) {
145 Start = SearchForStart(Preheader);
146 } else {
147 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
148 << " - Performing manual predecessor search.\n");
149 MachineBasicBlock *Pred = nullptr;
150 for (auto *MBB : ML->getHeader()->predecessors()) {
151 if (!ML->contains(MBB)) {
152 if (Pred) {
153 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
154 Start = nullptr;
155 break;
157 Pred = MBB;
158 Start = SearchForStart(MBB);
163 // Find the low-overhead loop components and decide whether or not to fall
164 // back to a normal loop.
165 for (auto *MBB : reverse(ML->getBlocks())) {
166 for (auto &MI : *MBB) {
167 if (MI.getOpcode() == ARM::t2LoopDec)
168 Dec = &MI;
169 else if (MI.getOpcode() == ARM::t2LoopEnd)
170 End = &MI;
171 else if (IsLoopStart(MI))
172 Start = &MI;
173 else if (MI.getDesc().isCall())
174 // TODO: Though the call will require LE to execute again, does this
175 // mean we should revert? Always executing LE hopefully should be
176 // faster than performing a sub,cmp,br or even subs,br.
177 Revert = true;
179 if (!Dec)
180 continue;
182 // If we find that we load/store LR between LoopDec and LoopEnd, expect
183 // that the decremented value has been spilled to the stack. Because
184 // this value isn't actually going to be produced until the latch, by LE,
185 // we would need to generate a real sub. The value is also likely to be
186 // reloaded for use of LoopEnd - in which in case we'd need to perform
187 // an add because it gets negated again by LE! The other option is to
188 // then generate the other form of LE which doesn't perform the sub.
189 if (MI.mayLoad() || MI.mayStore())
190 Revert =
191 MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
194 if (Dec && End && Revert)
195 break;
198 LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
199 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
200 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
202 if (!Start && !Dec && !End) {
203 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
204 return Changed;
205 } else if (!(Start && Dec && End)) {
206 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
207 return false;
210 if (!End->getOperand(1).isMBB())
211 report_fatal_error("Expected LoopEnd to target basic block");
213 // TODO Maybe there's cases where the target doesn't have to be the header,
214 // but for now be safe and revert.
215 if (End->getOperand(1).getMBB() != ML->getHeader()) {
216 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
217 Revert = true;
220 // The WLS and LE instructions have 12-bits for the label offset. WLS
221 // requires a positive offset, while LE uses negative.
222 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
223 !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
224 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
225 Revert = true;
227 if (Start->getOpcode() == ARM::t2WhileLoopStart &&
228 (BBUtils->getOffsetOf(Start) >
229 BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
230 !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
231 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
232 Revert = true;
235 Expand(ML, Start, Dec, End, Revert);
236 return true;
239 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
240 // beq that branches to the exit branch.
241 // FIXME: Need to check that we're not trashing the CPSR when generating the
242 // cmp. We could also try to generate a cbz if the value in LR is also in
243 // another low register.
244 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
245 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
246 MachineBasicBlock *MBB = MI->getParent();
247 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
248 TII->get(ARM::t2CMPri));
249 MIB.addReg(ARM::LR);
250 MIB.addImm(0);
251 MIB.addImm(ARMCC::AL);
252 MIB.addReg(ARM::CPSR);
254 // TODO: Try to use tBcc instead
255 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
256 MIB.add(MI->getOperand(1)); // branch target
257 MIB.addImm(ARMCC::EQ); // condition code
258 MIB.addReg(ARM::CPSR);
259 MI->eraseFromParent();
262 // TODO: Check flags so that we can possibly generate a tSubs or tSub.
263 void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
264 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
265 MachineBasicBlock *MBB = MI->getParent();
266 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
267 TII->get(ARM::t2SUBri));
268 MIB.addDef(ARM::LR);
269 MIB.add(MI->getOperand(1));
270 MIB.add(MI->getOperand(2));
271 MIB.addImm(ARMCC::AL);
272 MIB.addReg(0);
273 MIB.addReg(0);
274 MI->eraseFromParent();
277 // Generate a subs, or sub and cmp, and a branch instead of an LE.
278 // FIXME: Need to check that we're not trashing the CPSR when generating
279 // the cmp.
280 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
281 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
283 // Create cmp
284 MachineBasicBlock *MBB = MI->getParent();
285 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
286 TII->get(ARM::t2CMPri));
287 MIB.addReg(ARM::LR);
288 MIB.addImm(0);
289 MIB.addImm(ARMCC::AL);
290 MIB.addReg(ARM::CPSR);
292 // TODO Try to use tBcc instead.
293 // Create bne
294 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
295 MIB.add(MI->getOperand(1)); // branch target
296 MIB.addImm(ARMCC::NE); // condition code
297 MIB.addReg(ARM::CPSR);
298 MI->eraseFromParent();
301 void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
302 MachineInstr *Dec, MachineInstr *End,
303 bool Revert) {
305 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
306 // The trip count should already been held in LR since the instructions
307 // within the loop can only read and write to LR. So, there should be a
308 // mov to setup the count. WLS/DLS perform this move, so find the original
309 // and delete it - inserting WLS/DLS in its place.
310 MachineBasicBlock *MBB = Start->getParent();
311 MachineInstr *InsertPt = Start;
312 for (auto &I : MRI->def_instructions(ARM::LR)) {
313 if (I.getParent() != MBB)
314 continue;
316 // Always execute.
317 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
318 continue;
320 // Only handle move reg, if the trip count it will need moving into a reg
321 // before the setup instruction anyway.
322 if (!I.getDesc().isMoveReg() ||
323 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
324 continue;
325 InsertPt = &I;
326 break;
329 unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
330 ARM::t2DLS : ARM::t2WLS;
331 MachineInstrBuilder MIB =
332 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
334 MIB.addDef(ARM::LR);
335 MIB.add(Start->getOperand(0));
336 if (Opc == ARM::t2WLS)
337 MIB.add(Start->getOperand(1));
339 if (InsertPt != Start)
340 InsertPt->eraseFromParent();
341 Start->eraseFromParent();
342 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
343 return &*MIB;
346 // Combine the LoopDec and LoopEnd instructions into LE(TP).
347 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
348 MachineInstr *End) {
349 MachineBasicBlock *MBB = End->getParent();
350 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
351 TII->get(ARM::t2LEUpdate));
352 MIB.addDef(ARM::LR);
353 MIB.add(End->getOperand(0));
354 MIB.add(End->getOperand(1));
355 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
357 End->eraseFromParent();
358 Dec->eraseFromParent();
359 return &*MIB;
362 // TODO: We should be able to automatically remove these branches before we
363 // get here - probably by teaching analyzeBranch about the pseudo
364 // instructions.
365 // If there is an unconditional branch, after I, that just branches to the
366 // next block, remove it.
367 auto RemoveDeadBranch = [](MachineInstr *I) {
368 MachineBasicBlock *BB = I->getParent();
369 MachineInstr *Terminator = &BB->instr_back();
370 if (Terminator->isUnconditionalBranch() && I != Terminator) {
371 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
372 if (BB->isLayoutSuccessor(Succ)) {
373 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
374 Terminator->eraseFromParent();
379 if (Revert) {
380 if (Start->getOpcode() == ARM::t2WhileLoopStart)
381 RevertWhile(Start);
382 else
383 Start->eraseFromParent();
384 RevertLoopDec(Dec);
385 RevertLoopEnd(End);
386 } else {
387 Start = ExpandLoopStart(ML, Start);
388 RemoveDeadBranch(Start);
389 End = ExpandLoopEnd(ML, Dec, End);
390 RemoveDeadBranch(End);
394 bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
395 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
396 bool Changed = false;
398 for (auto &MBB : MF) {
399 SmallVector<MachineInstr*, 4> Starts;
400 SmallVector<MachineInstr*, 4> Decs;
401 SmallVector<MachineInstr*, 4> Ends;
403 for (auto &I : MBB) {
404 if (IsLoopStart(I))
405 Starts.push_back(&I);
406 else if (I.getOpcode() == ARM::t2LoopDec)
407 Decs.push_back(&I);
408 else if (I.getOpcode() == ARM::t2LoopEnd)
409 Ends.push_back(&I);
412 if (Starts.empty() && Decs.empty() && Ends.empty())
413 continue;
415 Changed = true;
417 for (auto *Start : Starts) {
418 if (Start->getOpcode() == ARM::t2WhileLoopStart)
419 RevertWhile(Start);
420 else
421 Start->eraseFromParent();
423 for (auto *Dec : Decs)
424 RevertLoopDec(Dec);
426 for (auto *End : Ends)
427 RevertLoopEnd(End);
429 return Changed;
432 FunctionPass *llvm::createARMLowOverheadLoopsPass() {
433 return new ARMLowOverheadLoops();