[AMDGPU] Check for immediate SrcC in mfma in AsmParser
[llvm-core.git] / lib / Target / ARM / ARMLowOverheadLoops.cpp
blobcfd1f40871077f425392701b3546f3bf9a82fe2e
1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor.
15 /// - t2LoopDec - placed within in the loop body.
16 /// - t2LoopEnd - the loop latch terminator.
17 ///
18 //===----------------------------------------------------------------------===//
20 #include "ARM.h"
21 #include "ARMBaseInstrInfo.h"
22 #include "ARMBaseRegisterInfo.h"
23 #include "ARMBasicBlockInfo.h"
24 #include "ARMSubtarget.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineLoopInfo.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 using namespace llvm;
31 #define DEBUG_TYPE "arm-low-overhead-loops"
32 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
34 namespace {
36 class ARMLowOverheadLoops : public MachineFunctionPass {
37 const ARMBaseInstrInfo *TII = nullptr;
38 MachineRegisterInfo *MRI = nullptr;
39 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
41 public:
42 static char ID;
44 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
46 void getAnalysisUsage(AnalysisUsage &AU) const override {
47 AU.setPreservesCFG();
48 AU.addRequired<MachineLoopInfo>();
49 MachineFunctionPass::getAnalysisUsage(AU);
52 bool runOnMachineFunction(MachineFunction &MF) override;
54 bool ProcessLoop(MachineLoop *ML);
56 bool RevertNonLoops(MachineFunction &MF);
58 void RevertWhile(MachineInstr *MI) const;
60 void RevertLoopDec(MachineInstr *MI) const;
62 void RevertLoopEnd(MachineInstr *MI) const;
64 void Expand(MachineLoop *ML, MachineInstr *Start,
65 MachineInstr *Dec, MachineInstr *End, bool Revert);
67 MachineFunctionProperties getRequiredProperties() const override {
68 return MachineFunctionProperties().set(
69 MachineFunctionProperties::Property::NoVRegs);
72 StringRef getPassName() const override {
73 return ARM_LOW_OVERHEAD_LOOPS_NAME;
78 char ARMLowOverheadLoops::ID = 0;
80 INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
81 false, false)
83 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
84 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
85 return false;
87 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
89 auto &MLI = getAnalysis<MachineLoopInfo>();
90 MRI = &MF.getRegInfo();
91 TII = static_cast<const ARMBaseInstrInfo*>(
92 MF.getSubtarget().getInstrInfo());
93 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
94 BBUtils->computeAllBlockSizes();
95 BBUtils->adjustBBOffsetsAfter(&MF.front());
97 bool Changed = false;
98 for (auto ML : MLI) {
99 if (!ML->getParentLoop())
100 Changed |= ProcessLoop(ML);
102 Changed |= RevertNonLoops(MF);
103 return Changed;
106 static bool IsLoopStart(MachineInstr &MI) {
107 return MI.getOpcode() == ARM::t2DoLoopStart ||
108 MI.getOpcode() == ARM::t2WhileLoopStart;
111 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
113 bool Changed = false;
115 // Process inner loops first.
116 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
117 Changed |= ProcessLoop(*I);
119 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
121 // Search the given block for a loop start instruction. If one isn't found,
122 // and there's only one predecessor block, search that one too.
123 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
124 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
125 for (auto &MI : *MBB) {
126 if (IsLoopStart(MI))
127 return &MI;
129 if (MBB->pred_size() == 1)
130 return SearchForStart(*MBB->pred_begin());
131 return nullptr;
134 MachineInstr *Start = nullptr;
135 MachineInstr *Dec = nullptr;
136 MachineInstr *End = nullptr;
137 bool Revert = false;
139 // Search the preheader for the start intrinsic, or look through the
140 // predecessors of the header to find exactly one set.iterations intrinsic.
141 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
142 // with potentially multiple set.loop.iterations, so we need to enable this.
143 if (auto *Preheader = ML->getLoopPreheader()) {
144 Start = SearchForStart(Preheader);
145 } else {
146 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
147 << " - Performing manual predecessor search.\n");
148 MachineBasicBlock *Pred = nullptr;
149 for (auto *MBB : ML->getHeader()->predecessors()) {
150 if (!ML->contains(MBB)) {
151 if (Pred) {
152 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
153 Start = nullptr;
154 break;
156 Pred = MBB;
157 Start = SearchForStart(MBB);
162 // Find the low-overhead loop components and decide whether or not to fall
163 // back to a normal loop.
164 for (auto *MBB : reverse(ML->getBlocks())) {
165 for (auto &MI : *MBB) {
166 if (MI.getOpcode() == ARM::t2LoopDec)
167 Dec = &MI;
168 else if (MI.getOpcode() == ARM::t2LoopEnd)
169 End = &MI;
170 else if (IsLoopStart(MI))
171 Start = &MI;
172 else if (MI.getDesc().isCall())
173 // TODO: Though the call will require LE to execute again, does this
174 // mean we should revert? Always executing LE hopefully should be
175 // faster than performing a sub,cmp,br or even subs,br.
176 Revert = true;
178 if (!Dec || End)
179 continue;
181 // If we find that LR has been written or read between LoopDec and
182 // LoopEnd, expect that the decremented value is being used else where.
183 // Because this value isn't actually going to be produced until the
184 // latch, by LE, we would need to generate a real sub. The value is also
185 // likely to be copied/reloaded for use of LoopEnd - in which in case
186 // we'd need to perform an add because it gets subtracted again by LE!
187 // The other option is to then generate the other form of LE which doesn't
188 // perform the sub.
189 for (auto &MO : MI.operands()) {
190 if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
191 MO.getReg() == ARM::LR) {
192 LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
193 Revert = true;
194 break;
199 if (Dec && End && Revert)
200 break;
203 LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
204 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
205 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
207 if (!Start && !Dec && !End) {
208 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
209 return Changed;
210 } else if (!(Start && Dec && End)) {
211 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
212 return false;
215 if (!End->getOperand(1).isMBB())
216 report_fatal_error("Expected LoopEnd to target basic block");
218 // TODO Maybe there's cases where the target doesn't have to be the header,
219 // but for now be safe and revert.
220 if (End->getOperand(1).getMBB() != ML->getHeader()) {
221 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
222 Revert = true;
225 // The WLS and LE instructions have 12-bits for the label offset. WLS
226 // requires a positive offset, while LE uses negative.
227 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
228 !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
229 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
230 Revert = true;
232 if (Start->getOpcode() == ARM::t2WhileLoopStart &&
233 (BBUtils->getOffsetOf(Start) >
234 BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
235 !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
236 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
237 Revert = true;
240 Expand(ML, Start, Dec, End, Revert);
241 return true;
244 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
245 // beq that branches to the exit branch.
246 // FIXME: Need to check that we're not trashing the CPSR when generating the
247 // cmp. We could also try to generate a cbz if the value in LR is also in
248 // another low register.
249 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
250 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
251 MachineBasicBlock *MBB = MI->getParent();
252 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
253 TII->get(ARM::t2CMPri));
254 MIB.add(MI->getOperand(0));
255 MIB.addImm(0);
256 MIB.addImm(ARMCC::AL);
257 MIB.addReg(ARM::NoRegister);
259 // TODO: Try to use tBcc instead
260 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
261 MIB.add(MI->getOperand(1)); // branch target
262 MIB.addImm(ARMCC::EQ); // condition code
263 MIB.addReg(ARM::CPSR);
264 MI->eraseFromParent();
267 // TODO: Check flags so that we can possibly generate a tSubs or tSub.
268 void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
269 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
270 MachineBasicBlock *MBB = MI->getParent();
271 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
272 TII->get(ARM::t2SUBri));
273 MIB.addDef(ARM::LR);
274 MIB.add(MI->getOperand(1));
275 MIB.add(MI->getOperand(2));
276 MIB.addImm(ARMCC::AL);
277 MIB.addReg(0);
278 MIB.addReg(0);
279 MI->eraseFromParent();
282 // Generate a subs, or sub and cmp, and a branch instead of an LE.
283 // FIXME: Need to check that we're not trashing the CPSR when generating
284 // the cmp.
285 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
286 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
288 // Create cmp
289 MachineBasicBlock *MBB = MI->getParent();
290 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
291 TII->get(ARM::t2CMPri));
292 MIB.addReg(ARM::LR);
293 MIB.addImm(0);
294 MIB.addImm(ARMCC::AL);
295 MIB.addReg(ARM::NoRegister);
297 // TODO Try to use tBcc instead.
298 // Create bne
299 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
300 MIB.add(MI->getOperand(1)); // branch target
301 MIB.addImm(ARMCC::NE); // condition code
302 MIB.addReg(ARM::CPSR);
303 MI->eraseFromParent();
306 void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
307 MachineInstr *Dec, MachineInstr *End,
308 bool Revert) {
310 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
311 // The trip count should already been held in LR since the instructions
312 // within the loop can only read and write to LR. So, there should be a
313 // mov to setup the count. WLS/DLS perform this move, so find the original
314 // and delete it - inserting WLS/DLS in its place.
315 MachineBasicBlock *MBB = Start->getParent();
316 MachineInstr *InsertPt = Start;
317 for (auto &I : MRI->def_instructions(ARM::LR)) {
318 if (I.getParent() != MBB)
319 continue;
321 // Always execute.
322 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
323 continue;
325 // Only handle move reg, if the trip count it will need moving into a reg
326 // before the setup instruction anyway.
327 if (!I.getDesc().isMoveReg() ||
328 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
329 continue;
330 InsertPt = &I;
331 break;
334 unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
335 ARM::t2DLS : ARM::t2WLS;
336 MachineInstrBuilder MIB =
337 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
339 MIB.addDef(ARM::LR);
340 MIB.add(Start->getOperand(0));
341 if (Opc == ARM::t2WLS)
342 MIB.add(Start->getOperand(1));
344 if (InsertPt != Start)
345 InsertPt->eraseFromParent();
346 Start->eraseFromParent();
347 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
348 return &*MIB;
351 // Combine the LoopDec and LoopEnd instructions into LE(TP).
352 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
353 MachineInstr *End) {
354 MachineBasicBlock *MBB = End->getParent();
355 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
356 TII->get(ARM::t2LEUpdate));
357 MIB.addDef(ARM::LR);
358 MIB.add(End->getOperand(0));
359 MIB.add(End->getOperand(1));
360 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
362 End->eraseFromParent();
363 Dec->eraseFromParent();
364 return &*MIB;
367 // TODO: We should be able to automatically remove these branches before we
368 // get here - probably by teaching analyzeBranch about the pseudo
369 // instructions.
370 // If there is an unconditional branch, after I, that just branches to the
371 // next block, remove it.
372 auto RemoveDeadBranch = [](MachineInstr *I) {
373 MachineBasicBlock *BB = I->getParent();
374 MachineInstr *Terminator = &BB->instr_back();
375 if (Terminator->isUnconditionalBranch() && I != Terminator) {
376 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
377 if (BB->isLayoutSuccessor(Succ)) {
378 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
379 Terminator->eraseFromParent();
384 if (Revert) {
385 if (Start->getOpcode() == ARM::t2WhileLoopStart)
386 RevertWhile(Start);
387 else
388 Start->eraseFromParent();
389 RevertLoopDec(Dec);
390 RevertLoopEnd(End);
391 } else {
392 Start = ExpandLoopStart(ML, Start);
393 RemoveDeadBranch(Start);
394 End = ExpandLoopEnd(ML, Dec, End);
395 RemoveDeadBranch(End);
399 bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
400 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
401 bool Changed = false;
403 for (auto &MBB : MF) {
404 SmallVector<MachineInstr*, 4> Starts;
405 SmallVector<MachineInstr*, 4> Decs;
406 SmallVector<MachineInstr*, 4> Ends;
408 for (auto &I : MBB) {
409 if (IsLoopStart(I))
410 Starts.push_back(&I);
411 else if (I.getOpcode() == ARM::t2LoopDec)
412 Decs.push_back(&I);
413 else if (I.getOpcode() == ARM::t2LoopEnd)
414 Ends.push_back(&I);
417 if (Starts.empty() && Decs.empty() && Ends.empty())
418 continue;
420 Changed = true;
422 for (auto *Start : Starts) {
423 if (Start->getOpcode() == ARM::t2WhileLoopStart)
424 RevertWhile(Start);
425 else
426 Start->eraseFromParent();
428 for (auto *Dec : Decs)
429 RevertLoopDec(Dec);
431 for (auto *End : Ends)
432 RevertLoopEnd(End);
434 return Changed;
437 FunctionPass *llvm::createARMLowOverheadLoopsPass() {
438 return new ARMLowOverheadLoops();