1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheader's only predecessor.
15 /// - t2LoopDec - placed within the loop body.
16 /// - t2LoopEnd - the loop latch terminator.
18 //===----------------------------------------------------------------------===//
21 #include "ARMBaseInstrInfo.h"
22 #include "ARMBaseRegisterInfo.h"
23 #include "ARMBasicBlockInfo.h"
24 #include "ARMSubtarget.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineLoopInfo.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #define DEBUG_TYPE "arm-low-overhead-loops"
32 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
// Machine function pass for ARM low-overhead loops. It derives from
// MachineFunctionPass and declares the helpers that either expand or revert
// the loop pseudo instructions described in the file header.
// NOTE(review): access specifiers, the declaration of ID and the closing
// braces are not visible in this excerpt.
36 class ARMLowOverheadLoops
: public MachineFunctionPass
{
// Cached per-function state, all defaulting to null. MRI, TII and BBUtils are
// assigned in runOnMachineFunction.
37 MachineFunction
*MF
= nullptr;
38 const ARMBaseInstrInfo
*TII
= nullptr;
39 MachineRegisterInfo
*MRI
= nullptr;
40 std::unique_ptr
<ARMBasicBlockUtils
> BBUtils
= nullptr;
// The constructor only forwards the pass ID to the base class.
45 ARMLowOverheadLoops() : MachineFunctionPass(ID
) { }
// Registers MachineLoopInfo as a required analysis, then defers to the base
// class implementation.
47 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
49 AU
.addRequired
<MachineLoopInfo
>();
50 MachineFunctionPass::getAnalysisUsage(AU
);
// Pass entry point; defined out of line.
53 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Returns a property set with NoVRegs set.
55 MachineFunctionProperties
getRequiredProperties() const override
{
56 return MachineFunctionProperties().set(
57 MachineFunctionProperties::Property::NoVRegs
);
// Returns the pass name string defined by ARM_LOW_OVERHEAD_LOOPS_NAME.
60 StringRef
getPassName() const override
{
61 return ARM_LOW_OVERHEAD_LOOPS_NAME
;
// Handles a single loop, recursing into its sub-loops first.
65 bool ProcessLoop(MachineLoop
*ML
);
// Decides whether, and where, LR may be defined by the loop start
// instruction; ProcessLoop uses the result as the insertion point.
67 MachineInstr
* IsSafeToDefineLR(MachineInstr
*MI
);
// Walks every block of the function collecting loop pseudo instructions; per
// its debug message it reverts any that remain.
69 bool RevertNonLoops();
// Replaces a WhileLoopStart with a t2CMPri and an EQ-conditional branch to
// the instruction's basic block operand.
71 void RevertWhile(MachineInstr
*MI
) const;
// Replaces a LoopDec with a t2SUBri. Expand forwards the bool result as
// RevertLoopEnd's SkipCmp argument.
// NOTE(review): how AllowFlags is consumed is not visible in this excerpt.
73 bool RevertLoopDec(MachineInstr
*MI
, bool AllowFlags
= false) const;
// Replaces a LoopEnd with a t2CMPri and an NE-conditional branch.
// NOTE(review): SkipCmp presumably suppresses the compare - confirm against
// the definition.
75 void RevertLoopEnd(MachineInstr
*MI
, bool SkipCmp
= false) const;
// Performs the final rewrite of one loop from its Start/Dec/End pseudo
// instructions, the chosen insertion point and the Revert decision.
77 void Expand(MachineLoop
*ML
, MachineInstr
*Start
,
78 MachineInstr
*InsertPt
, MachineInstr
*Dec
,
79 MachineInstr
*End
, bool Revert
);
// Out-of-class definition of the pass ID, initialised to 0.
84 char ARMLowOverheadLoops::ID
= 0;
// Registers the pass using DEBUG_TYPE as its argument string and
// ARM_LOW_OVERHEAD_LOOPS_NAME as its description.
// NOTE(review): the remaining INITIALIZE_PASS arguments are not visible in
// this excerpt.
86 INITIALIZE_PASS(ARMLowOverheadLoops
, DEBUG_TYPE
, ARM_LOW_OVERHEAD_LOOPS_NAME
,
// Pass entry point. Caches per-function state, hands every top-level loop to
// ProcessLoop, then calls RevertNonLoops. Changed accumulates the results of
// both.
// NOTE(review): MF is dereferenced below, but its assignment from mf and the
// function's return statement are not visible in this excerpt - confirm.
89 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction
&mf
) {
90 const ARMSubtarget
&ST
= static_cast<const ARMSubtarget
&>(mf
.getSubtarget());
95 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF
->getName() << " ------------- \n");
97 auto &MLI
= getAnalysis
<MachineLoopInfo
>();
// Mark the function as tracking liveness.
98 MF
->getProperties().set(MachineFunctionProperties::Property::TracksLiveness
);
99 MRI
= &MF
->getRegInfo();
100 TII
= static_cast<const ARMBaseInstrInfo
*>(ST
.getInstrInfo());
// Rebuild the block size/offset information; ProcessLoop and the Revert*
// helpers query it through getOffsetOf and isBBInRange.
101 BBUtils
= std::unique_ptr
<ARMBasicBlockUtils
>(new ARMBasicBlockUtils(*MF
));
102 BBUtils
->computeAllBlockSizes();
103 BBUtils
->adjustBBOffsetsAfter(&MF
->front());
105 bool Changed
= false;
// Only loops without a parent are passed in here; ProcessLoop recurses into
// sub-loops itself.
106 for (auto ML
: MLI
) {
107 if (!ML
->getParentLoop())
108 Changed
|= ProcessLoop(ML
);
110 Changed
|= RevertNonLoops();
// Returns true when MI's opcode is t2DoLoopStart or t2WhileLoopStart.
114 static bool IsLoopStart(MachineInstr
&MI
) {
115 return MI
.getOpcode() == ARM::t2DoLoopStart
||
116 MI
.getOpcode() == ARM::t2WhileLoopStart
;
// Walks the instructions from Begin up to End, using iterator type T, and
// examines every operand of each instruction. Callers pass either a forward
// or a reverse block iterator as End.
// NOTE(review): the template header, the statements following the operand
// filter and the return statements are not visible in this excerpt;
// presumably the first instruction defining Reg is returned, or null when
// there is none - confirm.
120 static MachineInstr
* SearchForDef(MachineInstr
*Begin
, T End
, unsigned Reg
) {
121 for(auto &MI
: make_range(T(Begin
), End
)) {
122 for (auto &MO
: MI
.operands()) {
// Filter: true for any operand that is not a register def of Reg.
123 if (!MO
.isReg() || !MO
.isDef() || MO
.getReg() != Reg
)
// Walks the instructions from Begin up to End with a forward block iterator
// and examines every operand of each instruction.
// NOTE(review): the line declaring Reg, the statements following the operand
// filter and the return statements are not visible in this excerpt;
// presumably the first instruction using Reg is returned, or null when there
// is none - confirm.
131 static MachineInstr
* SearchForUse(MachineInstr
*Begin
,
132 MachineBasicBlock::iterator End
,
134 for(auto &MI
: make_range(MachineBasicBlock::iterator(Begin
), End
)) {
135 for (auto &MO
: MI
.operands()) {
// Filter: true for any operand that is not a register use of Reg.
136 if (!MO
.isReg() || !MO
.isUse() || MO
.getReg() != Reg
)
144 // Is it safe to define LR with DLS/WLS?
145 // LR can be defined if it is the operand to Start, because it's the same
146 // value, or if it's going to be equivalent to the operand to Start.
// NOTE(review): the return statements of this function are not visible in
// this excerpt; ProcessLoop uses the returned MachineInstr* as the insertion
// point for the loop start.
147 MachineInstr
*ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr
*Start
) {
// True for a tMOVr whose operand 0 is LR, whose operand 1 is Reg and whose
// operand 2 is the ARMCC::AL condition code.
149 auto IsMoveLR
= [](MachineInstr
*MI
, unsigned Reg
) {
150 return MI
->getOpcode() == ARM::tMOVr
&&
151 MI
->getOperand(0).getReg() == ARM::LR
&&
152 MI
->getOperand(1).getReg() == Reg
&&
153 MI
->getOperand(2).getImm() == ARMCC::AL
;
156 MachineBasicBlock
*MBB
= Start
->getParent();
// The count register is operand 0 of the loop start pseudo.
157 unsigned CountReg
= Start
->getOperand(0).getReg();
158 // Walk forward and backward in the block to find the closest instructions
159 // that define LR. Then also filter them out if they're not a mov lr.
160 MachineInstr
*PredLRDef
= SearchForDef(Start
, MBB
->rend(), ARM::LR
);
// NOTE(review): the statement guarded by this check is not visible in this
// excerpt.
161 if (PredLRDef
&& !IsMoveLR(PredLRDef
, CountReg
))
164 MachineInstr
*SuccLRDef
= SearchForDef(Start
, MBB
->end(), ARM::LR
);
// NOTE(review): the statement guarded by this check is not visible in this
// excerpt.
165 if (SuccLRDef
&& !IsMoveLR(SuccLRDef
, CountReg
))
168 // We've either found one, two or no mov lr instructions... Now figure out
169 // if they are performing the equivalent mov that the Start instruction will.
170 // Do this by scanning forward and backward to see if there's a def of the
171 // register holding the count value. If we find a suitable def, return it as
172 // the insert point. Later, if InsertPt != Start, then we can remove the
173 // redundant instruction.
// Forward scan: is CountReg redefined between Start and the following LR def?
175 MachineBasicBlock::iterator
End(SuccLRDef
);
176 if (!SearchForDef(Start
, End
, CountReg
)) {
// Backward scan: is CountReg redefined between the preceding LR def and Start?
182 MachineBasicBlock::reverse_iterator
End(PredLRDef
);
183 if (!SearchForDef(Start
, End
, CountReg
)) {
189 // We can define LR because LR already contains the same value.
190 if (Start
->getOperand(0).getReg() == ARM::LR
)
193 // We've found no suitable LR def and Start doesn't use LR directly. Can we
194 // just define LR anyway?
// Collect the registers that are live out of Start's block.
195 const TargetRegisterInfo
*TRI
= MF
->getSubtarget().getRegisterInfo();
196 LivePhysRegs
LiveRegs(*TRI
);
197 LiveRegs
.addLiveOuts(*MBB
);
199 // Not if we haven't found a suitable mov and LR is live out.
200 if (LiveRegs
.contains(ARM::LR
))
203 // If LR is not live out, we can insert the instruction if nothing else
// The check below looks for any use of LR from Start to the end of the block.
205 if (!SearchForUse(Start
, MBB
->end(), ARM::LR
))
208 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for"
// Processes one loop: recurses into its sub-loops, locates the loop's
// Start/Dec/End pseudo instructions, checks a series of conditions that feed
// the Revert decision, and finally calls Expand.
// NOTE(review): the declaration of Revert, the statements that assign Dec,
// End and Revert, and the return statements are not visible in this excerpt.
213 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop
*ML
) {
215 bool Changed
= false;
217 // Process inner loops first.
218 for (auto I
= ML
->begin(), E
= ML
->end(); I
!= E
; ++I
)
219 Changed
|= ProcessLoop(*I
);
221 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML
);
223 // Search the given block for a loop start instruction. If one isn't found,
224 // and there's only one predecessor block, search that one too.
// The lambda is held in a std::function and captures itself by reference so
// that it can recurse into the single predecessor.
225 std::function
<MachineInstr
*(MachineBasicBlock
*)> SearchForStart
=
226 [&SearchForStart
](MachineBasicBlock
*MBB
) -> MachineInstr
* {
227 for (auto &MI
: *MBB
) {
231 if (MBB
->pred_size() == 1)
232 return SearchForStart(*MBB
->pred_begin());
// Loop components, all initially null. Start is assigned by the
// preheader/predecessor search below.
236 MachineInstr
*Start
= nullptr;
237 MachineInstr
*Dec
= nullptr;
238 MachineInstr
*End
= nullptr;
241 // Search the preheader for the start intrinsic, or look through the
242 // predecessors of the header to find exactly one set.iterations intrinsic.
243 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
244 // with potentially multiple set.loop.iterations, so we need to enable this.
245 if (auto *Preheader
= ML
->getLoopPreheader()) {
246 Start
= SearchForStart(Preheader
);
248 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
249 << " - Performing manual predecessor search.\n");
// Manual search: consider only header predecessors that lie outside the loop.
250 MachineBasicBlock
*Pred
= nullptr;
251 for (auto *MBB
: ML
->getHeader()->predecessors()) {
252 if (!ML
->contains(MBB
)) {
254 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
259 Start
= SearchForStart(MBB
);
264 // Find the low-overhead loop components and decide whether or not to fall
265 // back to a normal loop.
// The loop's blocks are visited in reverse order.
266 for (auto *MBB
: reverse(ML
->getBlocks())) {
267 for (auto &MI
: *MBB
) {
268 if (MI
.getOpcode() == ARM::t2LoopDec
)
270 else if (MI
.getOpcode() == ARM::t2LoopEnd
)
272 else if (IsLoopStart(MI
))
274 else if (MI
.getDesc().isCall()) {
275 // TODO: Though the call will require LE to execute again, does this
276 // mean we should revert? Always executing LE hopefully should be
277 // faster than performing a sub,cmp,br or even subs,br.
279 LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
285 // If we find that LR has been written or read between LoopDec and
286 // LoopEnd, expect that the decremented value is being used elsewhere.
287 // Because this value isn't actually going to be produced until the
288 // latch, by LE, we would need to generate a real sub. The value is also
289 // likely to be copied/reloaded for use of LoopEnd - in which case
290 // we'd need to perform an add because it gets subtracted again by LE!
291 // The other option is to then generate the other form of LE which doesn't
// Any LR register operand on an instruction other than t2LoopDec is reported.
293 for (auto &MO
: MI
.operands()) {
294 if (MI
.getOpcode() != ARM::t2LoopDec
&& MO
.isReg() &&
295 MO
.getReg() == ARM::LR
) {
296 LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI
);
// NOTE(review): the statement guarded by this check is not visible in this
// excerpt.
303 if (Dec
&& End
&& Revert
)
307 LLVM_DEBUG(if (Start
) dbgs() << "ARM Loops: Found Loop Start: " << *Start
;
308 if (Dec
) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec
;
309 if (End
) dbgs() << "ARM Loops: Found Loop End: " << *End
;);
// Distinguish "no pseudo found at all" from "only some of the three found".
311 if (!Start
&& !Dec
&& !End
) {
312 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
314 } else if (!(Start
&& Dec
&& End
)) {
315 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
// Operand 1 of LoopEnd must be a basic block; anything else is a fatal error.
319 if (!End
->getOperand(1).isMBB())
320 report_fatal_error("Expected LoopEnd to target basic block");
322 // TODO Maybe there's cases where the target doesn't have to be the header,
323 // but for now be safe and revert.
324 if (End
->getOperand(1).getMBB() != ML
->getHeader()) {
325 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
329 // The WLS and LE instructions have 12-bits for the label offset. WLS
330 // requires a positive offset, while LE uses negative.
// LE: End must not sit before the header, and the header must be within 4094
// of End.
331 if (BBUtils
->getOffsetOf(End
) < BBUtils
->getOffsetOf(ML
->getHeader()) ||
332 !BBUtils
->isBBInRange(End
, ML
->getHeader(), 4094)) {
333 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
// WLS: Start must not sit after its target block (operand 1), and that block
// must be within 4094 of Start.
336 if (Start
->getOpcode() == ARM::t2WhileLoopStart
&&
337 (BBUtils
->getOffsetOf(Start
) >
338 BBUtils
->getOffsetOf(Start
->getOperand(1).getMBB()) ||
339 !BBUtils
->isBBInRange(Start
, Start
->getOperand(1).getMBB(), 4094))) {
340 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
// An insertion point is only looked up when the loop is not being reverted.
344 MachineInstr
*InsertPt
= Revert
? nullptr : IsSafeToDefineLR(Start
);
346 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
349 LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt
);
351 Expand(ML
, Start
, InsertPt
, Dec
, End
, Revert
);
355 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
356 // beq that branches to the exit block.
357 // TODO: We could also try to generate a cbz if the value in LR is also in
358 // another low register.
359 void ARMLowOverheadLoops::RevertWhile(MachineInstr
*MI
) const {
360 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI
);
361 MachineBasicBlock
*MBB
= MI
->getParent();
// Build a t2CMPri before MI, comparing MI's operand 0 and predicated with
// ARMCC::AL.
// NOTE(review): the line adding the immediate being compared against is not
// visible in this excerpt.
362 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
363 TII
->get(ARM::t2CMPri
));
364 MIB
.add(MI
->getOperand(0));
366 MIB
.addImm(ARMCC::AL
);
367 MIB
.addReg(ARM::NoRegister
);
// Choose tBcc when the destination block passes the isBBInRange check with a
// limit of 254, otherwise t2Bcc.
369 MachineBasicBlock
*DestBB
= MI
->getOperand(1).getMBB();
370 unsigned BrOpc
= BBUtils
->isBBInRange(MI
, DestBB
, 254) ?
371 ARM::tBcc
: ARM::t2Bcc
;
// Emit the EQ-conditional branch to MI's target, then delete the pseudo.
373 MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(BrOpc
));
374 MIB
.add(MI
->getOperand(1)); // branch target
375 MIB
.addImm(ARMCC::EQ
); // condition code
376 MIB
.addReg(ARM::CPSR
);
377 MI
->eraseFromParent();
// Replaces the LoopDec pseudo MI with a t2SUBri built immediately before it,
// then erases MI.
// NOTE(review): the statements that update SetFlags, the use of AllowFlags
// and the return statement are not visible in this excerpt. Expand passes the
// returned bool to RevertLoopEnd as SkipCmp.
380 bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr
*MI
,
381 bool AllowFlags
) const {
382 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI
);
383 MachineBasicBlock
*MBB
= MI
->getParent();
385 // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
386 bool SetFlags
= false;
// The condition tested: the first CPSR def found from MI to the end of the
// block is a t2LoopEnd, and no CPSR use is found over the same range.
388 if (auto *Def
= SearchForDef(MI
, MBB
->end(), ARM::CPSR
)) {
389 if (!SearchForUse(MI
, MBB
->end(), ARM::CPSR
) &&
390 Def
->getOpcode() == ARM::t2LoopEnd
)
395 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
396 TII
->get(ARM::t2SUBri
));
// Reuse MI's operands 1 and 2, predicated with ARMCC::AL.
398 MIB
.add(MI
->getOperand(1));
399 MIB
.add(MI
->getOperand(2));
400 MIB
.addImm(ARMCC::AL
);
// Add CPSR and mark operand 5 of the new instruction as a def.
// NOTE(review): operand 5 is presumably the CPSR operand added here, and this
// is presumably conditional on SetFlags - the surrounding lines are not
// visible in this excerpt.
404 MIB
.addReg(ARM::CPSR
);
405 MIB
->getOperand(5).setIsDef(true);
409 MI
->eraseFromParent();
413 // Generate a subs, or sub and cmp, and a branch instead of an LE.
// NOTE(review): MIB is declared twice below, so the t2CMPri sequence must sit
// in its own scope - presumably guarded by !SkipCmp, but that line is not
// visible in this excerpt.
414 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr
*MI
, bool SkipCmp
) const {
415 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI
);
417 MachineBasicBlock
*MBB
= MI
->getParent();
// Build a t2CMPri before MI, predicated with ARMCC::AL.
// NOTE(review): the lines adding the compared register and immediate are not
// visible in this excerpt.
420 MachineInstrBuilder MIB
= BuildMI(*MBB
, MI
, MI
->getDebugLoc(),
421 TII
->get(ARM::t2CMPri
));
424 MIB
.addImm(ARMCC::AL
);
425 MIB
.addReg(ARM::NoRegister
);
// Choose tBcc when the destination block passes the isBBInRange check with a
// limit of 254, otherwise t2Bcc.
428 MachineBasicBlock
*DestBB
= MI
->getOperand(1).getMBB();
429 unsigned BrOpc
= BBUtils
->isBBInRange(MI
, DestBB
, 254) ?
430 ARM::tBcc
: ARM::t2Bcc
;
// Emit the NE-conditional branch to MI's target, then delete the pseudo.
433 MachineInstrBuilder MIB
=
434 BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(BrOpc
));
435 MIB
.add(MI
->getOperand(1)); // branch target
436 MIB
.addImm(ARMCC::NE
); // condition code
437 MIB
.addReg(ARM::CPSR
);
438 MI
->eraseFromParent();
// Final rewrite for one loop. Defines three local helpers (ExpandLoopStart,
// ExpandLoopEnd, RemoveDeadBranch), then either reverts the pseudo
// instructions via the Revert* members or expands them into DLS/WLS and LE.
// NOTE(review): the Revert parameter line, the if/else selecting between the
// two paths, the lambdas' return statements and the closing braces are not
// visible in this excerpt.
441 void ARMLowOverheadLoops::Expand(MachineLoop
*ML
, MachineInstr
*Start
,
442 MachineInstr
*InsertPt
,
443 MachineInstr
*Dec
, MachineInstr
*End
,
// Builds a t2DLS (for t2DoLoopStart) or a t2WLS (otherwise) before InsertPt,
// copying Start's operand 0 and, for WLS only, operand 1. InsertPt is erased
// when it is a different instruction from Start, and Start is always erased.
446 auto ExpandLoopStart
= [this](MachineLoop
*ML
, MachineInstr
*Start
,
447 MachineInstr
*InsertPt
) {
448 MachineBasicBlock
*MBB
= InsertPt
->getParent();
449 unsigned Opc
= Start
->getOpcode() == ARM::t2DoLoopStart
?
450 ARM::t2DLS
: ARM::t2WLS
;
451 MachineInstrBuilder MIB
=
452 BuildMI(*MBB
, InsertPt
, InsertPt
->getDebugLoc(), TII
->get(Opc
));
455 MIB
.add(Start
->getOperand(0));
456 if (Opc
== ARM::t2WLS
)
457 MIB
.add(Start
->getOperand(1));
459 if (InsertPt
!= Start
)
460 InsertPt
->eraseFromParent();
461 Start
->eraseFromParent();
462 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB
);
466 // Combine the LoopDec and LoopEnd instructions into LE(TP).
// Builds a t2LEUpdate before End carrying End's operands 0 and 1, then erases
// both End and Dec.
467 auto ExpandLoopEnd
= [this](MachineLoop
*ML
, MachineInstr
*Dec
,
469 MachineBasicBlock
*MBB
= End
->getParent();
470 MachineInstrBuilder MIB
= BuildMI(*MBB
, End
, End
->getDebugLoc(),
471 TII
->get(ARM::t2LEUpdate
));
473 MIB
.add(End
->getOperand(0));
474 MIB
.add(End
->getOperand(1));
475 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB
);
477 End
->eraseFromParent();
478 Dec
->eraseFromParent();
482 // TODO: We should be able to automatically remove these branches before we
483 // get here - probably by teaching analyzeBranch about the pseudo
485 // If there is an unconditional branch, after I, that just branches to the
486 // next block, remove it.
// Only the last instruction of I's block is examined, and only when it is
// not I itself.
487 auto RemoveDeadBranch
= [](MachineInstr
*I
) {
488 MachineBasicBlock
*BB
= I
->getParent();
489 MachineInstr
*Terminator
= &BB
->instr_back();
490 if (Terminator
->isUnconditionalBranch() && I
!= Terminator
) {
491 MachineBasicBlock
*Succ
= Terminator
->getOperand(0).getMBB();
492 if (BB
->isLayoutSuccessor(Succ
)) {
493 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator
);
494 Terminator
->eraseFromParent();
// Revert path: Dec is reverted with AllowFlags set to true, and its result is
// forwarded to RevertLoopEnd as SkipCmp.
// NOTE(review): the statement taken for t2WhileLoopStart is not visible in
// this excerpt (presumably a RevertWhile call, with the erase below as the
// alternative).
500 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
503 Start
->eraseFromParent();
504 bool FlagsAlreadySet
= RevertLoopDec(Dec
, true);
505 RevertLoopEnd(End
, FlagsAlreadySet
);
// Expand path: Start and End are rebound to the helpers' results and a
// redundant trailing branch is removed after each.
507 Start
= ExpandLoopStart(ML
, Start
, InsertPt
);
508 RemoveDeadBranch(Start
);
509 End
= ExpandLoopEnd(ML
, Dec
, End
);
510 RemoveDeadBranch(End
);
// Walks every block of the function, gathering loop start, LoopDec and
// LoopEnd pseudo instructions into per-block lists, and then iterates over
// each list.
// NOTE(review): several statements are not visible in this excerpt: the
// condition guarding the Starts push_back, the pushes into Decs and Ends, the
// bodies of the Dec and End loops, the updates to Changed and the return.
// Per the debug message, the collected pseudos are reverted.
514 bool ARMLowOverheadLoops::RevertNonLoops() {
515 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
516 bool Changed
= false;
518 for (auto &MBB
: *MF
) {
// Instructions are collected first and handled afterwards, so nothing is
// erased while the block is being iterated.
519 SmallVector
<MachineInstr
*, 4> Starts
;
520 SmallVector
<MachineInstr
*, 4> Decs
;
521 SmallVector
<MachineInstr
*, 4> Ends
;
523 for (auto &I
: MBB
) {
525 Starts
.push_back(&I
);
526 else if (I
.getOpcode() == ARM::t2LoopDec
)
528 else if (I
.getOpcode() == ARM::t2LoopEnd
)
// Check for a block in which no pseudo instruction was collected.
532 if (Starts
.empty() && Decs
.empty() && Ends
.empty())
// NOTE(review): the statement taken for t2WhileLoopStart is not visible in
// this excerpt.
537 for (auto *Start
: Starts
) {
538 if (Start
->getOpcode() == ARM::t2WhileLoopStart
)
541 Start
->eraseFromParent();
543 for (auto *Dec
: Decs
)
546 for (auto *End
: Ends
)
// Factory function: returns a newly allocated ARMLowOverheadLoops pass as a
// FunctionPass pointer.
552 FunctionPass
*llvm::createARMLowOverheadLoopsPass() {
553 return new ARMLowOverheadLoops();