//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
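
    // The queue is kept sorted by increasing memory offset: entries for a
    // chain are appended in order, and out-of-order ops are inserted at the
    // right position by LoadStoreMultipleOpti before MergeLDR_STR runs.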
    SmallVector<MachineBasicBlock::iterator, 4>
    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                 int Opcode, unsigned Size,
                 ARMCC::CondCodes Pred, unsigned PredReg,
                 unsigned Scratch, MemOpQueue &MemOps);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
  return new ARMLoadStoreOpt();
}
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:  ++NumLDMGened;  return ARM::LDM;
  case ARM::STR:  ++NumSTMGened;  return ARM::STM;
  case ARM::FLDS: ++NumFLDMGened; return ARM::FLDMS;
  case ARM::FLDD: ++NumFLDMGened; return ARM::FLDMD;
  case ARM::FSTS: ++NumFSTMGened; return ARM::FSTMS;
  case ARM::FSTD: ++NumFSTMGened; return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
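
// Each multiple emitted through here bumps one of the STATISTIC counters
// above; they are reported under -stats, keyed by DEBUG_TYPE ("arm-ldst-opt").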
/// mergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     int Offset, unsigned Base, bool BaseKill, int Opcode,
                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
                     const TargetInstrInfo *TII) {
  // FIXME: would it be better to take a DL from one of the loads arbitrarily?
  DebugLoc dl = DebugLoc::getUnknownLoc();
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (Opcode == ARM::LDR)
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = ARM::SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, false, false, BaseKill).addImm(ImmedOffset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }
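
  // For example, three loads of r4, r6, r7 from [r5, #16], [r5, #20] and
  // [r5, #24] reach here with Offset == 16: the last destination (r7) becomes
  // the new base, giving "add r7, r5, #16" followed by "ldmia r7, {r4, r6, r7}".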
  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, isDef, false, Regs[i].second);

  return true;
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
SmallVector<MachineBasicBlock::iterator, 4>
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps) {
  SmallVector<MachineBasicBlock::iterator, 4> Merges;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
      Regs.push_back(std::make_pair(Reg, isKill));
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, Regs, TII)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps);
      Merges.append(Merges2.begin(), Merges2.end());
      return Merges;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, Regs, TII)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return Merges;
}
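
// LDM / STM encode their register list as a bitmask, so registers can only be
// transferred in ascending register order (hence the RegNum > PRegNum check);
// the FLDM / FSTM forms additionally require consecutive registers. When an op
// breaks the run, the earlier ops are merged and the function recurses on the
// rest, so a single queue may produce several multiples.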
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::SUBri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::ADDri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
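
// These two helpers recognize "sub Base, Base, #Bytes" / "add Base, Base,
// #Bytes" executed under the same predicate as the memory op; such neighbors
// are exactly what the mergeBaseUpdate* routines below fold into writeback.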
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      bool &Advance,
                                      MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR:  return ARM::LDR_PRE;
  case ARM::STR:  return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR:  return ARM::LDR_POST;
  case ARM::STR:  return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
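
// The VFP loads / stores (FLDS/FLDD/FSTS/FSTD) have no pre/post-indexed
// encodings, so both maps fall back to the single-register FLDM/FSTM forms
// with base writeback; the writeback amount is supplied separately below.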
/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
      (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
    return false;

  bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = isAM2
    ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                        true, isDPR ? 2 : 1);
  if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, true)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, false, false, BaseKill)
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), true);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), false, false, MO.isKill())
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), false, false, MO.isKill());
  }
  MBB.erase(MBBI);

  return true;
}
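
// For example, "ldr r3, [r0]" followed by "add r0, r0, #4" becomes the
// post-indexed "ldr r3, [r0], #4", and a preceding "sub r0, r0, #4" becomes
// the pre-indexed "ldr r3, [r0, #-4]!".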
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      unsigned NumOperands = MBBI->getDesc().getNumOperands();
      unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
      int Offset = isAM2
        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
      if (isAM2) {
        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      } else {
        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      }
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }
    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (Scratch == 0)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        SmallVector<MachineBasicBlock::iterator, 4> MBBII =
          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                       CurrPred, CurrPredReg, Scratch, MemOps);
        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += MBBII.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      }
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }

  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        PrevMI->setDesc(TII->get(ARM::LDM_RET));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}