//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
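// The counters above are reported in llc's -stats output, which is a handy
// way to check whether this pass fired on a given function.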
namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {};
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    SmallVector<MachineBasicBlock::iterator, 4>
    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                 int Opcode, unsigned Size,
                 ARMCC::CondCodes Pred, unsigned PredReg,
                 unsigned Scratch, MemOpQueue &MemOps);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
  return new ARMLoadStoreOpt();
}
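// getLoadStoreMultipleOpcode - Map a single-transfer opcode to the matching
// load / store multiple opcode (e.g. LDR -> LDM, FLDS -> FLDMS) and bump the
// corresponding statistic.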
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
/// mergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
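///
/// For example (register choices are illustrative):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// becomes the single instruction
///   ldmia r4, {r0, r1, r2}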
static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     int Offset, unsigned Base, bool BaseKill, int Opcode,
                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
                     const TargetInstrInfo *TII) {
  // FIXME would it be better to take a DL from one of the loads arbitrarily?
  DebugLoc dl = DebugLoc::getUnknownLoc();
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (Opcode == ARM::LDR)
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = ARM::SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }
  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
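///
/// The MemOps queue is scanned from SIndex onwards while the offsets stay
/// consecutive and the register numbers ascend; when the run breaks, the
/// instructions collected so far are merged and the routine recurses on the
/// rest of the queue.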
SmallVector<MachineBasicBlock::iterator, 4>
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps) {
  SmallVector<MachineBasicBlock::iterator, 4> Merges;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, Regs, TII)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
      Merges.append(Merges2.begin(), Merges2.end());
      return Merges;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, Regs, TII)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return Merges;
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}
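// The two matchers below recognize exactly "SUBri/ADDri Base, Base, #Bytes"
// executed under the same predicate as the memory op; these are the only
// base-update forms this pass folds into writeback addressing.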
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::SUBri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::ADDri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      bool &Advance,
                                      MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
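// Note that the FP loads / stores map to FLDM/FSTM opcodes in both tables:
// VFP has no pre/post-indexed forms of FLDS/FLDD/FSTS/FSTD, so a
// single-register load / store multiple with base writeback is used instead.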
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
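///
/// For example (registers are illustrative):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4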
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
      (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
    return false;

  bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                        true, isDPR ? 2 : 1);
  if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(BaseKill))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
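///
/// Only the register-offset-free forms qualify: an LDR/STR must use a plain
/// register base with no index register (operand 2 must be reg0), and the FP
/// loads / stores must have a register base.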
static bool isMemoryOp(MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
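///
/// The scavenger has to be positioned before the first instruction the merge
/// may replace so that the unused-register search sees accurate liveness
/// information at that point.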
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
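///
/// For example (registers and offsets are illustrative):
///   ldr r4, [sp]
///   ldr r5, [sp, #4]
///   ldr r6, [sp, #8]
/// =>
///   ldmia sp, {r4, r5, r6}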
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      unsigned NumOperands = MBBI->getDesc().getNumOperands();
      unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
      int Offset = isAM2
        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
      if (isAM2) {
        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      } else {
        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      }
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }
== Opcode
&& CurrBase
== Base
&& CurrPred
== Pred
) {
647 // No need to match PredReg.
648 // Continue adding to the queue.
649 if (Offset
> MemOps
.back().Offset
) {
650 MemOps
.push_back(MemOpQueueEntry(Offset
, Position
, MBBI
));
654 for (MemOpQueueIter I
= MemOps
.begin(), E
= MemOps
.end();
656 if (Offset
< I
->Offset
) {
657 MemOps
.insert(I
, MemOpQueueEntry(Offset
, Position
, MBBI
));
661 } else if (Offset
== I
->Offset
) {
662 // Collision! This can't be merged!
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (Scratch == 0)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        SmallVector<MachineBasicBlock::iterator,4> MBBII =
          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                       CurrPred, CurrPredReg, Scratch, MemOps);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += MBBII.size();
        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      }
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        PrevMI->setDesc(TII->get(ARM::LDM_RET));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}