//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass((intptr_t)&ID) {}

    const TargetInstrInfo *TII;
    const MRegisterInfo *MRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    SmallVector<MachineBasicBlock::iterator, 4>
    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                 int Opcode, unsigned Size,
                 ARMCC::CondCodes Pred, unsigned PredReg,
                 unsigned Scratch, MemOpQueue &MemOps);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
  return new ARMLoadStoreOpt();
}
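// A typical way to schedule this pass (sketch; the actual call site lives in
// the target machine's pass setup, not in this file):
//   PM.add(createARMLoadStoreOptimizationPass());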
static int getLoadStoreMultipleOpcode(int Opcode) {
  // Map each load / store opcode to its load / store multiple form, bumping
  // the matching statistic.
  switch (Opcode) {
  case ARM::LDR:  ++NumLDMGened;  return ARM::LDM;
  case ARM::STR:  ++NumSTMGened;  return ARM::STM;
  case ARM::FLDS: ++NumFLDMGened; return ARM::FLDMS;
  case ARM::FSTS: ++NumFSTMGened; return ARM::FSTMS;
  case ARM::FLDD: ++NumFLDMGened; return ARM::FLDMD;
  case ARM::FSTD: ++NumFSTMGened; return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
/// mergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
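/// For example (an illustrative sketch, not output from a real compile):
///     ldr r4, [r0]
///     ldr r5, [r0, #4]
/// becomes the single load multiple
///     ldmia r0, {r4, r5}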
static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     int Offset, unsigned Base, bool BaseKill, int Opcode,
                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
                     const TargetInstrInfo *TII) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
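  // Worked example for the sub-mode selection above (illustrative): with
  // NumRegs == 3, Offset == 4 picks ib (first word at [Base+4]),
  // Offset == -8 (= -4*3+4) picks da, and Offset == -12 (= -4*3) picks db.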
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (Opcode == ARM::LDR)
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = ARM::SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase)
      .addReg(Base, false, false, BaseKill).addImm(ImmedOffset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right after its use.
  }
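  // E.g. (sketch, assuming r12 is the scratch register): merging three stores
  // at [r0, #8] .. [r0, #16] first materializes the base
  //     add r12, r0, #8
  // so the stmia emitted below can start at offset zero from r12.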
  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
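  // Note on the AM5 operand above: addressing mode 5 counts 32-bit registers,
  // so a double-precision FLDMD/FSTMD transferring NumRegs D registers
  // encodes NumRegs<<1.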
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, isDef, false, Regs[i].second);

  return true;
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
SmallVector<MachineBasicBlock::iterator, 4>
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps) {
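  // Shape of the scan (illustrative): MemOps holds same-base memory ops in
  // ascending offset order, e.g. offsets {0, 4, 12, 16} with Size == 4. The
  // run {0, 4} is merged here; {12, 16} is handled by the recursive call
  // below.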
  SmallVector<MachineBasicBlock::iterator, 4> Merges;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
      Regs.push_back(std::make_pair(Reg, isKill));
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, Regs, TII)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }

      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps);
      Merges.append(Merges2.begin(), Merges2.end());
      return Merges;
    }
    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, Regs, TII)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return Merges;
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}
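// Illustrative use: for a conditional "ldreq r0, [r1]" this returns ARMCC::EQ
// and sets PredReg to the flags register operand (CPSR); for an unpredicated
// instruction it returns ARMCC::AL with PredReg == 0.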
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::SUBri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::ADDri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
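// E.g. (illustrative): an "add r4, r4, #16" with the same predicate as a
// four-register ldmia off r4 is a matching 16-byte increment and can be
// folded into the load multiple as base writeback.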
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
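// E.g. "ldmia r0, {r4-r7}" carries the base, mode, and two predicate operands
// plus four register operands, so (8 - 4) * 4 gives a 16-byte transfer.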
/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      bool &Advance,
                                      MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}
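// Note: the VFP loads / stores have no pre/post-indexed forms, so both
// helpers map them to the writeback FLDM/FSTM opcodes; a single-register
// load / store multiple with base update stands in for the indexed access.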
/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
      (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
    return false;

  bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                        true, isDPR ? 2 : 1);
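  // E.g. (illustrative): "ldr r1, [r0]" followed by "add r0, r0, #4" is
  // rewritten below as the post-indexed "ldr r1, [r0], #4", the add having
  // been erased above.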
  if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST;
      BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, true) // the updated base is also defined
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base, false, false, BaseKill)
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), true);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST;
      BuildMI(MBB, MBBI, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), false, false, MO.isKill())
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), false, false, MO.isKill());
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isRegister() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isRegister();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isRegister();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      unsigned NumOperands = MBBI->getDesc().getNumOperands();
      unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
      int Offset = isAM2
        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
      if (isAM2) {
        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      } else {
        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      }
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging what has been collected.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (Scratch == 0)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));
        // Merge ops.
        SmallVector<MachineBasicBlock::iterator,4> MBBII =
          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                       CurrPred, CurrPredReg, Scratch, MemOps);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += MBBII.size();

        // Try folding preceding/trailing base inc/dec into those load / store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      }
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }

  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of pc:
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        PrevMI->setDesc(TII->get(ARM::LDM_RET));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  MRI = TM.getRegisterInfo();
  RS = new RegScavenger();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}