//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
}

char ARMLoadStoreOpt::ID = 0;
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:      ++NumLDMGened;  return ARM::LDM;
  case ARM::STR:      ++NumSTMGened;  return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12: ++NumLDMGened;  return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12: ++NumSTMGened;  return ARM::t2STM;
  case ARM::FLDS:     ++NumFLDMGened; return ARM::FLDMS;
  case ARM::FSTS:     ++NumFSTMGened; return ARM::FSTMS;
  case ARM::FLDD:     ++NumFLDMGened; return ARM::FLDMD;
  case ARM::FSTD:     ++NumFSTMGened; return ARM::FSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
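/// Illustrative example (not from the original comments): with Base == r0,
/// Offset == 0 and Regs == { (r1, kill), (r2, kill) }, the two instructions
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
/// are replaced by a single
///   ldmia r0, {r1, r2}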
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed at its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
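/// Illustrative sketch of the scan below (assumed behavior, per the recursion
/// in the body): for queue entries { r1@+0, r3@+4, r2@+8 }, the r2 entry
/// breaks the ascending register-number requirement, so r1/r3 are merged
/// first and the function recurses starting at the r2 entry.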
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
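// For example (illustrative): isMatchingIncrement(MI, r0, 12, 0, Pred, PredReg)
// is true when MI is "add r0, r0, #12" under the same predicate. The Thumb1
// tADDspi/tSUBspi forms count their immediate in words, hence the Scale
// factor above.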
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
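// e.g. (illustrative) "ldmia r0, {r1, r2, r3}" has 4 fixed operands (base,
// mode immediate, predicate immediate, predicate register) plus 3 register
// operands, so the LDM/STM case above yields (7 - 4) * 4 == 12 bytes.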
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
               Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR:  return ARM::LDR_PRE;
  case ARM::STR:  return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR:  return ARM::LDR_POST;
  case ARM::STR:  return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
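/// Illustrative example (not from the original comments):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4      ; post-indexed
/// and, symmetrically, "sub r0, r0, #4; ldr r1, [r0]" becomes the
/// pre-indexed "ldr r1, [r0, #-4]!".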
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
               Opcode == ARM::FSTD || Opcode == ARM::FSTS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill,
                          unsigned BaseReg, bool BaseKill,
                          unsigned OffReg, bool OffKill,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII) {
  unsigned Offset;
  if (OffImm < 0)
    Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
  else
    Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  if (isDef)
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
  else
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isLd = Opcode == ARM::LDRD;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    const MachineOperand &OffOp = MI->getOperand(3);
    unsigned OffReg = OffOp.getReg();
    bool OffKill = OffOp.isKill();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getKillRegState(EvenDeadKill))
          .addReg(OddReg, getKillRegState(OddDeadKill));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
                      BaseReg, false, OffReg, false, Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
                      BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
                      Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
                      Pred, PredReg, TII);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
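// Note: this is a "greater than" ordering, so passing OffsetCompare() to
// std::sort (as RescheduleOps does below) sorts by *descending* offset; the
// scan there then walks the vector back to front.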
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
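/// Illustrative example (assumed, simplified): given
///   ldr r2, [r0]
///   add r7, r3, r4
///   ldr r5, [r0, #4]
/// the pass moves the second load next to the first, so the post-RA pass
/// above (or the LDRD formation below) can combine the two.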
namespace {
  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, unsigned &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
}

char ARMPreAllocLoadStoreOpt::ID = 0;
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, unsigned &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred) {
  // FIXME: FLDS / FSTS -> FLDD / FSTD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !Op0->memoperands_begin()->getValue() ||
      Op0->memoperands_begin()->isVolatile())
    return false;

  unsigned Align = Op0->memoperands_begin()->getAlignment();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(
        Type::getInt64Ty(Op0->getParent()->getParent()->getFunction()->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  if (OffImm < 0) {
    AddSub = ARM_AM::sub;
    OffImm = -OffImm;
  }
  if (OffImm >= 256) // 8 bits
    return false;
  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  OffReg  = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        unsigned Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opcode = MI->getOpcode();
      bool isLd = Opcode == ARM::LDR ||
        Opcode == ARM::FLDS || Opcode == ARM::FLDD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
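// Usage sketch (assumption, not part of this file): the ARM target machine is
// expected to schedule both variants around register allocation, e.g.
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true)); // before RA
//   PM.add(createARMLoadStoreOptimizationPass());                  // after RA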