//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin, unsigned memOpsEnd,
                        unsigned insertAfter, int Offset,
                        unsigned Base, bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred, unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  return 0;
}
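
// Illustrative example (added annotation, not from the original source):
// getLoadStoreMultipleOpcode(ARM::LDRi12, ARM_AM::ia) yields ARM::LDMIA,
// while the VFP cases return 0 for DB mode because only the base-updating
// forms (VLDMSDB_UPD and friends) exist.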
namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right at its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
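
// Illustrative example (added annotation, not from the original source):
// with Base = r1, starting Offset = 4 and Regs = {r4, r5, r6}, MergeOps
// rewrites
//   ldr r4, [r1, #4]
//   ldr r5, [r1, #8]     =>   ldmib r1, {r4, r5, r6}
//   ldr r6, [r1, #12]
// using IB mode, since the first transfer begins at Base + 4.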
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
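
// Illustrative example (added annotation, not from the original source):
// when "str r5, [r0]" and "str r6, [r0, #4]" merge into an stm inserted at a
// later position, a kill of r5 occurring before the insertion point is
// cleared there and the kill flag is carried on the stm's register operand
// instead, keeping liveness information consistent.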
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
  case ARM::VSTRD:
    Limit = 16;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
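
// Illustrative example (added annotation, not from the original source):
// for a queue of loads at [r0, #0], [r0, #4] and [r0, #12], the gap at #8
// fails the "NewOffset == Offset + Size" test, so the first two entries are
// merged into one ldmia and MergeLDR_STR recurses on the remainder.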
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
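
// Illustrative example (added annotation, not from the original source):
// with Base = r0, Bytes = 8 and an identical predicate, "add r0, r0, #8"
// matches above; for tADDspi the immediate is scaled by 4, so an encoded
// immediate of 2 likewise matches Bytes = 8.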
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
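
// Illustrative example (added annotation, not from the original source):
// "ldmia r0, {r4, r5, r6}" transfers 3 * 4 = 12 bytes; the register-list
// length is recovered from the operands beyond the fixed operand count.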
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}
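
// Illustrative example (added annotation, not from the original source):
// getUpdatingLSMultipleOpcode(ARM::LDMIA, ARM_AM::db) returns
// ARM::LDMDB_UPD, i.e. "ldmdb rn!, {...}" with base-register writeback.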
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE;
  case ARM::STRi12:
    return ARM::STR_PRE;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST;
  case ARM::STRi12:
    return ARM::STR_POST;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
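
// Illustrative example (added annotation, not from the original source): an
// ldr whose memory operand is only 2-byte aligned is rejected here, because
// a later ldm formed from it could not be emulated by kernels that fix up
// unaligned ldr/str.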
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
, unsigned Base
,
1444 MachineBasicBlock::iterator I
,
1445 MachineBasicBlock::iterator E
,
1446 SmallPtrSet
<MachineInstr
*, 4> &MemOps
,
1447 SmallSet
<unsigned, 4> &MemRegs
,
1448 const TargetRegisterInfo
*TRI
) {
1449 // Are there stores / loads / calls between them?
1450 // FIXME: This is overly conservative. We should make use of alias information
1452 SmallSet
<unsigned, 4> AddedRegPressure
;
1454 if (I
->isDebugValue() || MemOps
.count(&*I
))
1456 const MCInstrDesc
&MCID
= I
->getDesc();
1457 if (MCID
.isCall() || MCID
.isTerminator() || I
->hasUnmodeledSideEffects())
1459 if (isLd
&& MCID
.mayStore())
1464 // It's not safe to move the first 'str' down.
1467 // str r4, [r0, #+4]
1468 if (MCID
.mayStore())
1471 for (unsigned j
= 0, NumOps
= I
->getNumOperands(); j
!= NumOps
; ++j
) {
1472 MachineOperand
&MO
= I
->getOperand(j
);
1475 unsigned Reg
= MO
.getReg();
1476 if (MO
.isDef() && TRI
->regsOverlap(Reg
, Base
))
1478 if (Reg
!= Base
&& !MemRegs
.count(Reg
))
1479 AddedRegPressure
.insert(Reg
);
1483 // Estimate register pressure increase due to the transformation.
1484 if (MemRegs
.size() <= 4)
1485 // Ok if we are moving small number of instructions.
1487 return AddedRegPressure
.size() <= MemRegs
.size() * 2;
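
// Illustrative example (added annotation, not from the original source):
// when more than four distinct registers are loaded / stored, the move is
// allowed only if the instructions being hoisted over reference at most
// MemRegs.size() * 2 other registers, a rough register-pressure proxy.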
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
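
// Illustrative example (added annotation, not from the original source):
// two t2LDRi12 loads of r4 and r5 from [r0, #8] and [r0, #12] can become
// "t2LDRDi8 r4, r5, [r0, #8]" once the alignment and offset-range checks
// above succeed.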
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}