//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin, unsigned memOpsEnd,
                        unsigned insertAfter, int Offset,
                        unsigned Base, bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred, unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDRi12:  return ARM::LDM;
  case ARM::STRi12:  return ARM::STM;
  case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDM;
  case ARM::t2STRi8: case ARM::t2STRi12: return ARM::t2STM;
  case ARM::VLDRS:   return ARM::VLDMS;
  case ARM::VSTRS:   return ARM::VSTMS;
  case ARM::VLDRD:   return ARM::VLDMD;
  case ARM::VSTRD:   return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
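///
/// An illustrative example (register choices are arbitrary):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
/// =>
///   ldmia r0, {r1, r2, r3}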
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base, but only do so if it is cost effective, i.e. merging more
    // than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers as
      // the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.
    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                                     SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // The register is killed earlier and it's not easy / possible to update
      // the kill marker on already merged instructions. Abort.
      return;
    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
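/// The MemOps queue is scanned for the longest run of consecutive offsets
/// with ascending register numbers; an op that breaks the run triggers a
/// merge of everything before it and a recursive call on the remainder.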
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this one in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }
    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:  case ARM::STRi12:
  case ARM::t2LDRi8: case ARM::t2LDRi12:
  case ARM::t2STRi8: case ARM::t2STRi12:
  case ARM::VLDRS:   case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:   case ARM::VSTRD:
    return 8;
  case ARM::LDM:   case ARM::STM:
  case ARM::t2LDM: case ARM::t2STM:
  case ARM::VLDMS: case ARM::VSTMS:
    // One word per transferred register; the register list follows the
    // fixed operands.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMD: case ARM::VSTMD:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }
  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }
  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_PRE;
  case ARM::STRi12: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_POST;
  case ARM::STRi12: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
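///
/// An illustrative example:
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4    (post-indexed; the add is folded into the load)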
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }
  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
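/// An op qualifies only if it has a single, non-volatile, at-least-word-aligned
/// memory operand and no <undef> register operands; see the checks below.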
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise, don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance the register scavenger to just before the earliest
/// memory op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
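/// Candidate ops are accumulated into a chain (same base, opcode, and
/// predicate, kept sorted by offset), which is flushed whenever the chain is
/// broken or the end of the block is reached.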
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }
->isDebugValue()) {
1065 // Reach the end of the block, try merging the memory instructions.
1067 } else if (Advance
) {
1071 // Reach the end of the block, try merging the memory instructions.
1077 if (NumMemOps
> 1) {
1078 // Try to find a free register to use as a new base in case it's needed.
1079 // First advance to the instruction just before the start of the chain.
1080 AdvanceRS(MBB
, MemOps
);
1081 // Find a scratch register.
1082 unsigned Scratch
= RS
->FindUnusedReg(ARM::GPRRegisterClass
);
1083 // Process the load / store instructions.
1084 RS
->forward(prior(MBBI
));
1088 MergeLDR_STR(MBB
, 0, CurrBase
, CurrOpc
, CurrSize
,
1089 CurrPred
, CurrPredReg
, Scratch
, MemOps
, Merges
);
1091 // Try folding preceeding/trailing base inc/dec into the generated
1093 for (unsigned i
= 0, e
= Merges
.size(); i
< e
; ++i
)
1094 if (MergeBaseUpdateLSMultiple(MBB
, Merges
[i
], Advance
, MBBI
))
1096 NumMerges
+= Merges
.size();
1098 // Try folding preceeding/trailing base inc/dec into those load/store
1099 // that were not merged to form LDM/STM ops.
1100 for (unsigned i
= 0; i
!= NumMemOps
; ++i
)
1101 if (!MemOps
[i
].Merged
)
1102 if (MergeBaseUpdateLoadStore(MBB
, MemOps
[i
].MBBI
, TII
,Advance
,MBBI
))
1105 // RS may be pointing to an instruction that's deleted.
1106 RS
->skipTo(prior(MBBI
));
1107 } else if (NumMemOps
== 1) {
1108 // Try folding preceeding/trailing base inc/dec into the single
1110 if (MergeBaseUpdateLoadStore(MBB
, MemOps
[0].MBBI
, TII
, Advance
, MBBI
)) {
1112 RS
->forward(prior(MBBI
));
1119 CurrPred
= ARMCC::AL
;
1126 // If iterator hasn't been advanced and this is not a memory op, skip it.
1127 // It can't start a new chain anyway.
1128 if (!Advance
&& !isMemOp
&& MBBI
!= E
) {
1134 return NumMerges
> 0;
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }
  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
, unsigned Base
,
1259 MachineBasicBlock::iterator I
,
1260 MachineBasicBlock::iterator E
,
1261 SmallPtrSet
<MachineInstr
*, 4> &MemOps
,
1262 SmallSet
<unsigned, 4> &MemRegs
,
1263 const TargetRegisterInfo
*TRI
) {
1264 // Are there stores / loads / calls between them?
1265 // FIXME: This is overly conservative. We should make use of alias information
1267 SmallSet
<unsigned, 4> AddedRegPressure
;
1269 if (I
->isDebugValue() || MemOps
.count(&*I
))
1271 const TargetInstrDesc
&TID
= I
->getDesc();
1272 if (TID
.isCall() || TID
.isTerminator() || TID
.hasUnmodeledSideEffects())
1274 if (isLd
&& TID
.mayStore())
1279 // It's not safe to move the first 'str' down.
1282 // str r4, [r0, #+4]
1286 for (unsigned j
= 0, NumOps
= I
->getNumOperands(); j
!= NumOps
; ++j
) {
1287 MachineOperand
&MO
= I
->getOperand(j
);
1290 unsigned Reg
= MO
.getReg();
1291 if (MO
.isDef() && TRI
->regsOverlap(Reg
, Base
))
1293 if (Reg
!= Base
&& !MemRegs
.count(Reg
))
1294 AddedRegPressure
.insert(Reg
);
1298 // Estimate register pressure increase due to the transformation.
1299 if (MemRegs
.size() <= 4)
1300 // Ok if we are moving small number of instructions.
1302 return AddedRegPressure
.size() <= MemRegs
.size() * 2;
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
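/// RescheduleLoadStoreInstrs - Bucket the memory ops of a block by base
/// register, stopping at calls and terminators, and run RescheduleOps on each
/// bucket with more than one entry.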
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}