//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDRi12:
    ++NumLDMGened;
    return ARM::LDM;
  case ARM::STRi12:
    ++NumSTMGened;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    return ARM::t2STM;
  case ARM::VLDRS:
    ++NumVLDMGened;
    return ARM::VLDMS;
  case ARM::VSTRS:
    ++NumVSTMGened;
    return ARM::VSTMS;
  case ARM::VLDRD:
    ++NumVLDMGened;
    return ARM::VLDMD;
  case ARM::VSTRD:
    ++NumVSTMGened;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
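///
/// For illustration, a hypothetical sequence (ARM mode, starting offset 0,
/// ascending register numbers):
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, {r4, r5, r6}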
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, just use one of the destination registers as the
      // new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
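///
/// For illustration, a hypothetical run broken by a non-ascending register
/// number (register numbers must increase within one ldm/stm):
///   ldr r5, [r0]; ldr r6, [r0, #4]; ldr r4, [r0, #8]
/// =>
///   ldmia r0, {r5, r6}; ldr r4, [r0, #8]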
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
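// For illustration (hypothetical operands): with Base = r0, Bytes = 12 and
// an AL predicate, isMatchingDecrement below matches "sub r0, r0, #12", and
// isMatchingIncrement matches the corresponding "add r0, r0, #12".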
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
  case ARM::VLDMS:
  case ARM::VSTMS:
    // Register-list operands are the ones beyond the fixed operands in the
    // instruction description; each transfers 4 bytes.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMD:
  case ARM::VSTMD:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }

  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_PRE;
  case ARM::STRi12: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_POST;
  case ARM::STRi12: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
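///
/// A hypothetical example (ARM mode, increment equal to the transfer size):
///
/// ldr rd, [rn]
/// rn := rn + 4
/// =>
/// ldr rd, [rn], #4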
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise, don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
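// For illustration (hypothetical encoding): for the i12-form opcodes handled
// below, the immediate operand is already a byte offset, while an AM5
// OffField encoding "sub, imm 2" decodes to 2 * 4 = 8 bytes, negated to -8.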
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
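/// FixInvalidRegPairOp - An ldrd/strd needs an even/odd register pair; when
/// the pair is invalid, rewrite the instruction as an ldm/stm or as two
/// plain loads / stores. For illustration (hypothetical registers):
///   ldrd r1, r2, [r0]  =>  ldm r0, {r1, r2}
///   ldrd r3, r2, [r0]  =>  ldr r3, [r0]; ldr r2, [r0, #4]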
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
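///
/// For illustration, a hypothetical reschedule (unrelated instructions
/// elided): "ldr r4, [r0]; mul ...; ldr r5, [r0, #4]" becomes
/// "ldr r4, [r0]; ldr r5, [r0, #4]; mul ...", so the post-RA pass (or the
/// LDRD formation below) can combine the two loads.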
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
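/// CanFormLdStDWord - Check whether two i32 loads or stores off the same
/// base can be combined into a single ldrd/strd. For illustration
/// (hypothetical registers): "ldr r0, [r2]; ldr r1, [r2, #4]" could become
/// "ldrd r0, r1, [r2]" when the alignment and offset constraints below hold.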
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}