//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/STLExtras.h"
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.
  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;

  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  case ARMII::IndexModePre:
  case ARMII::IndexModePost:
    // Try splitting an indexed load/store to an un-indexed one plus an add/sub
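    // For example, a pre-indexed "LDR r0, [r1, #4]!" splits into
    // "ADD r1, r1, #4" followed by "LDR r0, [r1]"; post-indexed forms do the
    // memory access off the old base first and the base update second.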
    unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const TargetInstrDesc &TID = MI->getDesc();
  unsigned NumOps = TID.getNumOperands();
  bool isLoad = !TID.mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
    assert(false && "Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);

  std::vector<MachineInstr*> NewMIs;
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);

  // Transfer LiveVariables states, kill / dead info.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.getReg() &&
          TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();
        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
            LV->addVirtualRegisterDead(Reg, NewMI);
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
                                            const std::vector<CalleeSavedInfo> &CSI,
                                            const TargetRegisterInfo *TRI) const {
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // Add the callee-saved register as live-in unless it's LR and
    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
    // then it's already added to the function and entry block live-in sets.
    if (Reg == ARM::LR) {
      MachineFunction &MF = *MBB.getParent();
      if (MF.getFrameInfo()->isReturnAddressTaken() &&
          MF.getRegInfo().isLiveIn(Reg))

    // Insert the spill to the stack frame. The register is killed at the spill
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    storeRegToStackSlot(MBB, MI, Reg, isKill,
                        CSI[i].getFrameIdx(), RC, TRI);
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
  while (I->isDebugValue()) {
    if (I == MBB.begin())
  if (!isUnpredicatedTerminator(I))

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(LastInst->getOperand(1));
      Cond.push_back(LastInst->getOperand(2));
    return true; // Can't handle indirect branch.

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only remaining terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
      SecondLastOpc = SecondLastInst->getOpcode();

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))

  // If the block ends with a B and a Bcc, handle it.
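  // For example, a block ending in "Bcc(NE) LBB0_1 ; B LBB0_2" yields
  // TBB = LBB0_1, Cond = { NE, CPSR }, FBB = LBB0_2.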
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(SecondLastInst->getOperand(1));
    Cond.push_back(SecondLastInst->getOperand(2));
    FBB = LastInst->getOperand(0).getMBB();

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
      I->eraseFromParent();

  // ...likewise if it ends with a branch table followed by an unconditional
  // branch. The branch folder can create these, and we must get rid of them for
  // correctness of Thumb constant islands.
  if ((isJumpTableBranchOpcode(SecondLastOpc) ||
       isIndirectBranchOpcode(SecondLastOpc)) &&
      isUncondBranchOpcode(LastOpc)) {
      I->eraseFromParent();

  // Otherwise, can't handle this.
unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))

  // Remove the branch.
  I->eraseFromParent();

  if (I == MBB.begin()) return 1;
  if (!isCondBranchOpcode(I->getOpcode()))

  // Remove the branch.
  I->eraseFromParent();
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");
  if (Cond.empty()) // Unconditional branch?
    BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
      .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));

  int PIdx = MI->findFirstPredOperandIdx();
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
    return CC2 == ARMCC::HI;
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
    return CC2 == ARMCC::GT;
    return CC2 == ARMCC::LT;
bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                        std::vector<MachineOperand> &Pred) const {
  // FIXME: This confuses implicit_def with optional CPSR def.
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.getImplicitDefs() && !TID.hasOptionalDef())

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isPredicable())

  if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
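    // NEON instructions have no conditional encodings in ARM mode; they can
    // only be made conditional inside a Thumb2 IT block, hence the check below.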
    ARMFunctionInfo *AFI =
      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
    return AFI->isThumb2Function();
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
/// GetInstSize - Return the size of the specified MachineInstr.
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  // Basic size info comes from the TSFlags field.
  const TargetInstrDesc &TID = MI->getDesc();
  uint64_t TSFlags = TID.TSFlags;

  unsigned Opc = MI->getOpcode();
  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
    // If this machine instr is an inline asm, measure it.
    if (MI->getOpcode() == ARM::INLINEASM)
      return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
    llvm_unreachable("Unknown or unset size field for instr!");
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
  case ARMII::Size8Bytes: return 8;  // ARM instruction x 2.
  case ARMII::Size4Bytes: return 4;  // ARM / Thumb2 instruction.
  case ARMII::Size2Bytes: return 2;  // Thumb1 instruction.
  case ARMII::SizeSpecial: {
    case ARM::t2MOVi32imm:
    case ARM::CONSTPOOL_ENTRY:
      // If this machine instr is a constant pool entry, its size is recorded as
      return MI->getOperand(2).getImm();
    case ARM::Int_eh_sjlj_longjmp:
    case ARM::tInt_eh_sjlj_longjmp:
    case ARM::Int_eh_sjlj_setjmp:
    case ARM::Int_eh_sjlj_setjmp_nofp:
    case ARM::tInt_eh_sjlj_setjmp:
    case ARM::t2Int_eh_sjlj_setjmp:
    case ARM::t2Int_eh_sjlj_setjmp_nofp:
      // These are jumptable branches, i.e. a branch followed by an inlined
      // jumptable. The size is 4 + 4 * number of entries. For TBB, each
      // entry is one byte; for TBH, two bytes each.
      unsigned EntrySize = (Opc == ARM::t2TBB)
        ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4);
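      // For example, a t2TBB table with 10 one-byte entries accounts for
      // 10 * 1 + 4 = 14 bytes; an odd entry count is padded below so the
      // instruction that follows stays 2-byte aligned.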
      unsigned NumOps = TID.getNumOperands();
      MachineOperand JTOP =
        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
      unsigned JTI = JTOP.getIndex();
      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
      assert(JTI < JT.size());
      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
      // aligned. The assembler / linker may add 2 byte padding just before
      // the JT entries. The size does not include this padding; the
      // constant islands pass does separate bookkeeping for it.
      // FIXME: If we know the size of the function is less than (1 << 16) *2
      // bytes, we can use 16-bit entries instead. Then there won't be an
      unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
      unsigned NumEntries = getNumJTEntries(JT, JTI);
      if (Opc == ARM::t2TBB && (NumEntries & 1))
        // Make sure the instruction that follows TBB is 2-byte aligned.
        // FIXME: Constant island pass should insert an "ALIGN" instruction
      return NumEntries * EntrySize + InstSize;
  // Otherwise, pseudo-instruction sizes are zero.
  return 0; // Not reached
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                .addReg(SrcReg, getKillRegState(KillSrc))));

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);

  if (SPRDest && SPRSrc)
  else if (GPRDest && SPRSrc)
  else if (SPRDest && GPRSrc)
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
  else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
    llvm_unreachable("Impossible reg-to-reg copy");

  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
  MIB.addReg(SrcReg, getKillRegState(KillSrc));
  if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
                             unsigned Reg, unsigned SubIdx, unsigned State,
                             const TargetRegisterInfo *TRI) {
    return MIB.addReg(Reg, State);
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
649 void ARMBaseInstrInfo::
650 storeRegToStackSlot(MachineBasicBlock
&MBB
, MachineBasicBlock::iterator I
,
651 unsigned SrcReg
, bool isKill
, int FI
,
652 const TargetRegisterClass
*RC
,
653 const TargetRegisterInfo
*TRI
) const {
655 if (I
!= MBB
.end()) DL
= I
->getDebugLoc();
656 MachineFunction
&MF
= *MBB
.getParent();
657 MachineFrameInfo
&MFI
= *MF
.getFrameInfo();
658 unsigned Align
= MFI
.getObjectAlignment(FI
);
660 MachineMemOperand
*MMO
=
661 MF
.getMachineMemOperand(MachinePointerInfo(
662 PseudoSourceValue::getFixedStack(FI
)),
663 MachineMemOperand::MOStore
,
664 MFI
.getObjectSize(FI
),
667 // tGPR is used sometimes in ARM instructions that need to avoid using
668 // certain registers. Just treat it as GPR here. Likewise, rGPR.
669 if (RC
== ARM::tGPRRegisterClass
|| RC
== ARM::tcGPRRegisterClass
670 || RC
== ARM::rGPRRegisterClass
)
671 RC
= ARM::GPRRegisterClass
;
673 switch (RC
->getID()) {
674 case ARM::GPRRegClassID
:
675 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::STRi12
))
676 .addReg(SrcReg
, getKillRegState(isKill
))
677 .addFrameIndex(FI
).addImm(0).addMemOperand(MMO
));
679 case ARM::SPRRegClassID
:
680 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VSTRS
))
681 .addReg(SrcReg
, getKillRegState(isKill
))
682 .addFrameIndex(FI
).addImm(0).addMemOperand(MMO
));
684 case ARM::DPRRegClassID
:
685 case ARM::DPR_VFP2RegClassID
:
686 case ARM::DPR_8RegClassID
:
687 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VSTRD
))
688 .addReg(SrcReg
, getKillRegState(isKill
))
689 .addFrameIndex(FI
).addImm(0).addMemOperand(MMO
));
691 case ARM::QPRRegClassID
:
692 case ARM::QPR_VFP2RegClassID
:
693 case ARM::QPR_8RegClassID
:
694 if (Align
>= 16 && getRegisterInfo().needsStackRealignment(MF
)) {
695 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VST1q64Pseudo
))
696 .addFrameIndex(FI
).addImm(16)
697 .addReg(SrcReg
, getKillRegState(isKill
))
698 .addMemOperand(MMO
));
700 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VSTMQ
))
701 .addReg(SrcReg
, getKillRegState(isKill
))
703 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
))
704 .addMemOperand(MMO
));
707 case ARM::QQPRRegClassID
:
708 case ARM::QQPR_VFP2RegClassID
:
709 if (Align
>= 16 && getRegisterInfo().canRealignStack(MF
)) {
710 // FIXME: It's possible to only store part of the QQ register if the
711 // spilled def has a sub-register index.
712 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VST1d64QPseudo
))
713 .addFrameIndex(FI
).addImm(16)
714 .addReg(SrcReg
, getKillRegState(isKill
))
715 .addMemOperand(MMO
));
717 MachineInstrBuilder MIB
=
718 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VSTMD
))
720 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
)))
722 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_0
, getKillRegState(isKill
), TRI
);
723 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_1
, 0, TRI
);
724 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_2
, 0, TRI
);
725 AddDReg(MIB
, SrcReg
, ARM::dsub_3
, 0, TRI
);
728 case ARM::QQQQPRRegClassID
: {
729 MachineInstrBuilder MIB
=
730 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VSTMD
))
732 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
)))
734 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_0
, getKillRegState(isKill
), TRI
);
735 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_1
, 0, TRI
);
736 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_2
, 0, TRI
);
737 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_3
, 0, TRI
);
738 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_4
, 0, TRI
);
739 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_5
, 0, TRI
);
740 MIB
= AddDReg(MIB
, SrcReg
, ARM::dsub_6
, 0, TRI
);
741 AddDReg(MIB
, SrcReg
, ARM::dsub_7
, 0, TRI
);
745 llvm_unreachable("Unknown regclass!");
750 ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr
*MI
,
751 int &FrameIndex
) const {
752 switch (MI
->getOpcode()) {
755 case ARM::t2STRs
: // FIXME: don't use t2STRs to access frame.
756 if (MI
->getOperand(1).isFI() &&
757 MI
->getOperand(2).isReg() &&
758 MI
->getOperand(3).isImm() &&
759 MI
->getOperand(2).getReg() == 0 &&
760 MI
->getOperand(3).getImm() == 0) {
761 FrameIndex
= MI
->getOperand(1).getIndex();
762 return MI
->getOperand(0).getReg();
770 if (MI
->getOperand(1).isFI() &&
771 MI
->getOperand(2).isImm() &&
772 MI
->getOperand(2).getImm() == 0) {
773 FrameIndex
= MI
->getOperand(1).getIndex();
774 return MI
->getOperand(0).getReg();
777 case ARM::VST1q64Pseudo
:
778 if (MI
->getOperand(0).isFI() &&
779 MI
->getOperand(2).getSubReg() == 0) {
780 FrameIndex
= MI
->getOperand(0).getIndex();
781 return MI
->getOperand(2).getReg();
785 if (MI
->getOperand(1).isFI() &&
786 MI
->getOperand(2).isImm() &&
787 MI
->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia
) &&
788 MI
->getOperand(0).getSubReg() == 0) {
789 FrameIndex
= MI
->getOperand(1).getIndex();
790 return MI
->getOperand(0).getReg();
798 void ARMBaseInstrInfo::
799 loadRegFromStackSlot(MachineBasicBlock
&MBB
, MachineBasicBlock::iterator I
,
800 unsigned DestReg
, int FI
,
801 const TargetRegisterClass
*RC
,
802 const TargetRegisterInfo
*TRI
) const {
804 if (I
!= MBB
.end()) DL
= I
->getDebugLoc();
805 MachineFunction
&MF
= *MBB
.getParent();
806 MachineFrameInfo
&MFI
= *MF
.getFrameInfo();
807 unsigned Align
= MFI
.getObjectAlignment(FI
);
808 MachineMemOperand
*MMO
=
809 MF
.getMachineMemOperand(
810 MachinePointerInfo(PseudoSourceValue::getFixedStack(FI
)),
811 MachineMemOperand::MOLoad
,
812 MFI
.getObjectSize(FI
),
815 // tGPR is used sometimes in ARM instructions that need to avoid using
816 // certain registers. Just treat it as GPR here.
817 if (RC
== ARM::tGPRRegisterClass
|| RC
== ARM::tcGPRRegisterClass
818 || RC
== ARM::rGPRRegisterClass
)
819 RC
= ARM::GPRRegisterClass
;
821 switch (RC
->getID()) {
822 case ARM::GPRRegClassID
:
823 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::LDRi12
), DestReg
)
824 .addFrameIndex(FI
).addImm(0).addMemOperand(MMO
));
826 case ARM::SPRRegClassID
:
827 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VLDRS
), DestReg
)
828 .addFrameIndex(FI
).addImm(0).addMemOperand(MMO
));
830 case ARM::DPRRegClassID
:
831 case ARM::DPR_VFP2RegClassID
:
832 case ARM::DPR_8RegClassID
:
833 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VLDRD
), DestReg
)
834 .addFrameIndex(FI
).addImm(0).addMemOperand(MMO
));
836 case ARM::QPRRegClassID
:
837 case ARM::QPR_VFP2RegClassID
:
838 case ARM::QPR_8RegClassID
:
839 if (Align
>= 16 && getRegisterInfo().needsStackRealignment(MF
)) {
840 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VLD1q64Pseudo
), DestReg
)
841 .addFrameIndex(FI
).addImm(16)
842 .addMemOperand(MMO
));
844 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VLDMQ
), DestReg
)
846 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
))
847 .addMemOperand(MMO
));
850 case ARM::QQPRRegClassID
:
851 case ARM::QQPR_VFP2RegClassID
:
852 if (Align
>= 16 && getRegisterInfo().canRealignStack(MF
)) {
853 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VLD1d64QPseudo
), DestReg
)
854 .addFrameIndex(FI
).addImm(16)
855 .addMemOperand(MMO
));
857 MachineInstrBuilder MIB
=
858 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VLDMD
))
860 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
)))
862 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_0
, RegState::Define
, TRI
);
863 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_1
, RegState::Define
, TRI
);
864 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_2
, RegState::Define
, TRI
);
865 AddDReg(MIB
, DestReg
, ARM::dsub_3
, RegState::Define
, TRI
);
868 case ARM::QQQQPRRegClassID
: {
869 MachineInstrBuilder MIB
=
870 AddDefaultPred(BuildMI(MBB
, I
, DL
, get(ARM::VLDMD
))
872 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
)))
874 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_0
, RegState::Define
, TRI
);
875 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_1
, RegState::Define
, TRI
);
876 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_2
, RegState::Define
, TRI
);
877 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_3
, RegState::Define
, TRI
);
878 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_4
, RegState::Define
, TRI
);
879 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_5
, RegState::Define
, TRI
);
880 MIB
= AddDReg(MIB
, DestReg
, ARM::dsub_6
, RegState::Define
, TRI
);
881 AddDReg(MIB
, DestReg
, ARM::dsub_7
, RegState::Define
, TRI
);
885 llvm_unreachable("Unknown regclass!");
890 ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr
*MI
,
891 int &FrameIndex
) const {
892 switch (MI
->getOpcode()) {
895 case ARM::t2LDRs
: // FIXME: don't use t2LDRs to access frame.
896 if (MI
->getOperand(1).isFI() &&
897 MI
->getOperand(2).isReg() &&
898 MI
->getOperand(3).isImm() &&
899 MI
->getOperand(2).getReg() == 0 &&
900 MI
->getOperand(3).getImm() == 0) {
901 FrameIndex
= MI
->getOperand(1).getIndex();
902 return MI
->getOperand(0).getReg();
910 if (MI
->getOperand(1).isFI() &&
911 MI
->getOperand(2).isImm() &&
912 MI
->getOperand(2).getImm() == 0) {
913 FrameIndex
= MI
->getOperand(1).getIndex();
914 return MI
->getOperand(0).getReg();
917 case ARM::VLD1q64Pseudo
:
918 if (MI
->getOperand(1).isFI() &&
919 MI
->getOperand(0).getSubReg() == 0) {
920 FrameIndex
= MI
->getOperand(1).getIndex();
921 return MI
->getOperand(0).getReg();
925 if (MI
->getOperand(1).isFI() &&
926 MI
->getOperand(2).isImm() &&
927 MI
->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia
) &&
928 MI
->getOperand(0).getSubReg() == 0) {
929 FrameIndex
= MI
->getOperand(1).getIndex();
930 return MI
->getOperand(0).getReg();
939 ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction
&MF
,
940 int FrameIx
, uint64_t Offset
,
943 MachineInstrBuilder MIB
= BuildMI(MF
, DL
, get(ARM::DBG_VALUE
))
944 .addFrameIndex(FrameIx
).addImm(0).addImm(Offset
).addMetadata(MDPtr
);
948 /// Create a copy of a const pool value. Update CPI to the new index and return
950 static unsigned duplicateCPV(MachineFunction
&MF
, unsigned &CPI
) {
951 MachineConstantPool
*MCP
= MF
.getConstantPool();
952 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
954 const MachineConstantPoolEntry
&MCPE
= MCP
->getConstants()[CPI
];
955 assert(MCPE
.isMachineConstantPoolEntry() &&
956 "Expecting a machine constantpool entry!");
957 ARMConstantPoolValue
*ACPV
=
958 static_cast<ARMConstantPoolValue
*>(MCPE
.Val
.MachineCPVal
);
960 unsigned PCLabelId
= AFI
->createConstPoolEntryUId();
961 ARMConstantPoolValue
*NewCPV
= 0;
962 // FIXME: The below assumes PIC relocation model and that the function
963 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
964 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
965 // instructions, so that's probably OK, but is PIC always correct when
967 if (ACPV
->isGlobalValue())
968 NewCPV
= new ARMConstantPoolValue(ACPV
->getGV(), PCLabelId
,
970 else if (ACPV
->isExtSymbol())
971 NewCPV
= new ARMConstantPoolValue(MF
.getFunction()->getContext(),
972 ACPV
->getSymbol(), PCLabelId
, 4);
973 else if (ACPV
->isBlockAddress())
974 NewCPV
= new ARMConstantPoolValue(ACPV
->getBlockAddress(), PCLabelId
,
975 ARMCP::CPBlockAddress
, 4);
976 else if (ACPV
->isLSDA())
977 NewCPV
= new ARMConstantPoolValue(MF
.getFunction(), PCLabelId
,
980 llvm_unreachable("Unexpected ARM constantpool value type!!");
981 CPI
= MCP
->getConstantPoolIndex(NewCPV
, MCPE
.getAlignment());
985 void ARMBaseInstrInfo::
986 reMaterialize(MachineBasicBlock
&MBB
,
987 MachineBasicBlock::iterator I
,
988 unsigned DestReg
, unsigned SubIdx
,
989 const MachineInstr
*Orig
,
990 const TargetRegisterInfo
&TRI
) const {
991 unsigned Opcode
= Orig
->getOpcode();
994 MachineInstr
*MI
= MBB
.getParent()->CloneMachineInstr(Orig
);
995 MI
->substituteRegister(Orig
->getOperand(0).getReg(), DestReg
, SubIdx
, TRI
);
999 case ARM::tLDRpci_pic
:
1000 case ARM::t2LDRpci_pic
: {
1001 MachineFunction
&MF
= *MBB
.getParent();
1002 unsigned CPI
= Orig
->getOperand(1).getIndex();
1003 unsigned PCLabelId
= duplicateCPV(MF
, CPI
);
1004 MachineInstrBuilder MIB
= BuildMI(MBB
, I
, Orig
->getDebugLoc(), get(Opcode
),
1006 .addConstantPoolIndex(CPI
).addImm(PCLabelId
);
1007 (*MIB
).setMemRefs(Orig
->memoperands_begin(), Orig
->memoperands_end());
1014 ARMBaseInstrInfo::duplicate(MachineInstr
*Orig
, MachineFunction
&MF
) const {
1015 MachineInstr
*MI
= TargetInstrInfoImpl::duplicate(Orig
, MF
);
1016 switch(Orig
->getOpcode()) {
1017 case ARM::tLDRpci_pic
:
1018 case ARM::t2LDRpci_pic
: {
1019 unsigned CPI
= Orig
->getOperand(1).getIndex();
1020 unsigned PCLabelId
= duplicateCPV(MF
, CPI
);
1021 Orig
->getOperand(1).setIndex(CPI
);
1022 Orig
->getOperand(2).setImm(PCLabelId
);
1029 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr
*MI0
,
1030 const MachineInstr
*MI1
) const {
1031 int Opcode
= MI0
->getOpcode();
1032 if (Opcode
== ARM::t2LDRpci
||
1033 Opcode
== ARM::t2LDRpci_pic
||
1034 Opcode
== ARM::tLDRpci
||
1035 Opcode
== ARM::tLDRpci_pic
) {
1036 if (MI1
->getOpcode() != Opcode
)
1038 if (MI0
->getNumOperands() != MI1
->getNumOperands())
1041 const MachineOperand
&MO0
= MI0
->getOperand(1);
1042 const MachineOperand
&MO1
= MI1
->getOperand(1);
1043 if (MO0
.getOffset() != MO1
.getOffset())
1046 const MachineFunction
*MF
= MI0
->getParent()->getParent();
1047 const MachineConstantPool
*MCP
= MF
->getConstantPool();
1048 int CPI0
= MO0
.getIndex();
1049 int CPI1
= MO1
.getIndex();
1050 const MachineConstantPoolEntry
&MCPE0
= MCP
->getConstants()[CPI0
];
1051 const MachineConstantPoolEntry
&MCPE1
= MCP
->getConstants()[CPI1
];
1052 ARMConstantPoolValue
*ACPV0
=
1053 static_cast<ARMConstantPoolValue
*>(MCPE0
.Val
.MachineCPVal
);
1054 ARMConstantPoolValue
*ACPV1
=
1055 static_cast<ARMConstantPoolValue
*>(MCPE1
.Val
.MachineCPVal
);
1056 return ACPV0
->hasSameValue(ACPV1
);
1059 return MI0
->isIdenticalTo(MI1
, MachineInstr::IgnoreVRegDefs
);
1062 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1063 /// determine if two loads are loading from the same base address. It should
1064 /// only return true if the base pointers are the same and the only differences
1065 /// between the two addresses is the offset. It also returns the offsets by
1067 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode
*Load1
, SDNode
*Load2
,
1069 int64_t &Offset2
) const {
1070 // Don't worry about Thumb: just ARM and Thumb2.
1071 if (Subtarget
.isThumb1Only()) return false;
1073 if (!Load1
->isMachineOpcode() || !Load2
->isMachineOpcode())
1076 switch (Load1
->getMachineOpcode()) {
1089 case ARM::t2LDRSHi8
:
1091 case ARM::t2LDRSHi12
:
1095 switch (Load2
->getMachineOpcode()) {
1108 case ARM::t2LDRSHi8
:
1110 case ARM::t2LDRSHi12
:
1114 // Check if base addresses and chain operands match.
1115 if (Load1
->getOperand(0) != Load2
->getOperand(0) ||
1116 Load1
->getOperand(4) != Load2
->getOperand(4))
1119 // Index should be Reg0.
1120 if (Load1
->getOperand(3) != Load2
->getOperand(3))
1123 // Determine the offsets.
1124 if (isa
<ConstantSDNode
>(Load1
->getOperand(1)) &&
1125 isa
<ConstantSDNode
>(Load2
->getOperand(1))) {
1126 Offset1
= cast
<ConstantSDNode
>(Load1
->getOperand(1))->getSExtValue();
1127 Offset2
= cast
<ConstantSDNode
>(Load2
->getOperand(1))->getSExtValue();
1134 /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1135 /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
1136 /// be scheduled togther. On some targets if two loads are loading from
1137 /// addresses in the same cache line, it's better if they are scheduled
1138 /// together. This function takes two integers that represent the load offsets
1139 /// from the common base address. It returns true if it decides it's desirable
1140 /// to schedule the two loads together. "NumLoads" is the number of loads that
1141 /// have already been scheduled after Load1.
1142 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode
*Load1
, SDNode
*Load2
,
1143 int64_t Offset1
, int64_t Offset2
,
1144 unsigned NumLoads
) const {
1145 // Don't worry about Thumb: just ARM and Thumb2.
1146 if (Subtarget
.isThumb1Only()) return false;
1148 assert(Offset2
> Offset1
);
1150 if ((Offset2
- Offset1
) / 8 > 64)
1153 if (Load1
->getMachineOpcode() != Load2
->getMachineOpcode())
1154 return false; // FIXME: overly conservative?
1156 // Four loads in a row should be sufficient.
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI->isDebugValue())

  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any dbg_value instructions
  while (++I != MBB->end() && I->isDebugValue())
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  if (MI->definesRegister(ARM::SP))
bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                           unsigned ExtraPredCycles,
                                           float Confidence) const {
  // Attempt to estimate the relative costs of predication versus branching.
  float UnpredCost = Probability * NumCyles;
  UnpredCost += 1.0; // The branch itself
  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
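  // Worked example with illustrative numbers: for NumCyles = 2,
  // ExtraPredCycles = 1, Probability = 0.5, Confidence = 0.8 and a 10-cycle
  // misprediction penalty, UnpredCost = 0.5*2 + 1 + 0.2*10 = 4, so the
  // 3-cycle predicated version is deemed profitable.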
  return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TMBB,
                    unsigned TCycles, unsigned TExtra,
                    MachineBasicBlock &FMBB,
                    unsigned FCycles, unsigned FExtra,
                    float Probability, float Confidence) const {
  if (!TCycles || !FCycles)

  // Attempt to estimate the relative costs of predication versus branching.
  float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
  UnpredCost += 1.0; // The branch itself
  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();

  return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();

int llvm::getMatchingCondBranchOpcode(int Opc) {
  else if (Opc == ARM::tB)
  else if (Opc == ARM::t2B)
  llvm_unreachable("Unknown unconditional branch opcode!");
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                   unsigned DestReg, unsigned BaseReg, int NumBytes,
                                   ARMCC::CondCodes Pred, unsigned PredReg,
                                   const ARMBaseInstrInfo &TII) {
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
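    // A so_imm operand holds an 8-bit value rotated right by an even amount
    // (e.g. 0x0000FF00 is 0xFF ror 24), so a constant such as 0x10004 that has
    // no such encoding is emitted as two chunks (0x4 and 0x10000), one ADD/SUB
    // per chunk.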
    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
1292 bool llvm::rewriteARMFrameIndex(MachineInstr
&MI
, unsigned FrameRegIdx
,
1293 unsigned FrameReg
, int &Offset
,
1294 const ARMBaseInstrInfo
&TII
) {
1295 unsigned Opcode
= MI
.getOpcode();
1296 const TargetInstrDesc
&Desc
= MI
.getDesc();
1297 unsigned AddrMode
= (Desc
.TSFlags
& ARMII::AddrModeMask
);
1300 // Memory operands in inline assembly always use AddrMode2.
1301 if (Opcode
== ARM::INLINEASM
)
1302 AddrMode
= ARMII::AddrMode2
;
1304 if (Opcode
== ARM::ADDri
) {
1305 Offset
+= MI
.getOperand(FrameRegIdx
+1).getImm();
1307 // Turn it into a move.
1308 MI
.setDesc(TII
.get(ARM::MOVr
));
1309 MI
.getOperand(FrameRegIdx
).ChangeToRegister(FrameReg
, false);
1310 MI
.RemoveOperand(FrameRegIdx
+1);
1313 } else if (Offset
< 0) {
1316 MI
.setDesc(TII
.get(ARM::SUBri
));
1319 // Common case: small offset, fits into instruction.
1320 if (ARM_AM::getSOImmVal(Offset
) != -1) {
1321 // Replace the FrameIndex with sp / fp
1322 MI
.getOperand(FrameRegIdx
).ChangeToRegister(FrameReg
, false);
1323 MI
.getOperand(FrameRegIdx
+1).ChangeToImmediate(Offset
);
1328 // Otherwise, pull as much of the immedidate into this ADDri/SUBri
1330 unsigned RotAmt
= ARM_AM::getSOImmValRotate(Offset
);
1331 unsigned ThisImmVal
= Offset
& ARM_AM::rotr32(0xFF, RotAmt
);
1333 // We will handle these bits from offset, clear them.
1334 Offset
&= ~ThisImmVal
;
1336 // Get the properly encoded SOImmVal field.
1337 assert(ARM_AM::getSOImmVal(ThisImmVal
) != -1 &&
1338 "Bit extraction didn't work?");
1339 MI
.getOperand(FrameRegIdx
+1).ChangeToImmediate(ThisImmVal
);
1341 unsigned ImmIdx
= 0;
1343 unsigned NumBits
= 0;
1346 case ARMII::AddrMode_i12
: {
1347 ImmIdx
= FrameRegIdx
+ 1;
1348 InstrOffs
= MI
.getOperand(ImmIdx
).getImm();
1352 case ARMII::AddrMode2
: {
1353 ImmIdx
= FrameRegIdx
+2;
1354 InstrOffs
= ARM_AM::getAM2Offset(MI
.getOperand(ImmIdx
).getImm());
1355 if (ARM_AM::getAM2Op(MI
.getOperand(ImmIdx
).getImm()) == ARM_AM::sub
)
1360 case ARMII::AddrMode3
: {
1361 ImmIdx
= FrameRegIdx
+2;
1362 InstrOffs
= ARM_AM::getAM3Offset(MI
.getOperand(ImmIdx
).getImm());
1363 if (ARM_AM::getAM3Op(MI
.getOperand(ImmIdx
).getImm()) == ARM_AM::sub
)
1368 case ARMII::AddrMode4
:
1369 case ARMII::AddrMode6
:
1370 // Can't fold any offset even if it's zero.
1372 case ARMII::AddrMode5
: {
1373 ImmIdx
= FrameRegIdx
+1;
1374 InstrOffs
= ARM_AM::getAM5Offset(MI
.getOperand(ImmIdx
).getImm());
1375 if (ARM_AM::getAM5Op(MI
.getOperand(ImmIdx
).getImm()) == ARM_AM::sub
)
1382 llvm_unreachable("Unsupported addressing mode!");
1386 Offset
+= InstrOffs
* Scale
;
1387 assert((Offset
& (Scale
-1)) == 0 && "Can't encode this offset!");
1393 // Attempt to fold address comp. if opcode has offset bits
1395 // Common case: small offset, fits into instruction.
1396 MachineOperand
&ImmOp
= MI
.getOperand(ImmIdx
);
1397 int ImmedOffset
= Offset
/ Scale
;
1398 unsigned Mask
= (1 << NumBits
) - 1;
1399 if ((unsigned)Offset
<= Mask
* Scale
) {
1400 // Replace the FrameIndex with sp
1401 MI
.getOperand(FrameRegIdx
).ChangeToRegister(FrameReg
, false);
1402 // FIXME: When addrmode2 goes away, this will simplify (like the
1403 // T2 version), as the LDR.i12 versions don't need the encoding
1404 // tricks for the offset value.
1406 if (AddrMode
== ARMII::AddrMode_i12
)
1407 ImmedOffset
= -ImmedOffset
;
1409 ImmedOffset
|= 1 << NumBits
;
1411 ImmOp
.ChangeToImmediate(ImmedOffset
);
1416 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
1417 ImmedOffset
= ImmedOffset
& Mask
;
1419 if (AddrMode
== ARMII::AddrMode_i12
)
1420 ImmedOffset
= -ImmedOffset
;
1422 ImmedOffset
|= 1 << NumBits
;
1424 ImmOp
.ChangeToImmediate(ImmedOffset
);
1425 Offset
&= ~(Mask
*Scale
);
1429 Offset
= (isSub
) ? -Offset
: Offset
;
1433 bool ARMBaseInstrInfo::
1434 AnalyzeCompare(const MachineInstr
*MI
, unsigned &SrcReg
, int &CmpMask
,
1435 int &CmpValue
) const {
1436 switch (MI
->getOpcode()) {
1442 SrcReg
= MI
->getOperand(0).getReg();
1444 CmpValue
= MI
->getOperand(1).getImm();
1448 SrcReg
= MI
->getOperand(0).getReg();
1449 CmpMask
= MI
->getOperand(1).getImm();
1457 /// isSuitableForMask - Identify a suitable 'and' instruction that
1458 /// operates on the given source register and applies the same mask
1459 /// as a 'tst' instruction. Provide a limited look-through for copies.
1460 /// When successful, MI will hold the found instruction.
1461 static bool isSuitableForMask(MachineInstr
*&MI
, unsigned SrcReg
,
1462 int CmpMask
, bool CommonUse
) {
1463 switch (MI
->getOpcode()) {
1466 if (CmpMask
!= MI
->getOperand(2).getImm())
1468 if (SrcReg
== MI
->getOperand(CommonUse
? 1 : 0).getReg())
1472 // Walk down one instruction which is potentially an 'and'.
1473 const MachineInstr
&Copy
= *MI
;
1474 MachineBasicBlock::iterator
AND(
1475 llvm::next(MachineBasicBlock::iterator(MI
)));
1476 if (AND
== MI
->getParent()->end()) return false;
1478 return isSuitableForMask(MI
, Copy
.getOperand(0).getReg(),
1486 /// OptimizeCompareInstr - Convert the instruction supplying the argument to the
1487 /// comparison into one that sets the zero bit in the flags register. Update the
1488 /// iterator *only* if a transformation took place.
1489 bool ARMBaseInstrInfo::
1490 OptimizeCompareInstr(MachineInstr
*CmpInstr
, unsigned SrcReg
, int CmpMask
,
1491 int CmpValue
, const MachineRegisterInfo
*MRI
,
1492 MachineBasicBlock::iterator
&MII
) const {
1496 MachineRegisterInfo::def_iterator DI
= MRI
->def_begin(SrcReg
);
1497 if (llvm::next(DI
) != MRI
->def_end())
1498 // Only support one definition.
1501 MachineInstr
*MI
= &*DI
;
1503 // Masked compares sometimes use the same register as the corresponding 'and'.
1504 if (CmpMask
!= ~0) {
1505 if (!isSuitableForMask(MI
, SrcReg
, CmpMask
, false)) {
1507 for (MachineRegisterInfo::use_iterator UI
= MRI
->use_begin(SrcReg
),
1508 UE
= MRI
->use_end(); UI
!= UE
; ++UI
) {
1509 if (UI
->getParent() != CmpInstr
->getParent()) continue;
1510 MachineInstr
*PotentialAND
= &*UI
;
1511 if (!isSuitableForMask(PotentialAND
, SrcReg
, CmpMask
, true))
1516 if (!MI
) return false;
1520 // Conservatively refuse to convert an instruction which isn't in the same BB
1521 // as the comparison.
1522 if (MI
->getParent() != CmpInstr
->getParent())
1525 // Check that CPSR isn't set between the comparison instruction and the one we
1527 MachineBasicBlock::const_iterator I
= CmpInstr
, E
= MI
,
1528 B
= MI
->getParent()->begin();
1530 // Early exit if CmpInstr is at the beginning of the BB.
1531 if (I
== B
) return false;
1534 for (; I
!= E
; --I
) {
1535 const MachineInstr
&Instr
= *I
;
1537 for (unsigned IO
= 0, EO
= Instr
.getNumOperands(); IO
!= EO
; ++IO
) {
1538 const MachineOperand
&MO
= Instr
.getOperand(IO
);
1539 if (!MO
.isReg()) continue;
1541 // This instruction modifies or uses CPSR after the one we want to
1542 // change. We can't do this transformation.
1543 if (MO
.getReg() == ARM::CPSR
)
1548 // The 'and' is below the comparison instruction.
1552 // Set the "zero" bit in CPSR.
1553 switch (MI
->getOpcode()) {
1561 MI
->RemoveOperand(5);
1562 MachineInstrBuilder(MI
)
1563 .addReg(ARM::CPSR
, RegState::Define
| RegState::Implicit
);
1564 MII
= llvm::next(MachineBasicBlock::iterator(CmpInstr
));
1565 CmpInstr
->eraseFromParent();
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  if (!ItinData || ItinData->isEmpty())

  const TargetInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;

  unsigned Opc = MI->getOpcode();
    llvm_unreachable("Unexpected multi-uops instruction!");

  // The number of uOps for load / store multiple are determined by the number
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle.
  // For VFP / NEON load / store multiple, the formula is
  // (#reg / 2) + (#reg % 2) + 1.
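  // For example, under that formula a VLDM of five D registers is modeled as
  // 5/2 + 5%2 + 1 = 4 uops.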
  case ARM::VLDMD_UPD:
  case ARM::VLDMS_UPD:
  case ARM::VSTMD_UPD:
  case ARM::VSTMS_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  case ARM::t2LDM_RET:
  case ARM::t2LDM_UPD:
  case ARM::t2STM_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isCortexA8()) {
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      UOps = (NumRegs / 2);
    } else if (Subtarget.isCortexA9()) {
      UOps = (NumRegs / 2);
      // If there is an odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
      // Assume the worst.
1657 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData
*ItinData
,
1658 const TargetInstrDesc
&DefTID
,
1660 unsigned DefIdx
, unsigned DefAlign
) const {
1661 int RegNo
= (int)(DefIdx
+1) - DefTID
.getNumOperands() + 1;
1663 // Def is the address writeback.
1664 return ItinData
->getOperandCycle(DefClass
, DefIdx
);
1667 if (Subtarget
.isCortexA8()) {
1668 // (regno / 2) + (regno % 2) + 1
1669 DefCycle
= RegNo
/ 2 + 1;
1672 } else if (Subtarget
.isCortexA9()) {
1674 bool isSLoad
= false;
1675 switch (DefTID
.getOpcode()) {
1678 case ARM::VLDMS_UPD
:
1682 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
1683 // then it takes an extra cycle.
1684 if ((isSLoad
&& (RegNo
% 2)) || DefAlign
< 8)
1687 // Assume the worst.
1688 DefCycle
= RegNo
+ 2;
1695 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData
*ItinData
,
1696 const TargetInstrDesc
&DefTID
,
1698 unsigned DefIdx
, unsigned DefAlign
) const {
1699 int RegNo
= (int)(DefIdx
+1) - DefTID
.getNumOperands() + 1;
1701 // Def is the address writeback.
1702 return ItinData
->getOperandCycle(DefClass
, DefIdx
);
1705 if (Subtarget
.isCortexA8()) {
1706 // 4 registers would be issued: 1, 2, 1.
1707 // 5 registers would be issued: 1, 2, 2.
1708 DefCycle
= RegNo
/ 2;
1711 // Result latency is issue cycle + 2: E2.
1713 } else if (Subtarget
.isCortexA9()) {
1714 DefCycle
= (RegNo
/ 2);
1715 // If there are odd number of registers or if it's not 64-bit aligned,
1716 // then it takes an extra AGU (Address Generation Unit) cycle.
1717 if ((RegNo
% 2) || DefAlign
< 8)
1719 // Result latency is AGU cycles + 2.
1722 // Assume the worst.
1723 DefCycle
= RegNo
+ 2;
1730 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData
*ItinData
,
1731 const TargetInstrDesc
&UseTID
,
1733 unsigned UseIdx
, unsigned UseAlign
) const {
1734 int RegNo
= (int)(UseIdx
+1) - UseTID
.getNumOperands() + 1;
1736 return ItinData
->getOperandCycle(UseClass
, UseIdx
);
1739 if (Subtarget
.isCortexA8()) {
1740 // (regno / 2) + (regno % 2) + 1
1741 UseCycle
= RegNo
/ 2 + 1;
1744 } else if (Subtarget
.isCortexA9()) {
1746 bool isSStore
= false;
1747 switch (UseTID
.getOpcode()) {
1750 case ARM::VSTMS_UPD
:
1754 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
1755 // then it takes an extra cycle.
1756 if ((isSStore
&& (RegNo
% 2)) || UseAlign
< 8)
1759 // Assume the worst.
1760 UseCycle
= RegNo
+ 2;
1767 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData
*ItinData
,
1768 const TargetInstrDesc
&UseTID
,
1770 unsigned UseIdx
, unsigned UseAlign
) const {
1771 int RegNo
= (int)(UseIdx
+1) - UseTID
.getNumOperands() + 1;
1773 return ItinData
->getOperandCycle(UseClass
, UseIdx
);
1776 if (Subtarget
.isCortexA8()) {
1777 UseCycle
= RegNo
/ 2;
1782 } else if (Subtarget
.isCortexA9()) {
1783 UseCycle
= (RegNo
/ 2);
1784 // If there are odd number of registers or if it's not 64-bit aligned,
1785 // then it takes an extra AGU (Address Generation Unit) cycle.
1786 if ((RegNo
% 2) || UseAlign
< 8)
1789 // Assume the worst.
1796 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData
*ItinData
,
1797 const TargetInstrDesc
&DefTID
,
1798 unsigned DefIdx
, unsigned DefAlign
,
1799 const TargetInstrDesc
&UseTID
,
1800 unsigned UseIdx
, unsigned UseAlign
) const {
1801 unsigned DefClass
= DefTID
.getSchedClass();
1802 unsigned UseClass
= UseTID
.getSchedClass();
1804 if (DefIdx
< DefTID
.getNumDefs() && UseIdx
< UseTID
.getNumOperands())
1805 return ItinData
->getOperandLatency(DefClass
, DefIdx
, UseClass
, UseIdx
);
1807 // This may be a def / use of a variable_ops instruction, the operand
1808 // latency might be determinable dynamically. Let the target try to
1811 bool LdmBypass
= false;
1812 switch (DefTID
.getOpcode()) {
1814 DefCycle
= ItinData
->getOperandCycle(DefClass
, DefIdx
);
1818 case ARM::VLDMD_UPD
:
1819 case ARM::VLDMS_UPD
: {
1820 DefCycle
= getVLDMDefCycle(ItinData
, DefTID
, DefClass
, DefIdx
, DefAlign
);
1829 case ARM::t2LDM_RET
:
1831 case ARM::t2LDM_UPD
: {
1833 DefCycle
= getLDMDefCycle(ItinData
, DefTID
, DefClass
, DefIdx
, DefAlign
);
1839 // We can't seem to determine the result latency of the def, assume it's 2.
1843 switch (UseTID
.getOpcode()) {
1845 UseCycle
= ItinData
->getOperandCycle(UseClass
, UseIdx
);
1849 case ARM::VSTMD_UPD
:
1850 case ARM::VSTMS_UPD
: {
1851 UseCycle
= getVSTMUseCycle(ItinData
, UseTID
, UseClass
, UseIdx
, UseAlign
);
1860 case ARM::t2STM_UPD
: {
1861 UseCycle
= getSTMUseCycle(ItinData
, UseTID
, UseClass
, UseIdx
, UseAlign
);
1867 // Assume it's read in the first stage.
1870 UseCycle
= DefCycle
- UseCycle
+ 1;
1873 // It's a variable_ops instruction so we can't use DefIdx here. Just use
1874 // first def operand.
1875 if (ItinData
->hasPipelineForwarding(DefClass
, DefTID
.getNumOperands()-1,
1878 } else if (ItinData
->hasPipelineForwarding(DefClass
, DefIdx
,
1887 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData
*ItinData
,
1888 const MachineInstr
*DefMI
, unsigned DefIdx
,
1889 const MachineInstr
*UseMI
, unsigned UseIdx
) const {
1890 if (DefMI
->isCopyLike() || DefMI
->isInsertSubreg() ||
1891 DefMI
->isRegSequence() || DefMI
->isImplicitDef())
1894 const TargetInstrDesc
&DefTID
= DefMI
->getDesc();
1895 if (!ItinData
|| ItinData
->isEmpty())
1896 return DefTID
.mayLoad() ? 3 : 1;
1899 const TargetInstrDesc
&UseTID
= UseMI
->getDesc();
1900 const MachineOperand
&DefMO
= DefMI
->getOperand(DefIdx
);
1901 if (DefMO
.getReg() == ARM::CPSR
) {
1902 if (DefMI
->getOpcode() == ARM::FMSTAT
) {
1903 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
1904 return Subtarget
.isCortexA9() ? 1 : 20;
1907 // CPSR set and branch can be paired in the same cycle.
1908 if (UseTID
.isBranch())
1912 unsigned DefAlign
= DefMI
->hasOneMemOperand()
1913 ? (*DefMI
->memoperands_begin())->getAlignment() : 0;
1914 unsigned UseAlign
= UseMI
->hasOneMemOperand()
1915 ? (*UseMI
->memoperands_begin())->getAlignment() : 0;
1916 int Latency
= getOperandLatency(ItinData
, DefTID
, DefIdx
, DefAlign
,
1917 UseTID
, UseIdx
, UseAlign
);
1920 (Subtarget
.isCortexA8() || Subtarget
.isCortexA9())) {
1921 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
1922 // variants are one cycle cheaper.
1923 switch (DefTID
.getOpcode()) {
1927 unsigned ShOpVal
= DefMI
->getOperand(3).getImm();
1928 unsigned ShImm
= ARM_AM::getAM2Offset(ShOpVal
);
1930 (ShImm
== 2 && ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsl
))
1937 case ARM::t2LDRSHs
: {
1938 // Thumb2 mode: lsl only.
1939 unsigned ShAmt
= DefMI
->getOperand(3).getImm();
1940 if (ShAmt
== 0 || ShAmt
== 2)
1951 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData
*ItinData
,
1952 SDNode
*DefNode
, unsigned DefIdx
,
1953 SDNode
*UseNode
, unsigned UseIdx
) const {
1954 if (!DefNode
->isMachineOpcode())
1957 const TargetInstrDesc
&DefTID
= get(DefNode
->getMachineOpcode());
1958 if (!ItinData
|| ItinData
->isEmpty())
1959 return DefTID
.mayLoad() ? 3 : 1;
1961 if (!UseNode
->isMachineOpcode()) {
1962 int Latency
= ItinData
->getOperandCycle(DefTID
.getSchedClass(), DefIdx
);
1963 if (Subtarget
.isCortexA9())
1964 return Latency
<= 2 ? 1 : Latency
- 1;
1966 return Latency
<= 3 ? 1 : Latency
- 2;
1969 const TargetInstrDesc
&UseTID
= get(UseNode
->getMachineOpcode());
1970 const MachineSDNode
*DefMN
= dyn_cast
<MachineSDNode
>(DefNode
);
1971 unsigned DefAlign
= !DefMN
->memoperands_empty()
1972 ? (*DefMN
->memoperands_begin())->getAlignment() : 0;
1973 const MachineSDNode
*UseMN
= dyn_cast
<MachineSDNode
>(UseNode
);
1974 unsigned UseAlign
= !UseMN
->memoperands_empty()
1975 ? (*UseMN
->memoperands_begin())->getAlignment() : 0;
1976 int Latency
= getOperandLatency(ItinData
, DefTID
, DefIdx
, DefAlign
,
1977 UseTID
, UseIdx
, UseAlign
);
1980 (Subtarget
.isCortexA8() || Subtarget
.isCortexA9())) {
1981 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
1982 // variants are one cycle cheaper.
1983 switch (DefTID
.getOpcode()) {
1988 cast
<ConstantSDNode
>(DefNode
->getOperand(2))->getZExtValue();
1989 unsigned ShImm
= ARM_AM::getAM2Offset(ShOpVal
);
1991 (ShImm
== 2 && ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsl
))
1998 case ARM::t2LDRSHs
: {
1999 // Thumb2 mode: lsl only.
2001 cast
<ConstantSDNode
>(DefNode
->getOperand(2))->getZExtValue();
2002 if (ShAmt
== 0 || ShAmt
== 2)
2012 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData
*ItinData
,
2013 const MachineInstr
*MI
,
2014 unsigned *PredCost
) const {
2015 if (MI
->isCopyLike() || MI
->isInsertSubreg() ||
2016 MI
->isRegSequence() || MI
->isImplicitDef())
2019 if (!ItinData
|| ItinData
->isEmpty())
2022 const TargetInstrDesc
&TID
= MI
->getDesc();
2023 unsigned Class
= TID
.getSchedClass();
2024 unsigned UOps
= ItinData
->Itineraries
[Class
].NumMicroOps
;
2025 if (PredCost
&& TID
.hasImplicitDefOfPhysReg(ARM::CPSR
))
2026 // When predicated, CPSR is an additional source operand for CPSR updating
2027 // instructions, this apparently increases their latencies.
2030 return ItinData
->getStageLatency(Class
);
2031 return getNumMicroOps(ItinData
, MI
);
2034 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData
*ItinData
,
2035 SDNode
*Node
) const {
2036 if (!Node
->isMachineOpcode())
2039 if (!ItinData
|| ItinData
->isEmpty())
2042 unsigned Opcode
= Node
->getMachineOpcode();
2045 return ItinData
->getStageLatency(get(Opcode
).getSchedClass());
bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
                      const MachineRegisterInfo *MRI,
                      const MachineInstr *DefMI, unsigned DefIdx,
                      const MachineInstr *UseMI, unsigned UseIdx) const {
  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
  if (Subtarget.isCortexA8() &&
      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    // CortexA8 VFP instructions are not pipelined.

  // Hoist VFP / NEON instructions with 4 or higher latency.
  int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
bool ARMBaseInstrInfo::
hasLowDefLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *DefMI, unsigned DefIdx) const {
  if (!ItinData || ItinData->isEmpty())

  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  if (DDomain == ARMII::DomainGeneral) {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    return (DefCycle != -1 && DefCycle <= 2);