//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "arm-instrinfo"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
static cl::opt<bool>
    EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                   cl::desc("Enable ARM 2-addr to 3-addr conv"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;    // MLA / MLS opcode
  uint16_t MulOpc;    // Expanded multiplication opcode
  uint16_t AddSubOpc; // Expanded add / sub opcode
  bool NegAcc;        // True if the acc is negated before the add / sub.
  bool HasLane;       // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};
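
// The constructor below builds two views of this table: MLxEntryMap maps each
// MLx opcode to its row index for fast lookup, and MLxHazardOpcodes collects
// the expanded multiply and add/sub opcodes so that instructions which may
// interact with an MLx result can be recognized during hazard detection.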
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
    : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
      Subtarget(STI) {
  for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}
// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                               const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II =
        static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

// Called during:
// - pre-RA scheduling
// - post-RA scheduling when FeatureUseMISched is set
ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
    const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();

  // We would like to restrict this hazard recognizer to only
  // post-RA scheduling; we can tell that we're post-RA because we don't
  // track VRegLiveness.
  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
  // banks banked on bit 2.  Assume that TCMs are in use.
  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
    MHR->AddHazardRecognizer(
        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));

  // Not inserting ARMHazardRecognizerFPMLx because that would change

  auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
  return MHR;
}

// Called during post-RA scheduling when FeatureUseMISched is not set
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();

  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
    MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());

  auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
  return MHR;
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                        LiveIntervals *LIS) const {
  // FIXME: Thumb2 support.

  MachineFunction &MF = *MI.getParent()->getParent();
  uint64_t TSFlags = MI.getDesc().TSFlags;

  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return nullptr;
  case ARMII::IndexModePre:
  case ARMII::IndexModePost:

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
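  // Illustrative sketch (not part of the original source): a pre-indexed load
  // such as
  //   ldr r0, [r1, r2]!
  // is split into an explicit base update followed by an un-indexed load,
  //   add r1, r1, r2
  //   ldr r0, [r1]
  // while a post-indexed form performs the un-indexed load first and the
  // base update second.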
  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());

  MachineInstr *UpdateMI = nullptr;
  MachineInstr *MemMI = nullptr;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI.getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI.mayStore();
  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(2);
  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
  Register WBReg = WB.getReg();
  Register BaseReg = Base.getReg();
  Register OffReg = Offset.getReg();
  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)

  std::vector<MachineInstr *> NewMIs;
      BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
      BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);

  // Transfer LiveVariables states, kill / dead info.
    for (const MachineOperand &MO : MI.operands()) {
      if (MO.isReg() && MO.getReg().isVirtual()) {
        Register Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
            LV->addVirtualRegisterDead(Reg, *NewMI);
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
            LV->addVirtualRegisterKilled(Reg, *NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);

  MachineBasicBlock &MBB = *MI.getParent();
  MBB.insert(MI, NewMIs[1]);
  MBB.insert(MI, NewMIs[0]);

// Cond vector output format:
//   0 elements indicates an unconditional branch
//   2 elements indicates a conditional branch; the elements are
//     the condition to check and the CPSR.
//   3 elements indicates a hardware loop end; the elements
//     are the opcode, the operand value to test, and a dummy
//     operand used to pad out to 3 operands.
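// For example (illustrative, not part of the original comment): for a
// terminator
//   Bcc %bb.1, 1 /* ARMCC::NE */, $cpsr
// Cond holds { imm:ARMCC::NE, reg:$cpsr }, while for a t2LoopEnd terminator it
// holds { imm:opcode, counter operand, imm:0 }.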
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  MachineBasicBlock::instr_iterator I = MBB.instr_end();
  if (I == MBB.instr_begin())
    return false; // Empty blocks are easy.

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up on the end of the basic block before we bail
    bool CantAnalyze = false;

    // Skip over DEBUG values, predicated nonterminators and speculation
    // barrier terminators.
    while (I->isDebugInstr() || !I->isTerminator() ||
           isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
           I->getOpcode() == ARM::t2DoLoopStartTP) {
      if (I == MBB.instr_begin())

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      assert(!FBB && "FBB should have been null.");
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
    } else if (I->getOpcode() == ARM::t2LoopEnd &&
                   ->getSubtarget<ARMSubtarget>()
                       .enableMachinePipeliner()) {
      TBB = I->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
      Cond.push_back(I->getOperand(0));
      Cond.push_back(MachineOperand::CreateImm(0));
      // We encountered some other unrecognized terminator. Bail out
      // immediately.

    // Cleanup code - to be run for unpredicated unconditional branches and
    if (!isPredicated(*I) &&
        (isUncondBranchOpcode(I->getOpcode()) ||
         isIndirectBranchOpcode(I->getOpcode()) ||
         isJumpTableBranchOpcode(I->getOpcode()) ||
      // Forget any previous condition branch information - it no longer
      // applies.

      // If we can modify the function, delete everything below this
      // unconditional branch.
        MachineBasicBlock::iterator DI = std::next(I);
        while (DI != MBB.instr_end()) {
          MachineInstr &InstToDelete = *DI;
          // Speculation barriers must not be deleted.
          if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
          InstToDelete.eraseFromParent();

      // We may not be able to analyze the block, but we could still have
      // an unconditional branch as the last instruction in the block, which
      // just branches to layout successor. If this is the case, then just
      // remove it if we're allowed to make modifications.
      if (AllowModify && !isPredicated(MBB.back()) &&
          isUncondBranchOpcode(MBB.back().getOpcode()) &&
          TBB && MBB.isLayoutSuccessor(TBB))

    if (I == MBB.instr_begin())

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.

unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)

  // Remove the branch.
  I->eraseFromParent();

  if (I == MBB.begin()) return 1;
  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)

  // Remove the branch.
  I->eraseFromParent();

unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        int *BytesAdded) const {
  assert(!BytesAdded && "code size not handled");
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc = !AFI->isThumbFunction()
                 ? ARM::B
                 : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
                   ? ARM::Bcc
                   : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
         "ARM branch conditions have two or three components!");
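  // In outline (illustrative): an empty Cond emits a single unconditional
  // B/tB/t2B to TBB; a 2-element Cond emits a Bcc-style branch carrying the
  // condition and the CPSR operand; a 3-element Cond re-emits the
  // hardware-loop-end opcode stored in Cond[0]; and when FBB is present a
  // trailing unconditional branch to FBB completes the two-way case.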
  // For conditional branches, we use addOperand to preserve CPSR flags.

  if (Cond.empty()) { // Unconditional branch?
      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
  } else if (Cond.size() == 2) {
    BuildMI(&MBB, DL, get(BccOpc))
        .addImm(Cond[0].getImm())
    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);

  // Two-way conditional branch.
  if (Cond.size() == 2)
    BuildMI(&MBB, DL, get(BccOpc))
        .addImm(Cond[0].getImm())
  else if (Cond.size() == 3)
    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);

bool ARMBaseInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond.size() == 2) {
    ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
    Cond[0].setImm(ARMCC::getOppositeCondition(CC));
bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)

  int PIdx = MI.findFirstPredOperandIdx();
  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;

std::string ARMBaseInstrInfo::createMIROperandComment(
    const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
    const TargetRegisterInfo *TRI) const {
  // First, let's see if there is a generic comment for this operand
  std::string GenericComment =
      TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
  if (!GenericComment.empty())
    return GenericComment;

  // If not, check if we have an immediate operand.
    return std::string();

  // And print its corresponding condition code if the immediate is a
  int FirstPredOp = MI.findFirstPredOperandIdx();
  if (FirstPredOp != (int) OpIdx)
    return std::string();

  std::string CC = "CC::";
  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());

bool ARMBaseInstrInfo::PredicateInstruction(
    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
  unsigned Opc = MI.getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Pred[0].getImm())
        .addReg(Pred[1].getReg());
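    // For instance (illustrative): predicating "B %bb.2" on EQ rewrites it to
    // the matching conditional opcode, "Bcc %bb.2, 0 /* ARMCC::EQ */, $cpsr".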
  int PIdx = MI.findFirstPredOperandIdx();
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI.getOperand(PIdx+1).setReg(Pred[1].getReg());

    // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
    // IT block. This affects how they are printed.
    const MCInstrDesc &MCID = MI.getDesc();
    if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
      assert(MCID.operands()[1].isOptionalDef() &&
             "CPSR def isn't expected operand");
      assert((MI.getOperand(1).isDead() ||
              MI.getOperand(1).getReg() != ARM::CPSR) &&
             "if conversion tried to stop defining used CPSR");
      MI.getOperand(1).setReg(ARM::NoRegister);

bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
                                         ArrayRef<MachineOperand> Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
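  // Pred1 subsumes Pred2 when every machine state that satisfies Pred2 also
  // satisfies Pred1; e.g. HS (unsigned higher or same) subsumes HI (unsigned
  // higher), since HI implies HS.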
    return CC2 == ARMCC::HI;
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
    return CC2 == ARMCC::GT;
    return CC2 == ARMCC::LT;

bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
                                         std::vector<MachineOperand> &Pred,
                                         bool SkipDead) const {
  for (const MachineOperand &MO : MI.operands()) {
    bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
    bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
    if (ClobbersCPSR || IsCPSR) {

      // Filter out T1 instructions that have a dead CPSR,
      // allowing IT blocks to be generated containing T1 instructions
      const MCInstrDesc &MCID = MI.getDesc();
      if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&

bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
  for (const auto &MO : MI.operands())
    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())

static bool isEligibleForITBlock(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return true;
  case ARM::tADC:   // ADC (register) T1
  case ARM::tADDi3: // ADD (immediate) T1
  case ARM::tADDi8: // ADD (immediate) T2
  case ARM::tADDrr: // ADD (register) T1
  case ARM::tAND:   // AND (register) T1
  case ARM::tASRri: // ASR (immediate) T1
  case ARM::tASRrr: // ASR (register) T1
  case ARM::tBIC:   // BIC (register) T1
  case ARM::tEOR:   // EOR (register) T1
  case ARM::tLSLri: // LSL (immediate) T1
  case ARM::tLSLrr: // LSL (register) T1
  case ARM::tLSRri: // LSR (immediate) T1
  case ARM::tLSRrr: // LSR (register) T1
  case ARM::tMUL:   // MUL T1
  case ARM::tMVN:   // MVN (register) T1
  case ARM::tORR:   // ORR (register) T1
  case ARM::tROR:   // ROR (register) T1
  case ARM::tRSB:   // RSB (immediate) T1
  case ARM::tSBC:   // SBC (register) T1
  case ARM::tSUBi3: // SUB (immediate) T1
  case ARM::tSUBi8: // SUB (immediate) T2
  case ARM::tSUBrr: // SUB (register) T1
    return !ARMBaseInstrInfo::isCPSRDefined(*MI);

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
  if (!MI.isPredicable())

  if (!isEligibleForITBlock(&MI))

  const MachineFunction *MF = MI.getParent()->getParent();
  const ARMFunctionInfo *AFI =
      MF->getInfo<ARMFunctionInfo>();

  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
  // In their ARM encoding, they can't be encoded in a conditional form.
  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)

  // Make indirect control flow changes unpredicable when SLS mitigation is
  const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
  if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
  if (ST.hardenSlsBlr() && isIndirectCall(MI))

  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(&MI);

template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
    if (MO.getReg() != ARM::CPSR)
  // all definitions of CPSR are dead

} // end namespace llvm
/// GetInstSize - Return the size of the specified MachineInstr.
unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI.getDesc();

  switch (MI.getOpcode()) {
    // Return the size specified in .td file. If there's none, return 0, as we
    // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
    // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
    // contrast to AArch64 instructions which have a default size of 4 bytes for
    return MCID.getSize();
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::CONSTPOOL_ENTRY:
  case ARM::JUMPTABLE_INSTS:
  case ARM::JUMPTABLE_ADDRS:
  case ARM::JUMPTABLE_TBB:
  case ARM::JUMPTABLE_TBH:
    // If this machine instr is a constant pool entry, its size is recorded as
    return MI.getOperand(2).getImm();
    return MI.getOperand(1).getImm();
  case ARM::INLINEASM_BR: {
    // If this machine instr is an inline asm, measure it.
    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
      Size = alignTo(Size, 4);

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);

void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, bool KillSrc,
                                    const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)

  MachineInstrBuilder MIB =
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);

  // There is only 1 A/R class MRS instruction, and it always refers to
  // APSR. However, there are lots of other possibilities on M-class cores.
  if (Subtarget.isMClass())

  MIB.add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));

void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned SrcReg, bool KillSrc,
                                  const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)

  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));

  if (Subtarget.isMClass())

  MIB.addReg(SrcReg, getKillRegState(KillSrc))
      .add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);

void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
  MIB.addImm(ARMVCC::None);
  MIB.addReg(0); // tp_reg

void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
  addUnpredicatedMveVpredNOp(MIB);
  MIB.addReg(DestReg, RegState::Undef);

void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
  MIB.addReg(ARM::VPR, RegState::Implicit);
  MIB.addReg(0); // tp_reg

void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                    unsigned Cond, unsigned Inactive) {
  addPredicatedMveVpredNOp(MIB, Cond);
  MIB.addReg(Inactive);

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, MCRegister DestReg,
                                   MCRegister SrcReg, bool KillSrc,
                                   bool RenamableSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL))

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  if (SPRDest && SPRSrc)
  else if (GPRDest && SPRSrc)
  else if (SPRDest && GPRSrc)
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;

    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(MIB, DestReg);
    else if (Opc != ARM::MQPRCopy)
      MIB.add(predOps(ARMCC::AL));

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    BeginIdx = ARM::dsub_0;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    BeginIdx = ARM::dsub_0;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    BeginIdx = ARM::dsub_0;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    BeginIdx = ARM::dsub_0;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    BeginIdx = ARM::dsub_0;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    BeginIdx = ARM::dsub_0;
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
             !Subtarget.hasFP64()) {
    BeginIdx = ARM::ssub_0;
  } else if (SrcReg == ARM::CPSR) {
    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
  } else if (DestReg == ARM::CPSR) {
    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
  } else if (DestReg == ARM::VPR) {
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
  } else if (SrcReg == ARM::VPR) {
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
  } else if (DestReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
  } else if (SrcReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
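  // e.g. (illustrative) copying q1_q2 into q2_q3 with forward sub-register
  // moves would overwrite q2 before it is read, so the copy is emitted from
  // the last sub-register down instead.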
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);

  SmallSet<unsigned, 4> DstRegs;

  for (unsigned i = 0; i != SubRegs; ++i) {
    Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);

    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR (NEON or MVE) takes two source operands.
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
    // MVE VORR takes predicate operands in place of an ordinary condition.
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(Mov, Dst);
      Mov = Mov.add(predOps(ARMCC::AL));
    if (Opc == ARM::MOVr)
      Mov = Mov.add(condCodeOp());

  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
    Mov->addRegisterKilled(SrcReg, TRI);

std::optional<DestSourcePair>
ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  // VMOVRRD is also a copy instruction but it requires
  // a special way of handling. It is a more complex copy version
  // and so we are not considering it here. For recognition
  // of such instructions the isExtractSubregLike MI interface function
  // VORRq is considered as a move only if its two inputs are
  // the same register.
  if (!MI.isMoveReg() ||
      (MI.getOpcode() == ARM::VORRq &&
       MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
    return std::nullopt;
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};

std::optional<ParamLoadedValue>
ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
                                      Register Reg) const {
  if (auto DstSrcPair = isCopyInstrImpl(MI)) {
    Register DstReg = DstSrcPair->Destination->getReg();

    // TODO: We don't handle cases where the forwarding reg is narrower/wider
    // than the copy registers. Consider for example:
    // We'd like to describe the call site value of d0 as d8, but this requires
    // gathering and merging the descriptions for the two VMOVS instructions.
    //
    // We also don't handle the reverse situation, where the forwarding reg is
    // narrower than the copy destination:
    // We need to produce a fragment description (the call site value of s1 is
      return std::nullopt;

  return TargetInstrInfo::describeLoadedValue(MI, Reg);

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
    return MIB.addReg(Reg, State);

  if (Register::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);

void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
                                           Register VReg) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = MFI.getObjectAlign(FI);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), Alignment);
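
  // The switch below dispatches on the spill size of the register class:
  // 2-byte HPR spills use VSTRH, 4-byte GPR/SPR/VCCR spills use
  // STRi12/VSTRS/VSTR_P0_off, 8-byte DPR and GPR-pair spills use VSTRD or
  // STRD/STMIA, and the wider vector classes fall back to aligned VST1
  // forms, MVE stores, or VSTMDIA sequences built up with AddDReg.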
  switch (TRI->getSpillSize(*RC)) {
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
          .addReg(SrcReg, getKillRegState(isKill))
          .add(predOps(ARMCC::AL));
      llvm_unreachable("Unknown reg class!");
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
          .addReg(SrcReg, getKillRegState(isKill))
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
          .addReg(SrcReg, getKillRegState(isKill))
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .add(predOps(ARMCC::AL));
    } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .add(predOps(ARMCC::AL));
      llvm_unreachable("Unknown reg class!");
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
          .addReg(SrcReg, getKillRegState(isKill))
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
        // Fallback to STM instruction, which has existed since the dawn of
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
            .add(predOps(ARMCC::AL));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      llvm_unreachable("Unknown reg class!");
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
            .addReg(SrcReg, getKillRegState(isKill))
            .add(predOps(ARMCC::AL));
        BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
            .addReg(SrcReg, getKillRegState(isKill))
            .add(predOps(ARMCC::AL));
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
      MIB.addReg(SrcReg, getKillRegState(isKill))
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
      llvm_unreachable("Unknown reg class!");
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
            .addReg(SrcReg, getKillRegState(isKill))
            .add(predOps(ARMCC::AL));
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
            .add(predOps(ARMCC::AL))
            .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      llvm_unreachable("Unknown reg class!");
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::MQQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
            .addReg(SrcReg, getKillRegState(isKill))
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO);
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
            .add(predOps(ARMCC::AL))
            .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      llvm_unreachable("Unknown reg class!");
    if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
        Subtarget.hasMVEIntegerOps()) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
          .addReg(SrcReg, getKillRegState(isKill))
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
          .add(predOps(ARMCC::AL))
          .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
      llvm_unreachable("Unknown reg class!");
    llvm_unreachable("Unknown reg class!");

Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
  case ARM::VSTR_P0_off:
  case ARM::VSTR_FPSCR_NZCVQC_off:
  case ARM::MVE_VSTRWU32:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return MI.getOperand(2).getReg();
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
  case ARM::MQQPRStore:
  case ARM::MQQQQPRStore:
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();

Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                    int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())

void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator I,
                                            Register DestReg, int FI,
                                            const TargetRegisterClass *RC,
                                            const TargetRegisterInfo *TRI,
                                            Register VReg) const {
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const Align Alignment = MFI.getObjectAlign(FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      MFI.getObjectSize(FI), Alignment);

  switch (TRI->getSpillSize(*RC)) {
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
          .add(predOps(ARMCC::AL));
      llvm_unreachable("Unknown reg class!");
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
          .add(predOps(ARMCC::AL));
    } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
          .add(predOps(ARMCC::AL));
      llvm_unreachable("Unknown reg class!");
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
        // Fallback to LDM instruction, which has existed since the dawn of
        MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
            .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      if (DestReg.isPhysical())
        MIB.addReg(DestReg, RegState::ImplicitDefine);
      llvm_unreachable("Unknown reg class!");
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
            .add(predOps(ARMCC::AL));
        BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
            .add(predOps(ARMCC::AL));
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
      MIB.addFrameIndex(FI)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
      llvm_unreachable("Unknown reg class!");
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
            .add(predOps(ARMCC::AL));
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
            .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (DestReg.isPhysical())
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      llvm_unreachable("Unknown reg class!");
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::MQQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
            .addMemOperand(MMO);
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
            .add(predOps(ARMCC::AL))
            .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (DestReg.isPhysical())
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      llvm_unreachable("Unknown reg class!");
    if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
        Subtarget.hasMVEIntegerOps()) {
      BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
          .add(predOps(ARMCC::AL))
          .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (DestReg.isPhysical())
        MIB.addReg(DestReg, RegState::ImplicitDefine);
      llvm_unreachable("Unknown reg class!");
    llvm_unreachable("Unknown regclass!");

Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
  case ARM::VLDR_P0_off:
  case ARM::VLDR_FPSCR_NZCVQC_off:
  case ARM::MVE_VLDRWU32:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
  case ARM::VLD1d8TPseudo:
  case ARM::VLD1d16TPseudo:
  case ARM::VLD1d32TPseudo:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d8QPseudo:
  case ARM::VLD1d16QPseudo:
  case ARM::VLD1d32QPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
  case ARM::MQQPRLoad:
  case ARM::MQQQQPRLoad:
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();

Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())

/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
/// depending on whether the result is used.
void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
  bool isThumb1 = Subtarget.isThumb1Only();
  bool isThumb2 = Subtarget.isThumb2();
  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();

  DebugLoc dl = MI->getDebugLoc();
  MachineBasicBlock *BB = MI->getParent();

  MachineInstrBuilder LDM, STM;
  if (isThumb1 || !MI->getOperand(1).isDead()) {
    MachineOperand LDWb(MI->getOperand(1));
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
                                        : isThumb1 ? ARM::tLDMIA_UPD
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));

  if (isThumb1 || !MI->getOperand(0).isDead()) {
    MachineOperand STWb(MI->getOperand(0));
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
                                        : isThumb1 ? ARM::tSTMIA_UPD
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));

  MachineOperand LDBase(MI->getOperand(3));
  LDM.add(LDBase).add(predOps(ARMCC::AL));

  MachineOperand STBase(MI->getOperand(2));
  STM.add(STBase).add(predOps(ARMCC::AL));

  // Sort the scratch registers into ascending order.
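  // LDM/STM register lists are encoded in ascending register order, so the
  // scratch registers are sorted by their encoding value before being
  // appended to both the load and the store.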
  const TargetRegisterInfo &TRI = getRegisterInfo();
  SmallVector<unsigned, 6> ScratchRegs;
  for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
    ScratchRegs.push_back(MO.getReg());
  llvm::sort(ScratchRegs,
             [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
               return TRI.getEncodingValue(Reg1) <
                      TRI.getEncodingValue(Reg2);

  for (const auto &Reg : ScratchRegs) {
    LDM.addReg(Reg, RegState::Define);
    STM.addReg(Reg, RegState::Kill);

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    expandLoadStackGuard(MI);
    MI.getParent()->erase(MI);

  if (MI.getOpcode() == ARM::MEMCPY) {

  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
  // widened to VMOVD. We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
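  // For example (illustrative): "%s0 = COPY killed %s2" can become
  // "%d0 = VMOVD undef %d1, implicit killed %s2", since s0/s1 and s2/s3 are
  // the sub-registers of d0 and d1 respectively.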
1719 if (!MI
.isCopy() || Subtarget
.dontWidenVMOVS() || !Subtarget
.hasFP64())
1722 // Look for a copy between even S-registers. That is where we keep floats
1723 // when using NEON v2f32 instructions for f32 arithmetic.
1724 Register DstRegS
= MI
.getOperand(0).getReg();
1725 Register SrcRegS
= MI
.getOperand(1).getReg();
1726 if (!ARM::SPRRegClass
.contains(DstRegS
, SrcRegS
))
1729 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
1730 unsigned DstRegD
= TRI
->getMatchingSuperReg(DstRegS
, ARM::ssub_0
,
1732 unsigned SrcRegD
= TRI
->getMatchingSuperReg(SrcRegS
, ARM::ssub_0
,
1734 if (!DstRegD
|| !SrcRegD
)
1737 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1738 // legal if the COPY already defines the full DstRegD, and it isn't a
1739 // sub-register insertion.
1740 if (!MI
.definesRegister(DstRegD
, TRI
) || MI
.readsRegister(DstRegD
, TRI
))
1743 // A dead copy shouldn't show up here, but reject it just in case.
1744 if (MI
.getOperand(0).isDead())
1747 // All clear, widen the COPY.
1748 LLVM_DEBUG(dbgs() << "widening: " << MI
);
1749 MachineInstrBuilder
MIB(*MI
.getParent()->getParent(), MI
);
1751 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1752 // or some other super-register.
1753 int ImpDefIdx
= MI
.findRegisterDefOperandIdx(DstRegD
, /*TRI=*/nullptr);
1754 if (ImpDefIdx
!= -1)
1755 MI
.removeOperand(ImpDefIdx
);
1757 // Change the opcode and operands.
1758 MI
.setDesc(get(ARM::VMOVD
));
1759 MI
.getOperand(0).setReg(DstRegD
);
1760 MI
.getOperand(1).setReg(SrcRegD
);
1761 MIB
.add(predOps(ARMCC::AL
));
1763 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1764 // register scavenger and machine verifier, so we need to indicate that we
1765 // are reading an undefined value from SrcRegD, but a proper value from
1767 MI
.getOperand(1).setIsUndef();
1768 MIB
.addReg(SrcRegS
, RegState::Implicit
);
1770 // SrcRegD may actually contain an unrelated value in the ssub_1
1771 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1772 if (MI
.getOperand(1).isKill()) {
1773 MI
.getOperand(1).setIsKill(false);
1774 MI
.addRegisterKilled(SrcRegS
, TRI
, true);
1777 LLVM_DEBUG(dbgs() << "replaced by: " << MI
);
1781 /// Create a copy of a const pool value. Update CPI to the new index and return
1783 static unsigned duplicateCPV(MachineFunction
&MF
, unsigned &CPI
) {
1784 MachineConstantPool
*MCP
= MF
.getConstantPool();
1785 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
1787 const MachineConstantPoolEntry
&MCPE
= MCP
->getConstants()[CPI
];
1788 assert(MCPE
.isMachineConstantPoolEntry() &&
1789 "Expecting a machine constantpool entry!");
1790 ARMConstantPoolValue
*ACPV
=
1791 static_cast<ARMConstantPoolValue
*>(MCPE
.Val
.MachineCPVal
);
1793 unsigned PCLabelId
= AFI
->createPICLabelUId();
1794 ARMConstantPoolValue
*NewCPV
= nullptr;
1796 // FIXME: The below assumes PIC relocation model and that the function
1797 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1798 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1799 // instructions, so that's probably OK, but is PIC always correct when
1801 if (ACPV
->isGlobalValue())
1802 NewCPV
= ARMConstantPoolConstant::Create(
1803 cast
<ARMConstantPoolConstant
>(ACPV
)->getGV(), PCLabelId
, ARMCP::CPValue
,
1804 4, ACPV
->getModifier(), ACPV
->mustAddCurrentAddress());
1805 else if (ACPV
->isExtSymbol())
1806 NewCPV
= ARMConstantPoolSymbol::
1807 Create(MF
.getFunction().getContext(),
1808 cast
<ARMConstantPoolSymbol
>(ACPV
)->getSymbol(), PCLabelId
, 4);
1809 else if (ACPV
->isBlockAddress())
1810 NewCPV
= ARMConstantPoolConstant::
1811 Create(cast
<ARMConstantPoolConstant
>(ACPV
)->getBlockAddress(), PCLabelId
,
1812 ARMCP::CPBlockAddress
, 4);
1813 else if (ACPV
->isLSDA())
1814 NewCPV
= ARMConstantPoolConstant::Create(&MF
.getFunction(), PCLabelId
,
1816 else if (ACPV
->isMachineBasicBlock())
1817 NewCPV
= ARMConstantPoolMBB::
1818 Create(MF
.getFunction().getContext(),
1819 cast
<ARMConstantPoolMBB
>(ACPV
)->getMBB(), PCLabelId
, 4);
1821 llvm_unreachable("Unexpected ARM constantpool value type!!");
1822 CPI
= MCP
->getConstantPoolIndex(NewCPV
, MCPE
.getAlign());
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     Register DestReg, unsigned SubIdx,
                                     const MachineInstr &Orig,
                                     const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig.getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig.getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
        .addConstantPoolIndex(CPI)
        .addImm(PCLabelId)
        .cloneMemRefs(Orig);
    break;
  }
  }
}
MachineInstr &
ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertBefore,
                            const MachineInstr &Orig) const {
  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
  for (;;) {
    switch (I->getOpcode()) {
    case ARM::tLDRpci_pic:
    case ARM::t2LDRpci_pic: {
      MachineFunction &MF = *MBB.getParent();
      unsigned CPI = I->getOperand(1).getIndex();
      unsigned PCLabelId = duplicateCPV(MF, CPI);
      I->getOperand(1).setIndex(CPI);
      I->getOperand(2).setImm(PCLabelId);
      break;
    }
    }
    if (!I->isBundledWithSucc())
      break;
    ++I;
  }
  return Cloned;
}
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
                                        const MachineInstr &MI1,
                                        const MachineRegisterInfo *MRI) const {
  unsigned Opcode = MI0.getOpcode();
  if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
      Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0.getOperand(1);
    const MachineOperand &MO1 = MI1.getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
        Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    const MachineFunction *MF = MI0.getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
          static_cast<ARMConstantPoolValue *>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
          static_cast<ARMConstantPoolValue *>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    Register Addr0 = MI0.getOperand(1).getReg();
    Register Addr1 = MI1.getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded value, e.g. a constantpool of a global address,
      // is the same.
      if (!produceSameValue(*Def0, *Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
      // %12 = PICLDR %11, 0, 14, %noreg
      const MachineOperand &MO0 = MI0.getOperand(i);
      const MachineOperand &MO1 = MI1.getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
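// For example (illustrative), two MOV_ga_pcrel instructions that reference the
// same global but carry different PC-label ids compare equal here: the PC
// labels are deliberately ignored and only the global and offset have to
// match.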
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  auto IsLoadOpcode = [&](unsigned Opcode) {
    switch (Opcode) {
    default:
      return false;
    case ARM::LDRi12:
    case ARM::LDRBi12:
    case ARM::LDRD:
    case ARM::LDRH:
    case ARM::LDRSB:
    case ARM::LDRSH:
    case ARM::VLDRD:
    case ARM::VLDRS:
    case ARM::t2LDRi8:
    case ARM::t2LDRBi8:
    case ARM::t2LDRDi8:
    case ARM::t2LDRSHi8:
    case ARM::t2LDRi12:
    case ARM::t2LDRBi12:
    case ARM::t2LDRSHi12:
      return true;
    }
  };

  if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
      !IsLoadOpcode(Load2->getMachineOpcode()))
    return false;

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}
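// Illustrative case (hypothetical operands): two t2LDRi12 loads from [r1, #8]
// and [r1, #16] share the same base, zero index and chain, so this returns
// true with Offset1 = 8 and Offset2 = 16; loads that use a register index are
// rejected by the "Index should be Reg0" check above.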
/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  // Check if the machine opcodes are different. If they are different
  // then we consider them to not be of the same base address,
  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other
  // LDRBi12. In this case, they are considered to be the same because they are
  // different encoding forms of the same basic instruction.
  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
    return false; // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}
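// Continuing the example above: offsets 8 and 16 are well within the
// (Offset2 - Offset1) / 8 > 64 limit, so as long as only a few loads have
// already been clustered the pair is reported as worth scheduling together.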
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI.isDebugInstr())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI.isTerminator() || MI.isPosition())
    return true;

  // INLINEASM_BR can jump to another block.
  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
    return true;

  if (isSEHInstruction(MI))
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any debug instructions.
  while (++I != MBB->end() && I->isDebugInstr())
    ;
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  // Calls don't actually change the stack pointer, even if they have imp-defs.
  // No ARM calling conventions change the stack pointer. (X86 calling
  // conventions sometimes do).
  if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
    return true;

  return false;
}
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
                    unsigned NumCycles, unsigned ExtraPredCycles,
                    BranchProbability Probability) const {
  if (!NumCycles)
    return false;

  // If we are optimizing for size, see if the branch in the predecessor can be
  // lowered to cbn?z by the constant island lowering pass, and return false if
  // so. This results in a shorter instruction sequence.
  if (MBB.getParent()->getFunction().hasOptSize()) {
    MachineBasicBlock *Pred = *MBB.pred_begin();
    if (!Pred->empty()) {
      MachineInstr *LastMI = &*Pred->rbegin();
      if (LastMI->getOpcode() == ARM::t2Bcc) {
        const TargetRegisterInfo *TRI = &getRegisterInfo();
        MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
        if (CmpMI)
          return false;
      }
    }
  }
  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
                             MBB, 0, 0, Probability);
}
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TBB,
                    unsigned TCycles, unsigned TExtra,
                    MachineBasicBlock &FBB,
                    unsigned FCycles, unsigned FExtra,
                    BranchProbability Probability) const {
  if (!TCycles)
    return false;

  // In thumb code we often end up trading one branch for an IT block, and
  // if we are cloning the instruction can increase code size. Prevent
  // blocks with multiple predecessors from being ifcvted to prevent this
  // cloning.
  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
    if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
      return false;
  }

  // Attempt to estimate the relative costs of predication versus branching.
  // Here we scale up each component of UnpredCost to avoid precision issues
  // when scaling TCycles/FCycles by Probability.
  const unsigned ScalingUpFactor = 1024;

  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
  unsigned UnpredCost;
  if (!Subtarget.hasBranchPredictor()) {
    // When we don't have a branch predictor it's always cheaper to not take a
    // branch than take it, so we have to take that into account.
    unsigned NotTakenBranchCost = 1;
    unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
    unsigned TUnpredCycles, FUnpredCycles;
    if (!FCycles) {
      // Triangle: TBB is the fallthrough
      TUnpredCycles = TCycles + NotTakenBranchCost;
      FUnpredCycles = TakenBranchCost;
    } else {
      // Diamond: TBB is the block that is branched to, FBB is the fallthrough
      TUnpredCycles = TCycles + TakenBranchCost;
      FUnpredCycles = FCycles + NotTakenBranchCost;
      // The branch at the end of FBB will disappear when it's predicated, so
      // discount it from PredCost.
      PredCost -= 1 * ScalingUpFactor;
    }
    // The total cost is the cost of each path scaled by their probabilities.
    unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
    unsigned FUnpredCost =
        Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
    UnpredCost = TUnpredCost + FUnpredCost;
    // When predicating assume that the first IT can be folded away but later
    // ones cost one cycle each.
    if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
      PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
    }
  } else {
    unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
    unsigned FUnpredCost =
        Probability.getCompl().scale(FCycles * ScalingUpFactor);
    UnpredCost = TUnpredCost + FUnpredCost;
    UnpredCost += 1 * ScalingUpFactor; // The branch itself
    UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
  }

  return PredCost <= UnpredCost;
}
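// Worked example (numbers are illustrative): TCycles = FCycles = 2, no extra
// predication cycles, a 50% branch probability and a core with a branch
// predictor and a hypothetical misprediction penalty of 10 give
//   PredCost   = (2 + 2) * 1024                                  = 4096
//   UnpredCost = 1024 + 1024 + 1024 (branch) + 1024 (penalty/10) = 4096
// so PredCost <= UnpredCost and if-conversion is reported as profitable.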
unsigned
ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
                                                   unsigned NumInsts) const {
  // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
  // ARM has a condition code field in every predicable instruction, using it
  // doesn't change code size.
  if (!Subtarget.isThumb2())
    return 0;

  // It's possible that the size of the IT is restricted to a single block.
  unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
  return divideCeil(NumInsts, MaxInsts) * 2;
}
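// For example, predicating 5 instructions costs divideCeil(5, 4) * 2 = 4 bytes
// of IT instructions normally, but 10 bytes when restrictIT() limits each IT
// block to a single instruction; ARM mode always reports 0 extra bytes.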
unsigned
ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
  // If this branch is likely to be folded into the comparison to form a
  // CB(N)Z, then removing it won't reduce code size at all, because that will
  // just replace the CB(N)Z with a CMP.
  if (MI.getOpcode() == ARM::t2Bcc &&
      findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
    return 0;

  unsigned Size = getInstSizeInBytes(MI);

  // For Thumb2, all branches are 32-bit instructions during the if conversion
  // pass, but may be replaced with 16-bit instructions during size reduction.
  // Since the branches considered by if conversion tend to be forward branches
  // over small basic blocks, they are very likely to be in range for the
  // narrow instructions, so we assume the final code size will be half what it
  // currently is.
  if (Subtarget.isThumb2())
    Size /= 2;

  return Size;
}
bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                            MachineBasicBlock &FMBB) const {
  // Reduce false anti-dependencies to let the target's out-of-order execution
  // engine do its thing.
  return Subtarget.isProfitableToUnpredicate();
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
                                         Register &PredReg) {
  int PIdx = MI.findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI.getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
}
unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
  if (Opc == ARM::B)
    return ARM::Bcc;
  if (Opc == ARM::tB)
    return ARM::tBcc;
  if (Opc == ARM::t2B)
    return ARM::t2Bcc;

  llvm_unreachable("Unknown unconditional branch opcode!");
}
MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
                                                       bool NewMI,
                                                       unsigned OpIdx1,
                                                       unsigned OpIdx2) const {
  switch (MI.getOpcode()) {
  case ARM::MOVCCr:
  case ARM::t2MOVCCr: {
    // MOVCC can be commuted by inverting the condition.
    Register PredReg;
    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
    // MOVCC AL can't be inverted. Shouldn't happen.
    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
      return nullptr;
    MachineInstr *CommutedMI =
        TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
    if (!CommutedMI)
      return nullptr;
    // After swapping the MOVCC operands, also invert the condition.
    CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
        .setImm(ARMCC::getOppositeCondition(CC));
    return CommutedMI;
  }
  }
  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
MachineInstr *
ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
                                   const TargetInstrInfo *TII) const {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the MOVCC.
  if (!isPredicable(*MI))
    return nullptr;
  // Check if MI has any non-dead defs or physreg uses. This also detects
  // predicated instructions which will be reading CPSR.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.getReg().isPhysical())
      return nullptr;
    if (MO.isDef() && !MO.isDead())
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(DontMoveAcrossStores))
    return nullptr;
  return MI;
}
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     unsigned &TrueOp, unsigned &FalseOp,
                                     bool &Optimizable) const {
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  // MOVCC operands:
  // 0: Def.
  // 1: True use.
  // 2: False use.
  // 3: Condition code.
  // 4: CPSR use.
  TrueOp = 1;
  FalseOp = 2;
  Cond.push_back(MI.getOperand(3));
  Cond.push_back(MI.getOperand(4));
  // We can always fold a def.
  Optimizable = true;
  return false;
}
MachineInstr *
ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
                                 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                                 bool PreferFalse) const {
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
  MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
  const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
  if (!MRI.constrainRegClass(DestReg, FalseClass))
    return nullptr;
  if (!MRI.constrainRegClass(DestReg, TrueClass))
    return nullptr;

  // Create a new predicated version of DefMI.
  // Rfalse is the first use.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);

  // Copy all the DefMI operands, excluding its (null) predicate.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands();
       i != e && !DefDesc.operands()[i].isPredicate(); ++i)
    NewMI.add(DefMI->getOperand(i));

  unsigned CondCode = MI.getOperand(3).getImm();
  if (Invert)
    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
  else
    NewMI.addImm(CondCode);
  NewMI.add(MI.getOperand(4));

  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
  if (NewMI->hasOptionalDef())
    NewMI.add(condCodeOp());

  // The output register value when the predicate is false is an implicit
  // register operand tied to the first def.
  // The tie makes the register allocator ensure the FalseReg is allocated the
  // same register as operand 0.
  FalseReg.setImplicit();
  NewMI.add(FalseReg);
  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
  uint16_t PseudoOpc;
  uint16_t MachineOpc;
};

static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
    {ARM::ADDSri, ARM::ADDri},
    {ARM::ADDSrr, ARM::ADDrr},
    {ARM::ADDSrsi, ARM::ADDrsi},
    {ARM::ADDSrsr, ARM::ADDrsr},

    {ARM::SUBSri, ARM::SUBri},
    {ARM::SUBSrr, ARM::SUBrr},
    {ARM::SUBSrsi, ARM::SUBrsi},
    {ARM::SUBSrsr, ARM::SUBrsr},

    {ARM::RSBSri, ARM::RSBri},
    {ARM::RSBSrsi, ARM::RSBrsi},
    {ARM::RSBSrsr, ARM::RSBrsr},

    {ARM::tADDSi3, ARM::tADDi3},
    {ARM::tADDSi8, ARM::tADDi8},
    {ARM::tADDSrr, ARM::tADDrr},
    {ARM::tADCS, ARM::tADC},

    {ARM::tSUBSi3, ARM::tSUBi3},
    {ARM::tSUBSi8, ARM::tSUBi8},
    {ARM::tSUBSrr, ARM::tSUBrr},
    {ARM::tSBCS, ARM::tSBC},
    {ARM::tRSBS, ARM::tRSB},
    {ARM::tLSLSri, ARM::tLSLri},

    {ARM::t2ADDSri, ARM::t2ADDri},
    {ARM::t2ADDSrr, ARM::t2ADDrr},
    {ARM::t2ADDSrs, ARM::t2ADDrs},

    {ARM::t2SUBSri, ARM::t2SUBri},
    {ARM::t2SUBSrr, ARM::t2SUBrr},
    {ARM::t2SUBSrs, ARM::t2SUBrs},

    {ARM::t2RSBSri, ARM::t2RSBri},
    {ARM::t2RSBSrs, ARM::t2RSBrs},
};

unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
  for (const auto &Entry : AddSubFlagsOpcodeMap)
    if (OldOpc == Entry.PseudoOpc)
      return Entry.MachineOpc;
  return 0;
}
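// Usage sketch: convertAddSubFlagsOpcode(ARM::t2SUBSri) yields ARM::t2SUBri
// via the table above, and any opcode that is not one of these flag-setting
// pseudos simply returns 0.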
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI,
                                   const DebugLoc &dl, Register DestReg,
                                   Register BaseReg, int NumBytes,
                                   ARMCC::CondCodes Pred, Register PredReg,
                                   const ARMBaseInstrInfo &TII,
                                   unsigned MIFlags) {
  if (NumBytes == 0 && DestReg != BaseReg) {
    BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
        .addReg(BaseReg, RegState::Kill)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp())
        .setMIFlags(MIFlags);
    return;
  }

  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
        .addReg(BaseReg, RegState::Kill)
        .addImm(ThisVal)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp())
        .setMIFlags(MIFlags);
    BaseReg = DestReg;
  }
}
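// Illustrative expansion (hypothetical registers): with DestReg = r0,
// BaseReg = sp and NumBytes = 0x1004, which is not a single shifter-operand
// immediate, the loop typically emits two instructions, e.g.
// "add r0, sp, #4096" followed by "add r0, r0, #4".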
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                      MachineFunction &MF, MachineInstr *MI,
                                      unsigned NumBytes) {
  // This optimisation potentially adds lots of load and store
  // micro-operations, it's only really a great benefit to code-size.
  if (!Subtarget.hasMinSize())
    return false;

  // If only one register is pushed/popped, LLVM can use an LDR/STR
  // instead. We can't modify those so make sure we're dealing with an
  // instruction we understand.
  bool IsPop = isPopOpcode(MI->getOpcode());
  bool IsPush = isPushOpcode(MI->getOpcode());
  if (!IsPush && !IsPop)
    return false;

  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
                      MI->getOpcode() == ARM::VLDMDIA_UPD;
  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
                     MI->getOpcode() == ARM::tPOP ||
                     MI->getOpcode() == ARM::tPOP_RET;

  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
                          MI->getOperand(1).getReg() == ARM::SP)) &&
         "trying to fold sp update into non-sp-updating push/pop");

  // The VFP push & pop act on D-registers, so we can only fold an adjustment
  // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
  // if this is violated.
  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
    return false;

  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4. Thumb1 starts after the predicate.
  int RegListIdx = IsT1PushPop ? 2 : 4;

  // Calculate the space we'll need in terms of registers.
  unsigned RegsNeeded;
  const TargetRegisterClass *RegClass;
  if (IsVFPPushPop) {
    RegsNeeded = NumBytes / 8;
    RegClass = &ARM::DPRRegClass;
  } else {
    RegsNeeded = NumBytes / 4;
    RegClass = &ARM::GPRRegClass;
  }

  // We're going to have to strip all list operands off before
  // re-adding them since the order matters, so save the existing ones
  // for later.
  SmallVector<MachineOperand, 4> RegList;

  // We're also going to need the first register transferred by this
  // instruction, which won't necessarily be the first register in the list.
  unsigned FirstRegEnc = -1;

  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
    MachineOperand &MO = MI->getOperand(i);
    RegList.push_back(MO);

    if (MO.isReg() && !MO.isImplicit() &&
        TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
      FirstRegEnc = TRI->getEncodingValue(MO.getReg());
  }

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Now try to find enough space in the reglist to allocate NumBytes.
  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
       --CurRegEnc) {
    unsigned CurReg = RegClass->getRegister(CurRegEnc);
    if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
      continue;
    if (!IsPop) {
      // Pushing any register is completely harmless, mark the register
      // involved as undef since we don't care about its value and must not
      // restore it during stack unwinding.
      RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
                                                  false, false, true));
      --RegsNeeded;
      continue;
    }

    // However, we can only pop an extra register if it's not live. For
    // registers live within the function we might clobber a return value
    // register; the other way a register can be live here is if it's
    // callee-saved.
    if (isCalleeSavedRegister(CurReg, CSRegs) ||
        MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
            MachineBasicBlock::LQR_Dead) {
      // VFP pops don't allow holes in the register list, so any skip is fatal
      // for our transformation. GPR pops do, so we should just keep looking.
      if (IsVFPPushPop)
        return false;
      else
        continue;
    }

    // Mark the unimportant registers as <def,dead> in the POP.
    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
                                                true));
    --RegsNeeded;
  }

  if (RegsNeeded > 0)
    return false;

  // Finally we know we can profitably perform the optimisation so go
  // ahead: strip all existing registers off and add them back again
  // in the right order.
  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
    MI->removeOperand(i);

  // Add the complete list back in.
  MachineInstrBuilder MIB(MF, &*MI);
  for (const MachineOperand &MO : llvm::reverse(RegList))
    MIB.add(MO);

  return true;
}
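// Sketch of the effect (hypothetical registers): under minsize, an 8-byte SP
// adjustment next to a "tPUSH {r4, lr}" can be absorbed by pushing two extra
// registers marked undef, e.g. "tPUSH {r2, r3, r4, lr}", so that the separate
// SP update instruction disappears.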
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                Register FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.removeOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12:
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    case ARMII::AddrMode2:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    case ARMII::AddrMode3:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      break;
    case ARMII::AddrMode5FP16:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 2;
      break;
    case ARMII::AddrModeT2_i7:
    case ARMII::AddrModeT2_i7s2:
    case ARMII::AddrModeT2_i7s4:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 7;
      Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
               AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
      break;
    default:
      llvm_unreachable("Unsupported addressing mode!");
    }

    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold address comp. if opcode has offset bits.
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp.
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      Offset &= ~(Mask*Scale);
    }
  }

  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}
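// Two common outcomes (illustrative): an ADDri whose frame index resolves to
// SP with a total offset of 0 is rewritten into a plain register move, while
// an AddrMode_i12 load with a small positive offset simply has that offset
// folded into its immediate field, leaving Offset == 0 on return.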
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::CMPri:
  case ARM::t2CMPri:
  case ARM::tCMPi8:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI.getOperand(1).getImm();
    return true;
  case ARM::CMPrr:
  case ARM::t2CMPrr:
  case ARM::tCMPr:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case ARM::TSTri:
  case ARM::t2TSTri:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = MI.getOperand(1).getImm();
    CmpValue = 0;
    return true;
  }

  return false;
}
/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
  case ARM::ANDri:
  case ARM::t2ANDri:
    if (CmpMask != MI->getOperand(2).getImm())
      return false;
    if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
      return true;
    break;
  }

  return false;
}
/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by ADD(a,b,X).
inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
  switch (CC) {
  default: return ARMCC::AL;
  case ARMCC::HS: return ARMCC::LO;
  case ARMCC::LO: return ARMCC::HS;
  case ARMCC::VS: return ARMCC::VS;
  case ARMCC::VC: return ARMCC::VC;
  }
}
/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
                                        Register SrcReg, Register SrcReg2,
                                        int64_t ImmValue,
                                        const MachineInstr *OI,
                                        bool &IsThumb1) {
  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
      ((OI->getOperand(1).getReg() == SrcReg &&
        OI->getOperand(2).getReg() == SrcReg2) ||
       (OI->getOperand(1).getReg() == SrcReg2 &&
        OI->getOperand(2).getReg() == SrcReg))) {
    IsThumb1 = false;
    return true;
  }

  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
      ((OI->getOperand(2).getReg() == SrcReg &&
        OI->getOperand(3).getReg() == SrcReg2) ||
       (OI->getOperand(2).getReg() == SrcReg2 &&
        OI->getOperand(3).getReg() == SrcReg))) {
    IsThumb1 = true;
    return true;
  }

  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
      (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
      OI->getOperand(1).getReg() == SrcReg &&
      OI->getOperand(2).getImm() == ImmValue) {
    IsThumb1 = false;
    return true;
  }

  if (CmpI->getOpcode() == ARM::tCMPi8 &&
      (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
      OI->getOperand(2).getReg() == SrcReg &&
      OI->getOperand(3).getImm() == ImmValue) {
    IsThumb1 = true;
    return true;
  }

  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
       OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
      OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
      OI->getOperand(0).getReg() == SrcReg &&
      OI->getOperand(1).getReg() == SrcReg2) {
    IsThumb1 = false;
    return true;
  }

  if (CmpI->getOpcode() == ARM::tCMPr &&
      (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
       OI->getOpcode() == ARM::tADDrr) &&
      OI->getOperand(0).getReg() == SrcReg &&
      OI->getOperand(2).getReg() == SrcReg2) {
    IsThumb1 = true;
    return true;
  }

  return false;
}
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
  switch (MI->getOpcode()) {
  default: return false;
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
/// flags in the same way as Compare.
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
    int64_t CmpValue, const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  // Masked compares sometimes use the same register as the corresponding 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
      MI = nullptr;
      for (MachineRegisterInfo::use_instr_iterator
           UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
           UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr.getParent())
          continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
            isPredicated(*PotentialAND))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr.getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  // There are two possible candidates which can be changed to set CPSR:
  // One is MI, the other is a SUB or ADD instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
  // ADDr[ri](r1, r2, X).
  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
  MachineInstr *SubAdd = nullptr;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = nullptr;
  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison.
    // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
    // Thus we cannot return here.
    if (CmpInstr.getOpcode() == ARM::CMPri ||
        CmpInstr.getOpcode() == ARM::t2CMPri ||
        CmpInstr.getOpcode() == ARM::tCMPi8)
      MI = nullptr;
    else
      return false;
  }

  bool IsThumb1 = false;
  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
    return false;

  // We also want to do this peephole for cases like this: if (a*b == 0),
  // and optimise away the CMP instruction from the generated code sequence:
  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
  // resulting from the select instruction, but these MOVS instructions for
  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
  // However, if we only have MOVS instructions in between the CMP and the
  // other instruction (the MULS in this example), then the CPSR is dead so we
  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
  // reordering and then continue the analysis hoping we can eliminate the
  // CMP. This peephole works on the vregs, so is still in SSA form. As a
  // consequence, the movs won't redefine/kill the MUL operands which would
  // make this reordering illegal.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI && IsThumb1) {
    --I;
    if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
      bool CanReorder = true;
      for (; I != E; --I) {
        if (I->getOpcode() != ARM::tMOVi8) {
          CanReorder = false;
          break;
        }
      }
      if (CanReorder) {
        MI = MI->removeFromParent();
        E = CmpInstr;
        CmpInstr.getParent()->insert(E, MI);
      }
    }
    I = CmpInstr;
    E = MI;
  }

  // Check that CPSR isn't set between the comparison instruction and the one we
  // want to change. At the same time, search for SubAdd.
  bool SubAddIsThumb1 = false;
  do {
    const MachineInstr &Instr = *--I;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
                             SubAddIsThumb1)) {
      SubAdd = &*I;
      break;
    }

    // Allow E (which was initially MI) to be SubAdd but do not search before E.
    if (I == E)
      break;

    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
        Instr.readsRegister(ARM::CPSR, TRI))
      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      return false;

    if (I == B) {
      // In some cases, we scan the use-list of an instruction for an AND;
      // that AND is in the same BB, but may not be scheduled before the
      // corresponding TST. In that case, bail out.
      //
      // FIXME: We could try to reschedule the AND.
      return false;
    }
  } while (true);

  // Return false if no candidates exist.
  if (!MI && !SubAdd)
    return false;

  // If we found a SubAdd, use it as it will be closer to the CMP.
  if (SubAdd) {
    MI = SubAdd;
    IsThumb1 = SubAddIsThumb1;
  }

  // We can't use a predicated instruction - it doesn't always write the flags.
  if (isPredicated(*MI))
    return false;

  // Scan forward for the use of CPSR
  // When checking against MI: if it's a conditional code that requires
  // checking of the V bit or C bit, then this is not safe to do.
  // It is safe to remove CmpInstr if CPSR is redefined or killed.
  // If we are done with the basic block, we need to check whether CPSR is
  // live-out.
  SmallVector<std::pair<MachineOperand *, ARMCC::CondCodes>, 4>
      OperandsToUpdate;
  bool isSafe = false;
  I = CmpInstr;
  E = CmpInstr.getParent()->end();
  while (!isSafe && ++I != E) {
    const MachineInstr &Instr = *I;
    for (unsigned IO = 0, EO = Instr.getNumOperands();
         !isSafe && IO != EO; ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
        isSafe = true;
        break;
      }
      if (!MO.isReg() || MO.getReg() != ARM::CPSR)
        continue;
      if (MO.isDef()) {
        isSafe = true;
        break;
      }
      // Condition code is after the operand before CPSR except for VSELs.
      ARMCC::CondCodes CC;
      bool IsInstrVSel = true;
      switch (Instr.getOpcode()) {
      default:
        IsInstrVSel = false;
        CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
        break;
      case ARM::VSELEQD:
      case ARM::VSELEQS:
      case ARM::VSELEQH:
        CC = ARMCC::EQ;
        break;
      case ARM::VSELGTD:
      case ARM::VSELGTS:
      case ARM::VSELGTH:
        CC = ARMCC::GT;
        break;
      case ARM::VSELGED:
      case ARM::VSELGES:
      case ARM::VSELGEH:
        CC = ARMCC::GE;
        break;
      case ARM::VSELVSD:
      case ARM::VSELVSS:
      case ARM::VSELVSH:
        CC = ARMCC::VS;
        break;
      }

      if (SubAdd) {
        // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
        // on CMP needs to be updated to be based on SUB.
        // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
        // needs to be modified.
        // Push the condition code operands to OperandsToUpdate.
        // If it is safe to remove CmpInstr, the condition code of these
        // operands will be modified.
        unsigned Opc = SubAdd->getOpcode();
        bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
                     Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
                     Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
                     Opc == ARM::tSUBi8;
        unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
        if (!IsSub ||
            (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
             SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
          // VSel doesn't support condition code update.
          if (IsInstrVSel)
            return false;
          // Ensure we can swap the condition.
          ARMCC::CondCodes NewCC =
              (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
          if (NewCC == ARMCC::AL)
            return false;
          OperandsToUpdate.push_back(
              std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
        }
      } else {
        // No SubAdd, so this is x = <op> y, z; cmp x, 0.
        switch (CC) {
        case ARMCC::EQ: // Z
        case ARMCC::NE: // Z
        case ARMCC::MI: // N
        case ARMCC::PL: // N
        case ARMCC::AL: // none
          // CPSR can be used multiple times, we should continue.
          break;
        case ARMCC::HS: // C
        case ARMCC::LO: // C
        case ARMCC::VS: // V
        case ARMCC::VC: // V
        case ARMCC::HI: // C Z
        case ARMCC::LS: // C Z
        case ARMCC::GE: // N V
        case ARMCC::LT: // N V
        case ARMCC::GT: // Z N V
        case ARMCC::LE: // Z N V
          // The instruction uses the V bit or C bit which is not safe.
          return false;
        }
      }
    }
  }

  // If CPSR is not killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if (!isSafe) {
    MachineBasicBlock *MBB = CmpInstr.getParent();
    for (MachineBasicBlock *Succ : MBB->successors())
      if (Succ->isLiveIn(ARM::CPSR))
        return false;
  }

  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
  // set CPSR so this is represented as an explicit output).
  if (!IsThumb1) {
    unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
    MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
    MI->getOperand(CPSRRegNum).setIsDef(true);
  }
  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
  CmpInstr.eraseFromParent();

  // Modify the condition code of operands in OperandsToUpdate.
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
    OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);

  MI->clearRegisterDeads(ARM::CPSR);

  return true;
}
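// Rough sketch of the transformation: for
//   %2 = SUBrr %0, %1, 14, $noreg, $noreg
//   CMPrr %0, %1, implicit-def $cpsr
// the CMP is erased and the SUB's optional CPSR operand is turned into a live
// definition, i.e. the SUB effectively becomes a flag-setting SUBS.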
bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
  // Do not sink MI if it might be used to optimize a redundant compare.
  // We heuristically only look at the instruction immediately following MI to
  // avoid potentially searching the entire basic block.
  if (isPredicated(MI))
    return true;
  MachineBasicBlock::const_iterator Next = &MI;
  ++Next;
  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  bool IsThumb1;
  if (Next != MI.getParent()->end() &&
      analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
    return false;
  return true;
}
bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                     Register Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
      DefOpc != ARM::tMOVi32imm)
    return false;
  if (!DefMI.getOperand(1).isImm())
    // Could be t2MOVi32imm @xx
    return false;

  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  const MCInstrDesc &DefMCID = DefMI.getDesc();
  if (DefMCID.hasOptionalDef()) {
    unsigned NumOps = DefMCID.getNumOperands();
    const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
    if (MO.getReg() == ARM::CPSR && !MO.isDead())
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
      // to fold it.
      return false;
  }

  const MCInstrDesc &UseMCID = UseMI.getDesc();
  if (UseMCID.hasOptionalDef()) {
    unsigned NumOps = UseMCID.getNumOperands();
    if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
      // If the instruction sets the flag, do not attempt this optimization
      // since it may change the semantics of the code.
      return false;
  }

  unsigned UseOpc = UseMI.getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
  bool Commute = false;
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    Commute = UseMI.getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::ADDrr:
    case ARM::SUBrr:
      if (UseOpc == ARM::SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      if (ARM_AM::isSOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
      else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      break;
    case ARM::ORRrr:
    case ARM::EORrr:
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    case ARM::t2ADDrr:
    case ARM::t2SUBrr: {
      if (UseOpc == ARM::t2SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
      const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
      const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
      if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
      else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      break;
    }
    case ARM::t2ORRrr:
    case ARM::t2EORrr:
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
    break;
  }
  }

  unsigned OpIdx = Commute ? 2 : 1;
  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
  bool isKill = UseMI.getOperand(OpIdx).isKill();
  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
  Register NewReg = MRI->createVirtualRegister(TRC);
  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
          NewReg)
      .addReg(Reg1, getKillRegState(isKill))
      .addImm(SOImmValV1)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  UseMI.setDesc(get(NewUseOpc));
  UseMI.getOperand(1).setReg(NewReg);
  UseMI.getOperand(1).setIsKill();
  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI.eraseFromParent();
  // FIXME: t2ADDrr should be split, as different rules apply when writing to
  // SP. Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm].
  // Then the below code will not be needed, as the input/output register
  // classes will be rgpr or gprSP.
  // For now, we fix the UseMI operand explicitly here:
  switch (NewUseOpc) {
  case ARM::t2ADDspImm:
  case ARM::t2SUBspImm:
  case ARM::t2ADDri:
  case ARM::t2SUBri:
    MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
  }
  return true;
}
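// Illustrative fold (hypothetical vregs and constant): with
//   %1 = t2MOVi32imm 0x12340000
//   %2 = t2ADDrr %0, %1
// the 32-bit constant splits into two Thumb2 modified immediates, so the pair
// is replaced by two t2ADDri instructions (adding roughly #0x12000000 and then
// #0x340000) and the t2MOVi32imm is erased.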
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
                                        const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: {
    const MCInstrDesc &Desc = MI.getDesc();
    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    assert(UOps >= 0 && "bad # UOps");
    return UOps;
  }

  case ARM::LDRrs:
  case ARM::LDRBrs:
  case ARM::STRrs:
  case ARM::STRBrs: {
    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRH:
  case ARM::STRH: {
    if (!MI.getOperand(2).getReg())
      return 1;

    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRSB:
  case ARM::LDRSH:
    return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;

  case ARM::LDRSB_POST:
  case ARM::LDRSH_POST: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    return (Rt == Rm) ? 4 : 3;
  }

  case ARM::LDR_PRE_REG:
  case ARM::LDRB_PRE_REG: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (Rt == Rm)
      return 3;
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::STR_PRE_REG:
  case ARM::STRB_PRE_REG: {
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::LDRH_PRE:
  case ARM::STRH_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (!Rm)
      return 2;
    if (Rt == Rm)
      return 3;
    return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
  }

  case ARM::LDR_POST_REG:
  case ARM::LDRB_POST_REG:
  case ARM::LDRH_POST: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    return (Rt == Rm) ? 3 : 2;
  }

  case ARM::LDR_PRE_IMM:
  case ARM::LDRB_PRE_IMM:
  case ARM::LDR_POST_IMM:
  case ARM::LDRB_POST_IMM:
  case ARM::STRB_POST_IMM:
  case ARM::STRB_POST_REG:
  case ARM::STRB_PRE_IMM:
  case ARM::STRH_POST:
  case ARM::STR_POST_IMM:
  case ARM::STR_POST_REG:
  case ARM::STR_PRE_IMM:
    return 2;

  case ARM::LDRSB_PRE:
  case ARM::LDRSH_PRE: {
    Register Rm = MI.getOperand(3).getReg();
    if (Rm == 0)
      return 3;
    Register Rt = MI.getOperand(0).getReg();
    if (Rt == Rm)
      return 4;
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 3;
    return 4;
  }

  case ARM::LDRD: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(2).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
                                                                          : 3;
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::STRD: {
    Register Rm = MI.getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
                                                                          : 3;
    return 2;
  }

  case ARM::LDRD_POST:
  case ARM::t2LDRD_POST:
    return 3;

  case ARM::STRD_POST:
  case ARM::t2STRD_POST:
    return 4;

  case ARM::LDRD_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(3).getReg();
    Register Rm = MI.getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
                                                                          : 4;
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::t2LDRD_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(3).getReg();
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::STRD_PRE: {
    Register Rm = MI.getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
                                                                          : 4;
    return 3;
  }

  case ARM::t2STRD_PRE:
    return 3;

  case ARM::t2LDR_POST:
  case ARM::t2LDRB_POST:
  case ARM::t2LDRB_PRE:
  case ARM::t2LDRSBi12:
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBpci:
  case ARM::t2LDRH_POST:
  case ARM::t2LDRH_PRE:
  case ARM::t2LDRSB_POST:
  case ARM::t2LDRSB_PRE:
  case ARM::t2LDRSH_POST:
  case ARM::t2LDRSH_PRE:
  case ARM::t2LDRSHi12:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHpci:
    return 2;

  case ARM::t2LDRDi8: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(2).getReg();
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::t2STRB_POST:
  case ARM::t2STRB_PRE:
  case ARM::t2STRH_POST:
  case ARM::t2STRH_PRE:
  case ARM::t2STR_POST:
  case ARM::t2STR_PRE:
    return 2;
  }
}
// Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined return 0 (missing MachineMemOperand).
//
// FIXME: The current MachineInstr design does not support relying on machine
// mem operands to determine the width of a memory access. Instead, we expect
// the target to provide this information based on the instruction opcode and
// operands. However, using MachineMemOperand is the best solution now for
// two reasons:
//
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
// operands. This is much more dangerous than using the MachineMemOperand
// sizes because CodeGen passes can insert/remove optional machine operands. In
// fact, it's totally incorrect for preRA passes and appears to be wrong for
// postRA passes as well.
//
// 2) getNumLDMAddresses is only used by the scheduling machine model and any
// machine model that calls this should handle the unknown (zero size) case.
//
// Long term, we should require a target hook that verifies MachineMemOperand
// sizes during MC lowering. That target hook should be local to MC lowering
// because we can't ensure that it is aware of other MI forms. Doing this will
// ensure that MachineMemOperands are correctly propagated through all passes.
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
  unsigned Size = 0;
  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
                                  E = MI.memoperands_end();
       I != E; ++I)
    Size += (*I)->getSize().getValue();

  // FIXME: The scheduler currently can't handle values larger than 16. But
  // the values can actually go up to 32 for floating-point load/store
  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
  // operations isn't right; we could end up with "extra" memory operands for
  // various reasons, like tail merge merging two memory operations.
  return std::min(Size / 4, 16U);
}
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
                                                    unsigned NumRegs) {
  unsigned UOps = 1 + NumRegs; // 1 for address computation.
  switch (Opc) {
  default:
    break;
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    ++UOps; // One for base register writeback.
    break;
  case ARM::LDMIA_RET:
  case ARM::t2LDMIA_RET:
    UOps += 2; // One for base reg wb, one for write to pc.
    break;
  }
  return UOps;
}
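
// Illustrative note (added; not in the original source): under this model an
// LDMIA_RET popping four registers is counted as 1 (address) + 4 (registers)
// + 2 (base writeback and PC write) = 7 microops.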
unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                          const MachineInstr &MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI.getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);
    return ItinUOps;
  }

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");

  // The number of uOps for load / store multiple are determined by the number
  // of registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
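  //
  // Worked example (added; not in the original source): by the Cortex-A9
  // formula above, a VFP/NEON load multiple of 5 registers is modeled as
  // (5 / 2) + (5 % 2) + 1 = 4 microops.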
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }
  case ARM::LDMIA_RET:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
    switch (Subtarget.getLdStMultipleTiming()) {
    case ARMSubtarget::SingleIssuePlusExtras:
      return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
    case ARMSubtarget::SingleIssue:
      // Assume the worst.
      return NumRegs;
    case ARMSubtarget::DoubleIssue: {
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      unsigned UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    }
    case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
      unsigned UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
          (*MI.memoperands_begin())->getAlign() < Align(8))
        ++UOps;
      return UOps;
    }
    }
  }
  }
  llvm_unreachable("Didn't find the number of microops");
}
std::optional<unsigned>
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &DefMCID, unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx + 1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  unsigned DefCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    // (regno / 2) + (regno % 2) + 1
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = RegNo;
    bool isSLoad = false;

    switch (DefMCID.getOpcode()) {
    default:
      break;
    case ARM::VLDMSIA_UPD:
    case ARM::VLDMSDB_UPD:
      isSLoad = true;
      break;
    }

    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    // then it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}
std::optional<unsigned>
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
                                 const MCInstrDesc &DefMCID, unsigned DefClass,
                                 unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx + 1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  unsigned DefCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    // 4 registers would be issued: 1, 2, 1.
    // 5 registers would be issued: 1, 2, 2.
    DefCycle = RegNo / 2;
    // Result latency is issue cycle + 2: E2.
    DefCycle += 2;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = (RegNo / 2);
    // If there are odd number of registers or if it's not 64-bit aligned,
    // then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || DefAlign < 8)
      ++DefCycle;
    // Result latency is AGU cycles + 2.
    DefCycle += 2;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}
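
// Illustrative numbers (added; not in the original source): with the models
// above, the last of 5 registers loaded by an LDM gets a def cycle of
// 5/2 + 2 = 4 on Cortex-A8/A7, and 5/2 + 1 (odd register count) + 2 = 5 on an
// A9-like core, assuming a 64-bit aligned address.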
std::optional<unsigned>
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &UseMCID, unsigned UseClass,
                                  unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx + 1) - UseMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  unsigned UseCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    // (regno / 2) + (regno % 2) + 1
    UseCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++UseCycle;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    UseCycle = RegNo;
    bool isSStore = false;

    switch (UseMCID.getOpcode()) {
    default:
      break;
    case ARM::VSTMSIA_UPD:
    case ARM::VSTMSDB_UPD:
      isSStore = true;
      break;
    }

    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    // then it takes an extra cycle.
    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = RegNo + 2;
  }

  return UseCycle;
}
std::optional<unsigned>
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
                                 const MCInstrDesc &UseMCID, unsigned UseClass,
                                 unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx + 1) - UseMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  unsigned UseCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    UseCycle = RegNo / 2;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    UseCycle = (RegNo / 2);
    // If there are odd number of registers or if it's not 64-bit aligned,
    // then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = RegNo + 2;
  }

  return UseCycle;
}
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
    const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
    unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  std::optional<unsigned> DefCycle;
  bool LdmBypass = false;
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    LdmBypass = true;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (!DefCycle)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  std::optional<unsigned> UseCycle;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (!UseCycle)
    // Assume it's read in the first stage.
    UseCycle = 1;

  if (UseCycle > *DefCycle + 1)
    return std::nullopt;

  UseCycle = *DefCycle - *UseCycle + 1;
  if (UseCycle > 0u) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands() - 1,
                                          UseClass, UseIdx))
        UseCycle = *UseCycle - 1;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      UseCycle = *UseCycle - 1;
    }
  }

  return UseCycle;
}
4147 static const MachineInstr
*getBundledDefMI(const TargetRegisterInfo
*TRI
,
4148 const MachineInstr
*MI
, unsigned Reg
,
4149 unsigned &DefIdx
, unsigned &Dist
) {
4152 MachineBasicBlock::const_iterator I
= MI
; ++I
;
4153 MachineBasicBlock::const_instr_iterator II
= std::prev(I
.getInstrIterator());
4154 assert(II
->isInsideBundle() && "Empty bundle?");
4157 while (II
->isInsideBundle()) {
4158 Idx
= II
->findRegisterDefOperandIdx(Reg
, TRI
, false, true);
4165 assert(Idx
!= -1 && "Cannot find bundled definition!");
4170 static const MachineInstr
*getBundledUseMI(const TargetRegisterInfo
*TRI
,
4171 const MachineInstr
&MI
, unsigned Reg
,
4172 unsigned &UseIdx
, unsigned &Dist
) {
4175 MachineBasicBlock::const_instr_iterator II
= ++MI
.getIterator();
4176 assert(II
->isInsideBundle() && "Empty bundle?");
4177 MachineBasicBlock::const_instr_iterator E
= MI
.getParent()->instr_end();
4179 // FIXME: This doesn't properly handle multiple uses.
4181 while (II
!= E
&& II
->isInsideBundle()) {
4182 Idx
= II
->findRegisterUseOperandIdx(Reg
, TRI
, false);
4185 if (II
->getOpcode() != ARM::t2IT
)
4199 /// Return the number of cycles to add to (or subtract from) the static
4200 /// itinerary based on the def opcode and alignment. The caller will ensure that
4201 /// adjusted latency is at least one cycle.
4202 static int adjustDefLatency(const ARMSubtarget
&Subtarget
,
4203 const MachineInstr
&DefMI
,
4204 const MCInstrDesc
&DefMCID
, unsigned DefAlign
) {
4206 if (Subtarget
.isCortexA8() || Subtarget
.isLikeA9() || Subtarget
.isCortexA7()) {
4207 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4208 // variants are one cycle cheaper.
4209 switch (DefMCID
.getOpcode()) {
4213 unsigned ShOpVal
= DefMI
.getOperand(3).getImm();
4214 unsigned ShImm
= ARM_AM::getAM2Offset(ShOpVal
);
4216 (ShImm
== 2 && ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsl
))
4223 case ARM::t2LDRSHs
: {
4224 // Thumb2 mode: lsl only.
4225 unsigned ShAmt
= DefMI
.getOperand(3).getImm();
4226 if (ShAmt
== 0 || ShAmt
== 2)
4231 } else if (Subtarget
.isSwift()) {
4232 // FIXME: Properly handle all of the latency adjustments for address
4234 switch (DefMCID
.getOpcode()) {
4238 unsigned ShOpVal
= DefMI
.getOperand(3).getImm();
4239 bool isSub
= ARM_AM::getAM2Op(ShOpVal
) == ARM_AM::sub
;
4240 unsigned ShImm
= ARM_AM::getAM2Offset(ShOpVal
);
4243 ((ShImm
== 1 || ShImm
== 2 || ShImm
== 3) &&
4244 ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsl
)))
4247 ShImm
== 1 && ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsr
)
4254 case ARM::t2LDRSHs
: {
4255 // Thumb2 mode: lsl only.
4256 unsigned ShAmt
= DefMI
.getOperand(3).getImm();
4257 if (ShAmt
== 0 || ShAmt
== 1 || ShAmt
== 2 || ShAmt
== 3)
4264 if (DefAlign
< 8 && Subtarget
.checkVLDnAccessAlignment()) {
4265 switch (DefMCID
.getOpcode()) {
4271 case ARM::VLD1q8wb_fixed
:
4272 case ARM::VLD1q16wb_fixed
:
4273 case ARM::VLD1q32wb_fixed
:
4274 case ARM::VLD1q64wb_fixed
:
4275 case ARM::VLD1q8wb_register
:
4276 case ARM::VLD1q16wb_register
:
4277 case ARM::VLD1q32wb_register
:
4278 case ARM::VLD1q64wb_register
:
4285 case ARM::VLD2d8wb_fixed
:
4286 case ARM::VLD2d16wb_fixed
:
4287 case ARM::VLD2d32wb_fixed
:
4288 case ARM::VLD2q8wb_fixed
:
4289 case ARM::VLD2q16wb_fixed
:
4290 case ARM::VLD2q32wb_fixed
:
4291 case ARM::VLD2d8wb_register
:
4292 case ARM::VLD2d16wb_register
:
4293 case ARM::VLD2d32wb_register
:
4294 case ARM::VLD2q8wb_register
:
4295 case ARM::VLD2q16wb_register
:
4296 case ARM::VLD2q32wb_register
:
4301 case ARM::VLD3d8_UPD
:
4302 case ARM::VLD3d16_UPD
:
4303 case ARM::VLD3d32_UPD
:
4304 case ARM::VLD1d64Twb_fixed
:
4305 case ARM::VLD1d64Twb_register
:
4306 case ARM::VLD3q8_UPD
:
4307 case ARM::VLD3q16_UPD
:
4308 case ARM::VLD3q32_UPD
:
4313 case ARM::VLD4d8_UPD
:
4314 case ARM::VLD4d16_UPD
:
4315 case ARM::VLD4d32_UPD
:
4316 case ARM::VLD1d64Qwb_fixed
:
4317 case ARM::VLD1d64Qwb_register
:
4318 case ARM::VLD4q8_UPD
:
4319 case ARM::VLD4q16_UPD
:
4320 case ARM::VLD4q32_UPD
:
4321 case ARM::VLD1DUPq8
:
4322 case ARM::VLD1DUPq16
:
4323 case ARM::VLD1DUPq32
:
4324 case ARM::VLD1DUPq8wb_fixed
:
4325 case ARM::VLD1DUPq16wb_fixed
:
4326 case ARM::VLD1DUPq32wb_fixed
:
4327 case ARM::VLD1DUPq8wb_register
:
4328 case ARM::VLD1DUPq16wb_register
:
4329 case ARM::VLD1DUPq32wb_register
:
4330 case ARM::VLD2DUPd8
:
4331 case ARM::VLD2DUPd16
:
4332 case ARM::VLD2DUPd32
:
4333 case ARM::VLD2DUPd8wb_fixed
:
4334 case ARM::VLD2DUPd16wb_fixed
:
4335 case ARM::VLD2DUPd32wb_fixed
:
4336 case ARM::VLD2DUPd8wb_register
:
4337 case ARM::VLD2DUPd16wb_register
:
4338 case ARM::VLD2DUPd32wb_register
:
4339 case ARM::VLD4DUPd8
:
4340 case ARM::VLD4DUPd16
:
4341 case ARM::VLD4DUPd32
:
4342 case ARM::VLD4DUPd8_UPD
:
4343 case ARM::VLD4DUPd16_UPD
:
4344 case ARM::VLD4DUPd32_UPD
:
4346 case ARM::VLD1LNd16
:
4347 case ARM::VLD1LNd32
:
4348 case ARM::VLD1LNd8_UPD
:
4349 case ARM::VLD1LNd16_UPD
:
4350 case ARM::VLD1LNd32_UPD
:
4352 case ARM::VLD2LNd16
:
4353 case ARM::VLD2LNd32
:
4354 case ARM::VLD2LNq16
:
4355 case ARM::VLD2LNq32
:
4356 case ARM::VLD2LNd8_UPD
:
4357 case ARM::VLD2LNd16_UPD
:
4358 case ARM::VLD2LNd32_UPD
:
4359 case ARM::VLD2LNq16_UPD
:
4360 case ARM::VLD2LNq32_UPD
:
4362 case ARM::VLD4LNd16
:
4363 case ARM::VLD4LNd32
:
4364 case ARM::VLD4LNq16
:
4365 case ARM::VLD4LNq32
:
4366 case ARM::VLD4LNd8_UPD
:
4367 case ARM::VLD4LNd16_UPD
:
4368 case ARM::VLD4LNd32_UPD
:
4369 case ARM::VLD4LNq16_UPD
:
4370 case ARM::VLD4LNq32_UPD
:
4371 // If the address is not 64-bit aligned, the latencies of these
4372 // instructions increases by one.
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
  // No operand latency. The caller may fall back to getInstrLatency.
  if (!ItinData || ItinData->isEmpty())
    return std::nullopt;

  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
  Register Reg = DefMO.getReg();

  const MachineInstr *ResolvedDefMI = &DefMI;
  unsigned DefAdj = 0;
  if (DefMI.isBundle())
    ResolvedDefMI =
        getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
      ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
    return 1;
  }

  const MachineInstr *ResolvedUseMI = &UseMI;
  unsigned UseAdj = 0;
  if (UseMI.isBundle()) {
    ResolvedUseMI =
        getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
    if (!ResolvedUseMI)
      return std::nullopt;
  }

  return getOperandLatencyImpl(
      ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
      Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
}
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
    const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
    unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
  if (Reg == ARM::CPSR) {
    if (DefMI.getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isLikeA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseMI.isBranch())
      return 0;

    // Otherwise it takes the instruction latency (generally one).
    unsigned Latency = getInstrLatency(ItinData, DefMI);

    // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
    // its uses. Instructions which are otherwise scheduled between them may
    // incur a code size penalty (not able to use the CPSR setting 16-bit
    // instructions).
    if (Latency > 0 && Subtarget.isThumb2()) {
      const MachineFunction *MF = DefMI.getParent()->getParent();
      // FIXME: Use Function::hasOptSize().
      if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
        --Latency;
    }
    return Latency;
  }

  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
    return std::nullopt;

  unsigned DefAlign = DefMI.hasOneMemOperand()
                          ? (*DefMI.memoperands_begin())->getAlign().value()
                          : 0;
  unsigned UseAlign = UseMI.hasOneMemOperand()
                          ? (*UseMI.memoperands_begin())->getAlign().value()
                          : 0;

  // Get the itinerary's latency if possible, and handle variable_ops.
  std::optional<unsigned> Latency = getOperandLatency(
      ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
  // Unable to find operand latency. The caller may resort to getInstrLatency.
  if (!Latency)
    return std::nullopt;

  // Adjust for IT block position.
  int Adj = DefAdj + UseAdj;

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
  if (Adj >= 0 || (int)*Latency > -Adj) {
    return *Latency + Adj;
  }
  // Return the itinerary latency, which may be zero but not less than zero.
std::optional<unsigned>
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    SDNode *DefNode, unsigned DefIdx,
                                    SDNode *UseNode, unsigned UseIdx) const {
  if (!DefNode->isMachineOpcode())
    return 1;

  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());

  if (isZeroCost(DefMCID.Opcode))
    return 0;

  if (!ItinData || ItinData->isEmpty())
    return DefMCID.mayLoad() ? 3 : 1;

  if (!UseNode->isMachineOpcode()) {
    std::optional<unsigned> Latency =
        ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
    int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
    int Threshold = 1 + Adj;
    return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
  }
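
  // Example of the pre-ISel adjustment above (added; not in the original
  // source): with an operand cycle of 3 and a subtarget adjustment of 1, the
  // threshold is 2, so the returned latency is 3 - 1 = 2; any cycle count at
  // or below the threshold is clamped to 1.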
4497 const MCInstrDesc
&UseMCID
= get(UseNode
->getMachineOpcode());
4498 auto *DefMN
= cast
<MachineSDNode
>(DefNode
);
4499 unsigned DefAlign
= !DefMN
->memoperands_empty()
4500 ? (*DefMN
->memoperands_begin())->getAlign().value()
4502 auto *UseMN
= cast
<MachineSDNode
>(UseNode
);
4503 unsigned UseAlign
= !UseMN
->memoperands_empty()
4504 ? (*UseMN
->memoperands_begin())->getAlign().value()
4506 std::optional
<unsigned> Latency
= getOperandLatency(
4507 ItinData
, DefMCID
, DefIdx
, DefAlign
, UseMCID
, UseIdx
, UseAlign
);
4509 return std::nullopt
;
4512 (Subtarget
.isCortexA8() || Subtarget
.isLikeA9() ||
4513 Subtarget
.isCortexA7())) {
4514 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4515 // variants are one cycle cheaper.
4516 switch (DefMCID
.getOpcode()) {
4520 unsigned ShOpVal
= DefNode
->getConstantOperandVal(2);
4521 unsigned ShImm
= ARM_AM::getAM2Offset(ShOpVal
);
4523 (ShImm
== 2 && ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsl
))
4524 Latency
= *Latency
- 1;
4530 case ARM::t2LDRSHs
: {
4531 // Thumb2 mode: lsl only.
4532 unsigned ShAmt
= DefNode
->getConstantOperandVal(2);
4533 if (ShAmt
== 0 || ShAmt
== 2)
4534 Latency
= *Latency
- 1;
4538 } else if (DefIdx
== 0 && Latency
> 2U && Subtarget
.isSwift()) {
4539 // FIXME: Properly handle all of the latency adjustments for address
4541 switch (DefMCID
.getOpcode()) {
4545 unsigned ShOpVal
= DefNode
->getConstantOperandVal(2);
4546 unsigned ShImm
= ARM_AM::getAM2Offset(ShOpVal
);
4548 ((ShImm
== 1 || ShImm
== 2 || ShImm
== 3) &&
4549 ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsl
))
4550 Latency
= *Latency
- 2;
4551 else if (ShImm
== 1 && ARM_AM::getAM2ShiftOpc(ShOpVal
) == ARM_AM::lsr
)
4552 Latency
= *Latency
- 1;
4559 // Thumb2 mode: lsl 0-3 only.
4560 Latency
= *Latency
- 2;
4565 if (DefAlign
< 8 && Subtarget
.checkVLDnAccessAlignment())
4566 switch (DefMCID
.getOpcode()) {
4572 case ARM::VLD1q8wb_register
:
4573 case ARM::VLD1q16wb_register
:
4574 case ARM::VLD1q32wb_register
:
4575 case ARM::VLD1q64wb_register
:
4576 case ARM::VLD1q8wb_fixed
:
4577 case ARM::VLD1q16wb_fixed
:
4578 case ARM::VLD1q32wb_fixed
:
4579 case ARM::VLD1q64wb_fixed
:
4583 case ARM::VLD2q8Pseudo
:
4584 case ARM::VLD2q16Pseudo
:
4585 case ARM::VLD2q32Pseudo
:
4586 case ARM::VLD2d8wb_fixed
:
4587 case ARM::VLD2d16wb_fixed
:
4588 case ARM::VLD2d32wb_fixed
:
4589 case ARM::VLD2q8PseudoWB_fixed
:
4590 case ARM::VLD2q16PseudoWB_fixed
:
4591 case ARM::VLD2q32PseudoWB_fixed
:
4592 case ARM::VLD2d8wb_register
:
4593 case ARM::VLD2d16wb_register
:
4594 case ARM::VLD2d32wb_register
:
4595 case ARM::VLD2q8PseudoWB_register
:
4596 case ARM::VLD2q16PseudoWB_register
:
4597 case ARM::VLD2q32PseudoWB_register
:
4598 case ARM::VLD3d8Pseudo
:
4599 case ARM::VLD3d16Pseudo
:
4600 case ARM::VLD3d32Pseudo
:
4601 case ARM::VLD1d8TPseudo
:
4602 case ARM::VLD1d16TPseudo
:
4603 case ARM::VLD1d32TPseudo
:
4604 case ARM::VLD1d64TPseudo
:
4605 case ARM::VLD1d64TPseudoWB_fixed
:
4606 case ARM::VLD1d64TPseudoWB_register
:
4607 case ARM::VLD3d8Pseudo_UPD
:
4608 case ARM::VLD3d16Pseudo_UPD
:
4609 case ARM::VLD3d32Pseudo_UPD
:
4610 case ARM::VLD3q8Pseudo_UPD
:
4611 case ARM::VLD3q16Pseudo_UPD
:
4612 case ARM::VLD3q32Pseudo_UPD
:
4613 case ARM::VLD3q8oddPseudo
:
4614 case ARM::VLD3q16oddPseudo
:
4615 case ARM::VLD3q32oddPseudo
:
4616 case ARM::VLD3q8oddPseudo_UPD
:
4617 case ARM::VLD3q16oddPseudo_UPD
:
4618 case ARM::VLD3q32oddPseudo_UPD
:
4619 case ARM::VLD4d8Pseudo
:
4620 case ARM::VLD4d16Pseudo
:
4621 case ARM::VLD4d32Pseudo
:
4622 case ARM::VLD1d8QPseudo
:
4623 case ARM::VLD1d16QPseudo
:
4624 case ARM::VLD1d32QPseudo
:
4625 case ARM::VLD1d64QPseudo
:
4626 case ARM::VLD1d64QPseudoWB_fixed
:
4627 case ARM::VLD1d64QPseudoWB_register
:
4628 case ARM::VLD1q8HighQPseudo
:
4629 case ARM::VLD1q8LowQPseudo_UPD
:
4630 case ARM::VLD1q8HighTPseudo
:
4631 case ARM::VLD1q8LowTPseudo_UPD
:
4632 case ARM::VLD1q16HighQPseudo
:
4633 case ARM::VLD1q16LowQPseudo_UPD
:
4634 case ARM::VLD1q16HighTPseudo
:
4635 case ARM::VLD1q16LowTPseudo_UPD
:
4636 case ARM::VLD1q32HighQPseudo
:
4637 case ARM::VLD1q32LowQPseudo_UPD
:
4638 case ARM::VLD1q32HighTPseudo
:
4639 case ARM::VLD1q32LowTPseudo_UPD
:
4640 case ARM::VLD1q64HighQPseudo
:
4641 case ARM::VLD1q64LowQPseudo_UPD
:
4642 case ARM::VLD1q64HighTPseudo
:
4643 case ARM::VLD1q64LowTPseudo_UPD
:
4644 case ARM::VLD4d8Pseudo_UPD
:
4645 case ARM::VLD4d16Pseudo_UPD
:
4646 case ARM::VLD4d32Pseudo_UPD
:
4647 case ARM::VLD4q8Pseudo_UPD
:
4648 case ARM::VLD4q16Pseudo_UPD
:
4649 case ARM::VLD4q32Pseudo_UPD
:
4650 case ARM::VLD4q8oddPseudo
:
4651 case ARM::VLD4q16oddPseudo
:
4652 case ARM::VLD4q32oddPseudo
:
4653 case ARM::VLD4q8oddPseudo_UPD
:
4654 case ARM::VLD4q16oddPseudo_UPD
:
4655 case ARM::VLD4q32oddPseudo_UPD
:
4656 case ARM::VLD1DUPq8
:
4657 case ARM::VLD1DUPq16
:
4658 case ARM::VLD1DUPq32
:
4659 case ARM::VLD1DUPq8wb_fixed
:
4660 case ARM::VLD1DUPq16wb_fixed
:
4661 case ARM::VLD1DUPq32wb_fixed
:
4662 case ARM::VLD1DUPq8wb_register
:
4663 case ARM::VLD1DUPq16wb_register
:
4664 case ARM::VLD1DUPq32wb_register
:
4665 case ARM::VLD2DUPd8
:
4666 case ARM::VLD2DUPd16
:
4667 case ARM::VLD2DUPd32
:
4668 case ARM::VLD2DUPd8wb_fixed
:
4669 case ARM::VLD2DUPd16wb_fixed
:
4670 case ARM::VLD2DUPd32wb_fixed
:
4671 case ARM::VLD2DUPd8wb_register
:
4672 case ARM::VLD2DUPd16wb_register
:
4673 case ARM::VLD2DUPd32wb_register
:
4674 case ARM::VLD2DUPq8EvenPseudo
:
4675 case ARM::VLD2DUPq8OddPseudo
:
4676 case ARM::VLD2DUPq16EvenPseudo
:
4677 case ARM::VLD2DUPq16OddPseudo
:
4678 case ARM::VLD2DUPq32EvenPseudo
:
4679 case ARM::VLD2DUPq32OddPseudo
:
4680 case ARM::VLD3DUPq8EvenPseudo
:
4681 case ARM::VLD3DUPq8OddPseudo
:
4682 case ARM::VLD3DUPq16EvenPseudo
:
4683 case ARM::VLD3DUPq16OddPseudo
:
4684 case ARM::VLD3DUPq32EvenPseudo
:
4685 case ARM::VLD3DUPq32OddPseudo
:
4686 case ARM::VLD4DUPd8Pseudo
:
4687 case ARM::VLD4DUPd16Pseudo
:
4688 case ARM::VLD4DUPd32Pseudo
:
4689 case ARM::VLD4DUPd8Pseudo_UPD
:
4690 case ARM::VLD4DUPd16Pseudo_UPD
:
4691 case ARM::VLD4DUPd32Pseudo_UPD
:
4692 case ARM::VLD4DUPq8EvenPseudo
:
4693 case ARM::VLD4DUPq8OddPseudo
:
4694 case ARM::VLD4DUPq16EvenPseudo
:
4695 case ARM::VLD4DUPq16OddPseudo
:
4696 case ARM::VLD4DUPq32EvenPseudo
:
4697 case ARM::VLD4DUPq32OddPseudo
:
4698 case ARM::VLD1LNq8Pseudo
:
4699 case ARM::VLD1LNq16Pseudo
:
4700 case ARM::VLD1LNq32Pseudo
:
4701 case ARM::VLD1LNq8Pseudo_UPD
:
4702 case ARM::VLD1LNq16Pseudo_UPD
:
4703 case ARM::VLD1LNq32Pseudo_UPD
:
4704 case ARM::VLD2LNd8Pseudo
:
4705 case ARM::VLD2LNd16Pseudo
:
4706 case ARM::VLD2LNd32Pseudo
:
4707 case ARM::VLD2LNq16Pseudo
:
4708 case ARM::VLD2LNq32Pseudo
:
4709 case ARM::VLD2LNd8Pseudo_UPD
:
4710 case ARM::VLD2LNd16Pseudo_UPD
:
4711 case ARM::VLD2LNd32Pseudo_UPD
:
4712 case ARM::VLD2LNq16Pseudo_UPD
:
4713 case ARM::VLD2LNq32Pseudo_UPD
:
4714 case ARM::VLD4LNd8Pseudo
:
4715 case ARM::VLD4LNd16Pseudo
:
4716 case ARM::VLD4LNd32Pseudo
:
4717 case ARM::VLD4LNq16Pseudo
:
4718 case ARM::VLD4LNq32Pseudo
:
4719 case ARM::VLD4LNd8Pseudo_UPD
:
4720 case ARM::VLD4LNd16Pseudo_UPD
:
4721 case ARM::VLD4LNd32Pseudo_UPD
:
4722 case ARM::VLD4LNq16Pseudo_UPD
:
4723 case ARM::VLD4LNq32Pseudo_UPD
:
4724 // If the address is not 64-bit aligned, the latencies of these
4725 // instructions increases by one.
4726 Latency
= *Latency
+ 1;
unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
      MI.isImplicitDef())
    return 0;

  const MCInstrDesc &MCID = MI.getDesc();

  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
                        !Subtarget.cheapPredicableCPSRDef())) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions, this apparently increases their latencies.
    return 1;
  }
  return 0;
}
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                           const MachineInstr &MI,
                                           unsigned *PredCost) const {
  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
      MI.isImplicitDef())
    return 1;

  // An instruction scheduler typically runs on unbundled instructions, however
  // other passes may query the latency of a bundled instruction.
  if (MI.isBundle()) {
    unsigned Latency = 0;
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      if (I->getOpcode() != ARM::t2IT)
        Latency += getInstrLatency(ItinData, *I, PredCost);
    }
    return Latency;
  }

  const MCInstrDesc &MCID = MI.getDesc();
  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
                                     !Subtarget.cheapPredicableCPSRDef()))) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions, this apparently increases their latencies.
    *PredCost = 1;
  }
  // Be sure to call getStageLatency for an empty itinerary in case it has a
  // valid MinLatency property.
  if (!ItinData)
    return MI.mayLoad() ? 3 : 1;

  unsigned Class = MCID.getSchedClass();

  // For instructions with variable uops, use uops as latency.
  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
    return getNumMicroOps(ItinData, MI);

  // For the common case, fall back on the itinerary's latency.
  unsigned Latency = ItinData->getStageLatency(Class);

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  unsigned DefAlign =
      MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
  if (Adj >= 0 || (int)Latency > -Adj) {
    return Latency + Adj;
  }
  return Latency;
}
4803 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData
*ItinData
,
4804 SDNode
*Node
) const {
4805 if (!Node
->isMachineOpcode())
4808 if (!ItinData
|| ItinData
->isEmpty())
4811 unsigned Opcode
= Node
->getMachineOpcode();
4814 return ItinData
->getStageLatency(get(Opcode
).getSchedClass());
4821 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel
&SchedModel
,
4822 const MachineRegisterInfo
*MRI
,
4823 const MachineInstr
&DefMI
,
4825 const MachineInstr
&UseMI
,
4826 unsigned UseIdx
) const {
4827 unsigned DDomain
= DefMI
.getDesc().TSFlags
& ARMII::DomainMask
;
4828 unsigned UDomain
= UseMI
.getDesc().TSFlags
& ARMII::DomainMask
;
4829 if (Subtarget
.nonpipelinedVFP() &&
4830 (DDomain
== ARMII::DomainVFP
|| UDomain
== ARMII::DomainVFP
))
4833 // Hoist VFP / NEON instructions with 4 or higher latency.
4835 SchedModel
.computeOperandLatency(&DefMI
, DefIdx
, &UseMI
, UseIdx
);
4838 return DDomain
== ARMII::DomainVFP
|| DDomain
== ARMII::DomainNEON
||
4839 UDomain
== ARMII::DomainVFP
|| UDomain
== ARMII::DomainNEON
;
4842 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel
&SchedModel
,
4843 const MachineInstr
&DefMI
,
4844 unsigned DefIdx
) const {
4845 const InstrItineraryData
*ItinData
= SchedModel
.getInstrItineraries();
4846 if (!ItinData
|| ItinData
->isEmpty())
4849 unsigned DDomain
= DefMI
.getDesc().TSFlags
& ARMII::DomainMask
;
4850 if (DDomain
== ARMII::DomainGeneral
) {
4851 unsigned DefClass
= DefMI
.getDesc().getSchedClass();
4852 std::optional
<unsigned> DefCycle
=
4853 ItinData
->getOperandCycle(DefClass
, DefIdx
);
4854 return DefCycle
&& DefCycle
<= 2U;
4859 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr
&MI
,
4860 StringRef
&ErrInfo
) const {
4861 if (convertAddSubFlagsOpcode(MI
.getOpcode())) {
4862 ErrInfo
= "Pseudo flag setting opcodes only exist in Selection DAG";
4865 if (MI
.getOpcode() == ARM::tMOVr
&& !Subtarget
.hasV6Ops()) {
4866 // Make sure we don't generate a lo-lo mov that isn't supported.
4867 if (!ARM::hGPRRegClass
.contains(MI
.getOperand(0).getReg()) &&
4868 !ARM::hGPRRegClass
.contains(MI
.getOperand(1).getReg())) {
4869 ErrInfo
= "Non-flag-setting Thumb1 mov is v6-only";
4873 if (MI
.getOpcode() == ARM::tPUSH
||
4874 MI
.getOpcode() == ARM::tPOP
||
4875 MI
.getOpcode() == ARM::tPOP_RET
) {
4876 for (const MachineOperand
&MO
: llvm::drop_begin(MI
.operands(), 2)) {
4877 if (MO
.isImplicit() || !MO
.isReg())
4879 Register Reg
= MO
.getReg();
4880 if (Reg
< ARM::R0
|| Reg
> ARM::R7
) {
4881 if (!(MI
.getOpcode() == ARM::tPUSH
&& Reg
== ARM::LR
) &&
4882 !(MI
.getOpcode() == ARM::tPOP_RET
&& Reg
== ARM::PC
)) {
4883 ErrInfo
= "Unsupported register in Thumb1 push/pop";
4889 if (MI
.getOpcode() == ARM::MVE_VMOV_q_rr
) {
4890 assert(MI
.getOperand(4).isImm() && MI
.getOperand(5).isImm());
4891 if ((MI
.getOperand(4).getImm() != 2 && MI
.getOperand(4).getImm() != 3) ||
4892 MI
.getOperand(4).getImm() != MI
.getOperand(5).getImm() + 2) {
4893 ErrInfo
= "Incorrect array index for MVE_VMOV_q_rr";
4898 // Check the address model by taking the first Imm operand and checking it is
4899 // legal for that addressing mode.
4900 ARMII::AddrMode AddrMode
=
4901 (ARMII::AddrMode
)(MI
.getDesc().TSFlags
& ARMII::AddrModeMask
);
4905 case ARMII::AddrModeT2_i7
:
4906 case ARMII::AddrModeT2_i7s2
:
4907 case ARMII::AddrModeT2_i7s4
:
4908 case ARMII::AddrModeT2_i8
:
4909 case ARMII::AddrModeT2_i8pos
:
4910 case ARMII::AddrModeT2_i8neg
:
4911 case ARMII::AddrModeT2_i8s4
:
4912 case ARMII::AddrModeT2_i12
: {
4914 for (auto Op
: MI
.operands()) {
4920 if (!isLegalAddressImm(MI
.getOpcode(), Imm
, this)) {
4921 ErrInfo
= "Incorrect AddrMode Imm for instruction";
void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
                                                unsigned LoadImmOpc,
                                                unsigned LoadOpc) const {
  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
         "ROPI/RWPI not currently supported with stack guard");

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  Register Reg = MI->getOperand(0).getReg();
  MachineInstrBuilder MIB;
  unsigned int Offset = 0;

  if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
    assert(!Subtarget.isReadTPSoft() &&
           "TLS stack protector requires hardware TLS register");

    BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
        .add(predOps(ARMCC::AL));

    Module &M = *MBB.getParent()->getFunction().getParent();
    Offset = M.getStackProtectorGuardOffset();
    if (Offset & ~0xfffU) {
      // The offset won't fit in the LDR's 12-bit immediate field, so emit an
      // extra ADD to cover the delta. This gives us a guaranteed 8 additional
      // bits, resulting in a range of 0 to +1 MiB for the guard offset.
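      // Illustrative example (added; not in the original source): a
      // hypothetical guard offset of 0x1234 would be covered by adding 0x1000
      // to the base register here, leaving the low 12 bits (0x234) to be
      // folded into the subsequent load's immediate.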
4960 unsigned AddOpc
= (LoadImmOpc
== ARM::MRC
) ? ARM::ADDri
: ARM::t2ADDri
;
4961 BuildMI(MBB
, MI
, DL
, get(AddOpc
), Reg
)
4962 .addReg(Reg
, RegState::Kill
)
4963 .addImm(Offset
& ~0xfffU
)
4964 .add(predOps(ARMCC::AL
))
4969 const GlobalValue
*GV
=
4970 cast
<GlobalValue
>((*MI
->memoperands_begin())->getValue());
4971 bool IsIndirect
= Subtarget
.isGVIndirectSymbol(GV
);
4973 unsigned TargetFlags
= ARMII::MO_NO_FLAG
;
4974 if (Subtarget
.isTargetMachO()) {
4975 TargetFlags
|= ARMII::MO_NONLAZY
;
4976 } else if (Subtarget
.isTargetCOFF()) {
4977 if (GV
->hasDLLImportStorageClass())
4978 TargetFlags
|= ARMII::MO_DLLIMPORT
;
4979 else if (IsIndirect
)
4980 TargetFlags
|= ARMII::MO_COFFSTUB
;
4981 } else if (IsIndirect
) {
4982 TargetFlags
|= ARMII::MO_GOT
;
4985 if (LoadImmOpc
== ARM::tMOVi32imm
) { // Thumb-1 execute-only
4986 Register CPSRSaveReg
= ARM::R12
; // Use R12 as scratch register
4988 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding
;
4989 BuildMI(MBB
, MI
, DL
, get(ARM::t2MRS_M
), CPSRSaveReg
)
4990 .addImm(APSREncoding
)
4991 .add(predOps(ARMCC::AL
));
4992 BuildMI(MBB
, MI
, DL
, get(LoadImmOpc
), Reg
)
4993 .addGlobalAddress(GV
, 0, TargetFlags
);
4994 BuildMI(MBB
, MI
, DL
, get(ARM::t2MSR_M
))
4995 .addImm(APSREncoding
)
4996 .addReg(CPSRSaveReg
, RegState::Kill
)
4997 .add(predOps(ARMCC::AL
));
4999 BuildMI(MBB
, MI
, DL
, get(LoadImmOpc
), Reg
)
5000 .addGlobalAddress(GV
, 0, TargetFlags
);
5004 MIB
= BuildMI(MBB
, MI
, DL
, get(LoadOpc
), Reg
);
5005 MIB
.addReg(Reg
, RegState::Kill
).addImm(0);
5006 auto Flags
= MachineMemOperand::MOLoad
|
5007 MachineMemOperand::MODereferenceable
|
5008 MachineMemOperand::MOInvariant
;
5009 MachineMemOperand
*MMO
= MBB
.getParent()->getMachineMemOperand(
5010 MachinePointerInfo::getGOT(*MBB
.getParent()), Flags
, 4, Align(4));
5011 MIB
.addMemOperand(MMO
).add(predOps(ARMCC::AL
));
5015 MIB
= BuildMI(MBB
, MI
, DL
, get(LoadOpc
), Reg
);
5016 MIB
.addReg(Reg
, RegState::Kill
)
5019 .add(predOps(ARMCC::AL
));
5023 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode
, unsigned &MulOpc
,
5024 unsigned &AddSubOpc
,
5025 bool &NegAcc
, bool &HasLane
) const {
5026 DenseMap
<unsigned, unsigned>::const_iterator I
= MLxEntryMap
.find(Opcode
);
5027 if (I
== MLxEntryMap
.end())
5030 const ARM_MLxEntry
&Entry
= ARM_MLxTable
[I
->second
];
5031 MulOpc
= Entry
.MulOpc
;
5032 AddSubOpc
= Entry
.AddSubOpc
;
5033 NegAcc
= Entry
.NegAcc
;
5034 HasLane
= Entry
.HasLane
;
5038 //===----------------------------------------------------------------------===//
5039 // Execution domains.
5040 //===----------------------------------------------------------------------===//
5042 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5043 // and some can go down both. The vmov instructions go down the VFP pipeline,
5044 // but they can be changed to vorr equivalents that are executed by the NEON
5047 // We use the following execution domain numbering:
5056 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5058 std::pair
<uint16_t, uint16_t>
5059 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr
&MI
) const {
5060 // If we don't have access to NEON instructions then we won't be able
5061 // to swizzle anything to the NEON domain. Check to make sure.
5062 if (Subtarget
.hasNEON()) {
5063 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5064 // if they are not predicated.
5065 if (MI
.getOpcode() == ARM::VMOVD
&& !isPredicated(MI
))
5066 return std::make_pair(ExeVFP
, (1 << ExeVFP
) | (1 << ExeNEON
));
5068 // CortexA9 is particularly picky about mixing the two and wants these
5070 if (Subtarget
.useNEONForFPMovs() && !isPredicated(MI
) &&
5071 (MI
.getOpcode() == ARM::VMOVRS
|| MI
.getOpcode() == ARM::VMOVSR
||
5072 MI
.getOpcode() == ARM::VMOVS
))
5073 return std::make_pair(ExeVFP
, (1 << ExeVFP
) | (1 << ExeNEON
));
5075 // No other instructions can be swizzled, so just determine their domain.
5076 unsigned Domain
= MI
.getDesc().TSFlags
& ARMII::DomainMask
;
5078 if (Domain
& ARMII::DomainNEON
)
5079 return std::make_pair(ExeNEON
, 0);
5081 // Certain instructions can go either way on Cortex-A8.
5082 // Treat them as NEON instructions.
5083 if ((Domain
& ARMII::DomainNEONA8
) && Subtarget
.isCortexA8())
5084 return std::make_pair(ExeNEON
, 0);
5086 if (Domain
& ARMII::DomainVFP
)
5087 return std::make_pair(ExeVFP
, 0);
5089 return std::make_pair(ExeGeneric
, 0);
5092 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo
*TRI
,
5093 unsigned SReg
, unsigned &Lane
) {
5094 unsigned DReg
= TRI
->getMatchingSuperReg(SReg
, ARM::ssub_0
, &ARM::DPRRegClass
);
5097 if (DReg
!= ARM::NoRegister
)
5101 DReg
= TRI
->getMatchingSuperReg(SReg
, ARM::ssub_1
, &ARM::DPRRegClass
);
5103 assert(DReg
&& "S-register with no D super-register?");
/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
/// set ImplicitSReg to a register number that must be marked as implicit-use
/// or zero if no register needs to be defined as implicit-use.
///
/// If the function cannot determine if an SPR should be marked implicit use
/// or not, it returns false.
///
/// This function handles cases where an instruction is being modified from
/// taking an SPR to a DPR[Lane]. A use of the DPR is being added, which may
/// conflict with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e.
/// the other lane of the DPR).
///
/// If the other SPR is defined, an implicit-use of it should be added. Else,
/// (including the case where the DPR itself is defined), it should not.
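///
/// Example (added; not in the original comment): if a VMOVSR that wrote s1 is
/// rewritten as a VSETLNi32 writing d0[1], and s0 (the other lane of d0) has a
/// live definition, the rewritten instruction must carry an implicit use of s0
/// so the earlier def of s0 is not treated as dead.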
5122 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo
*TRI
,
5123 MachineInstr
&MI
, unsigned DReg
,
5124 unsigned Lane
, unsigned &ImplicitSReg
) {
5125 // If the DPR is defined or used already, the other SPR lane will be chained
5126 // correctly, so there is nothing to be done.
5127 if (MI
.definesRegister(DReg
, TRI
) || MI
.readsRegister(DReg
, TRI
)) {
5132 // Otherwise we need to go searching to see if the SPR is set explicitly.
5133 ImplicitSReg
= TRI
->getSubReg(DReg
,
5134 (Lane
& 1) ? ARM::ssub_0
: ARM::ssub_1
);
5135 MachineBasicBlock::LivenessQueryResult LQR
=
5136 MI
.getParent()->computeRegisterLiveness(TRI
, ImplicitSReg
, MI
);
5138 if (LQR
== MachineBasicBlock::LQR_Live
)
5140 else if (LQR
== MachineBasicBlock::LQR_Unknown
)
5143 // If the register is known not to be live, there is no need to add an
5149 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr
&MI
,
5150 unsigned Domain
) const {
5151 unsigned DstReg
, SrcReg
, DReg
;
5153 MachineInstrBuilder
MIB(*MI
.getParent()->getParent(), MI
);
5154 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
5155 switch (MI
.getOpcode()) {
5157 llvm_unreachable("cannot handle opcode!");
5160 if (Domain
!= ExeNEON
)
5163 // Zap the predicate operands.
5164 assert(!isPredicated(MI
) && "Cannot predicate a VORRd");
5166 // Make sure we've got NEON instructions.
5167 assert(Subtarget
.hasNEON() && "VORRd requires NEON");
5169 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5170 DstReg
= MI
.getOperand(0).getReg();
5171 SrcReg
= MI
.getOperand(1).getReg();
5173 for (unsigned i
= MI
.getDesc().getNumOperands(); i
; --i
)
5174 MI
.removeOperand(i
- 1);
5176 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5177 MI
.setDesc(get(ARM::VORRd
));
5178 MIB
.addReg(DstReg
, RegState::Define
)
5181 .add(predOps(ARMCC::AL
));
5184 if (Domain
!= ExeNEON
)
5186 assert(!isPredicated(MI
) && "Cannot predicate a VGETLN");
5188 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5189 DstReg
= MI
.getOperand(0).getReg();
5190 SrcReg
= MI
.getOperand(1).getReg();
5192 for (unsigned i
= MI
.getDesc().getNumOperands(); i
; --i
)
5193 MI
.removeOperand(i
- 1);
5195 DReg
= getCorrespondingDRegAndLane(TRI
, SrcReg
, Lane
);
5197 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5198 // Note that DSrc has been widened and the other lane may be undef, which
5199 // contaminates the entire register.
5200 MI
.setDesc(get(ARM::VGETLNi32
));
5201 MIB
.addReg(DstReg
, RegState::Define
)
5202 .addReg(DReg
, RegState::Undef
)
5204 .add(predOps(ARMCC::AL
));
5206 // The old source should be an implicit use, otherwise we might think it
5207 // was dead before here.
5208 MIB
.addReg(SrcReg
, RegState::Implicit
);
5211 if (Domain
!= ExeNEON
)
5213 assert(!isPredicated(MI
) && "Cannot predicate a VSETLN");
5215 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5216 DstReg
= MI
.getOperand(0).getReg();
5217 SrcReg
= MI
.getOperand(1).getReg();
5219 DReg
= getCorrespondingDRegAndLane(TRI
, DstReg
, Lane
);
5221 unsigned ImplicitSReg
;
5222 if (!getImplicitSPRUseForDPRUse(TRI
, MI
, DReg
, Lane
, ImplicitSReg
))
5225 for (unsigned i
= MI
.getDesc().getNumOperands(); i
; --i
)
5226 MI
.removeOperand(i
- 1);
5228 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5229 // Again DDst may be undefined at the beginning of this instruction.
5230 MI
.setDesc(get(ARM::VSETLNi32
));
5231 MIB
.addReg(DReg
, RegState::Define
)
5232 .addReg(DReg
, getUndefRegState(!MI
.readsRegister(DReg
, TRI
)))
5235 .add(predOps(ARMCC::AL
));
5237 // The narrower destination must be marked as set to keep previous chains
5239 MIB
.addReg(DstReg
, RegState::Define
| RegState::Implicit
);
5240 if (ImplicitSReg
!= 0)
5241 MIB
.addReg(ImplicitSReg
, RegState::Implicit
);
5245 if (Domain
!= ExeNEON
)
5248 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5249 DstReg
= MI
.getOperand(0).getReg();
5250 SrcReg
= MI
.getOperand(1).getReg();
5252 unsigned DstLane
= 0, SrcLane
= 0, DDst
, DSrc
;
5253 DDst
= getCorrespondingDRegAndLane(TRI
, DstReg
, DstLane
);
5254 DSrc
= getCorrespondingDRegAndLane(TRI
, SrcReg
, SrcLane
);
5256 unsigned ImplicitSReg
;
5257 if (!getImplicitSPRUseForDPRUse(TRI
, MI
, DSrc
, SrcLane
, ImplicitSReg
))
5260 for (unsigned i
= MI
.getDesc().getNumOperands(); i
; --i
)
5261 MI
.removeOperand(i
- 1);
5264 // Destination can be:
5265 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5266 MI
.setDesc(get(ARM::VDUPLN32d
));
5267 MIB
.addReg(DDst
, RegState::Define
)
5268 .addReg(DDst
, getUndefRegState(!MI
.readsRegister(DDst
, TRI
)))
5270 .add(predOps(ARMCC::AL
));
5272 // Neither the source or the destination are naturally represented any
5273 // more, so add them in manually.
5274 MIB
.addReg(DstReg
, RegState::Implicit
| RegState::Define
);
5275 MIB
.addReg(SrcReg
, RegState::Implicit
);
5276 if (ImplicitSReg
!= 0)
5277 MIB
.addReg(ImplicitSReg
, RegState::Implicit
);
5281 // In general there's no single instruction that can perform an S <-> S
5282 // move in NEON space, but a pair of VEXT instructions *can* do the
5283 // job. It turns out that the VEXTs needed will only use DSrc once, with
5284 // the position based purely on the combination of lane-0 and lane-1
5285 // involved. For example
5286 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5287 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5288 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5289 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5291 // Pattern of the MachineInstrs is:
5292 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5293 MachineInstrBuilder NewMIB
;
5294 NewMIB
= BuildMI(*MI
.getParent(), MI
, MI
.getDebugLoc(), get(ARM::VEXTd32
),
5297 // On the first instruction, both DSrc and DDst may be undef if present.
5298 // Specifically when the original instruction didn't have them as an
5300 unsigned CurReg
= SrcLane
== 1 && DstLane
== 1 ? DSrc
: DDst
;
5301 bool CurUndef
= !MI
.readsRegister(CurReg
, TRI
);
5302 NewMIB
.addReg(CurReg
, getUndefRegState(CurUndef
));
5304 CurReg
= SrcLane
== 0 && DstLane
== 0 ? DSrc
: DDst
;
5305 CurUndef
= !MI
.readsRegister(CurReg
, TRI
);
5306 NewMIB
.addReg(CurReg
, getUndefRegState(CurUndef
))
5308 .add(predOps(ARMCC::AL
));
5310 if (SrcLane
== DstLane
)
5311 NewMIB
.addReg(SrcReg
, RegState::Implicit
);
5313 MI
.setDesc(get(ARM::VEXTd32
));
5314 MIB
.addReg(DDst
, RegState::Define
);
5316 // On the second instruction, DDst has definitely been defined above, so
5317 // it is not undef. DSrc, if present, can be undef as above.
5318 CurReg
= SrcLane
== 1 && DstLane
== 0 ? DSrc
: DDst
;
5319 CurUndef
= CurReg
== DSrc
&& !MI
.readsRegister(CurReg
, TRI
);
5320 MIB
.addReg(CurReg
, getUndefRegState(CurUndef
));
5322 CurReg
= SrcLane
== 0 && DstLane
== 1 ? DSrc
: DDst
;
5323 CurUndef
= CurReg
== DSrc
&& !MI
.readsRegister(CurReg
, TRI
);
5324 MIB
.addReg(CurReg
, getUndefRegState(CurUndef
))
5326 .add(predOps(ARMCC::AL
));
5328 if (SrcLane
!= DstLane
)
5329 MIB
.addReg(SrcReg
, RegState::Implicit
);
5331 // As before, the original destination is no longer represented, add it
5333 MIB
.addReg(DstReg
, RegState::Define
| RegState::Implicit
);
5334 if (ImplicitSReg
!= 0)
5335 MIB
.addReg(ImplicitSReg
, RegState::Implicit
);
//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity. That means any
// instruction writing an S-reg implicitly reads the containing D-reg. The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
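//
// Illustrative sequence (added; not in the original source): if "vldr s0, [r0]"
// would otherwise depend on the previous writer of d0, breakPartialRegDependency
// below inserts "vmov.f64 d0, #0.5" (FCONSTD) in front of it, so the VLDRS no
// longer depends on the stale contents of d0.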
5357 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5358 const MachineInstr
&MI
, unsigned OpNum
,
5359 const TargetRegisterInfo
*TRI
) const {
5360 auto PartialUpdateClearance
= Subtarget
.getPartialUpdateClearance();
5361 if (!PartialUpdateClearance
)
5364 assert(TRI
&& "Need TRI instance");
5366 const MachineOperand
&MO
= MI
.getOperand(OpNum
);
5369 Register Reg
= MO
.getReg();
5372 switch (MI
.getOpcode()) {
5373 // Normal instructions writing only an S-register.
5378 case ARM::VMOVv4i16
:
5379 case ARM::VMOVv2i32
:
5380 case ARM::VMOVv2f32
:
5381 case ARM::VMOVv1i64
:
5382 UseOp
= MI
.findRegisterUseOperandIdx(Reg
, TRI
, false);
5385 // Explicitly reads the dependency.
5386 case ARM::VLD1LNd32
:
5393 // If this instruction actually reads a value from Reg, there is no unwanted
5395 if (UseOp
!= -1 && MI
.getOperand(UseOp
).readsReg())
5398 // We must be able to clobber the whole D-reg.
5399 if (Reg
.isVirtual()) {
5400 // Virtual register must be a def undef foo:ssub_0 operand.
5401 if (!MO
.getSubReg() || MI
.readsVirtualRegister(Reg
))
5403 } else if (ARM::SPRRegClass
.contains(Reg
)) {
5404 // Physical register: MI must define the full D-reg.
5405 unsigned DReg
= TRI
->getMatchingSuperReg(Reg
, ARM::ssub_0
,
5407 if (!DReg
|| !MI
.definesRegister(DReg
, TRI
))
5411 // MI has an unwanted D-register dependency.
5412 // Avoid defs in the previous N instructrions.
5413 return PartialUpdateClearance
;
5416 // Break a partial register dependency after getPartialRegUpdateClearance
5417 // returned non-zero.
5418 void ARMBaseInstrInfo::breakPartialRegDependency(
5419 MachineInstr
&MI
, unsigned OpNum
, const TargetRegisterInfo
*TRI
) const {
5420 assert(OpNum
< MI
.getDesc().getNumDefs() && "OpNum is not a def");
5421 assert(TRI
&& "Need TRI instance");
5423 const MachineOperand
&MO
= MI
.getOperand(OpNum
);
5424 Register Reg
= MO
.getReg();
5425 assert(Reg
.isPhysical() && "Can't break virtual register dependencies.");
5426 unsigned DReg
= Reg
;
5428 // If MI defines an S-reg, find the corresponding D super-register.
5429 if (ARM::SPRRegClass
.contains(Reg
)) {
5430 DReg
= ARM::D0
+ (Reg
- ARM::S0
) / 2;
5431 assert(TRI
->isSuperRegister(Reg
, DReg
) && "Register enums broken");
5434 assert(ARM::DPRRegClass
.contains(DReg
) && "Can only break D-reg deps");
5435 assert(MI
.definesRegister(DReg
, TRI
) && "MI doesn't clobber full D-reg");
5437 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5438 // the full D-register by loading the same value to both lanes. The
5439 // instruction is micro-coded with 2 uops, so don't do this until we can
5440 // properly schedule micro-coded instructions. The dispatcher stalls cause
5441 // too big regressions.
5443 // Insert the dependency-breaking FCONSTD before MI.
5444 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5445 BuildMI(*MI
.getParent(), MI
, MI
.getDebugLoc(), get(ARM::FCONSTD
), DReg
)
5447 .add(predOps(ARMCC::AL
));
5448 MI
.addRegisterKilled(DReg
, TRI
, true);
5451 bool ARMBaseInstrInfo::hasNOP() const {
5452 return Subtarget
.hasFeature(ARM::HasV6KOps
);
5455 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr
*MI
) const {
5456 if (MI
->getNumOperands() < 4)
5458 unsigned ShOpVal
= MI
->getOperand(3).getImm();
5459 unsigned ShImm
= ARM_AM::getSORegOffset(ShOpVal
);
5460 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5461 if ((ShImm
== 1 && ARM_AM::getSORegShOp(ShOpVal
) == ARM_AM::lsr
) ||
5462 ((ShImm
== 1 || ShImm
== 2) &&
5463 ARM_AM::getSORegShOp(ShOpVal
) == ARM_AM::lsl
))
5469 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5470 const MachineInstr
&MI
, unsigned DefIdx
,
5471 SmallVectorImpl
<RegSubRegPairAndIdx
> &InputRegs
) const {
5472 assert(DefIdx
< MI
.getDesc().getNumDefs() && "Invalid definition index");
5473 assert(MI
.isRegSequenceLike() && "Invalid kind of instruction");
5475 switch (MI
.getOpcode()) {
5477 // dX = VMOVDRR rY, rZ
5479 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5480 // Populate the InputRegs accordingly.
5482 const MachineOperand
*MOReg
= &MI
.getOperand(1);
5483 if (!MOReg
->isUndef())
5484 InputRegs
.push_back(RegSubRegPairAndIdx(MOReg
->getReg(),
5485 MOReg
->getSubReg(), ARM::ssub_0
));
5487 MOReg
= &MI
.getOperand(2);
5488 if (!MOReg
->isUndef())
5489 InputRegs
.push_back(RegSubRegPairAndIdx(MOReg
->getReg(),
5490 MOReg
->getSubReg(), ARM::ssub_1
));
5493 llvm_unreachable("Target dependent opcode missing");
5496 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5497 const MachineInstr
&MI
, unsigned DefIdx
,
5498 RegSubRegPairAndIdx
&InputReg
) const {
5499 assert(DefIdx
< MI
.getDesc().getNumDefs() && "Invalid definition index");
5500 assert(MI
.isExtractSubregLike() && "Invalid kind of instruction");
5502 switch (MI
.getOpcode()) {
5504 // rX, rY = VMOVRRD dZ
5506 // rX = EXTRACT_SUBREG dZ, ssub_0
5507 // rY = EXTRACT_SUBREG dZ, ssub_1
5508 const MachineOperand
&MOReg
= MI
.getOperand(2);
5509 if (MOReg
.isUndef())
5511 InputReg
.Reg
= MOReg
.getReg();
5512 InputReg
.SubReg
= MOReg
.getSubReg();
5513 InputReg
.SubIdx
= DefIdx
== 0 ? ARM::ssub_0
: ARM::ssub_1
;
5516 llvm_unreachable("Target dependent opcode missing");
5519 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5520 const MachineInstr
&MI
, unsigned DefIdx
, RegSubRegPair
&BaseReg
,
5521 RegSubRegPairAndIdx
&InsertedReg
) const {
5522 assert(DefIdx
< MI
.getDesc().getNumDefs() && "Invalid definition index");
5523 assert(MI
.isInsertSubregLike() && "Invalid kind of instruction");
5525 switch (MI
.getOpcode()) {
5526 case ARM::VSETLNi32
:
5527 case ARM::MVE_VMOV_to_lane_32
:
5528 // dX = VSETLNi32 dY, rZ, imm
5529 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5530 const MachineOperand
&MOBaseReg
= MI
.getOperand(1);
5531 const MachineOperand
&MOInsertedReg
= MI
.getOperand(2);
5532 if (MOInsertedReg
.isUndef())
5534 const MachineOperand
&MOIndex
= MI
.getOperand(3);
5535 BaseReg
.Reg
= MOBaseReg
.getReg();
5536 BaseReg
.SubReg
= MOBaseReg
.getSubReg();
5538 InsertedReg
.Reg
= MOInsertedReg
.getReg();
5539 InsertedReg
.SubReg
= MOInsertedReg
.getSubReg();
5540 InsertedReg
.SubIdx
= ARM::ssub_0
+ MOIndex
.getImm();
5543 llvm_unreachable("Target dependent opcode missing");
5546 std::pair
<unsigned, unsigned>
5547 ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF
) const {
5548 const unsigned Mask
= ARMII::MO_OPTION_MASK
;
5549 return std::make_pair(TF
& Mask
, TF
& ~Mask
);
5552 ArrayRef
<std::pair
<unsigned, const char *>>
5553 ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5554 using namespace ARMII
;
5556 static const std::pair
<unsigned, const char *> TargetFlags
[] = {
5557 {MO_LO16
, "arm-lo16"}, {MO_HI16
, "arm-hi16"},
5558 {MO_LO_0_7
, "arm-lo-0-7"}, {MO_HI_0_7
, "arm-hi-0-7"},
5559 {MO_LO_8_15
, "arm-lo-8-15"}, {MO_HI_8_15
, "arm-hi-8-15"},
5561 return ArrayRef(TargetFlags
);
5564 ArrayRef
<std::pair
<unsigned, const char *>>
5565 ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5566 using namespace ARMII
;
5568 static const std::pair
<unsigned, const char *> TargetFlags
[] = {
5569 {MO_COFFSTUB
, "arm-coffstub"},
5570 {MO_GOT
, "arm-got"},
5571 {MO_SBREL
, "arm-sbrel"},
5572 {MO_DLLIMPORT
, "arm-dllimport"},
5573 {MO_SECREL
, "arm-secrel"},
5574 {MO_NONLAZY
, "arm-nonlazy"}};
5575 return ArrayRef(TargetFlags
);
5578 std::optional
<RegImmPair
>
5579 ARMBaseInstrInfo::isAddImmediate(const MachineInstr
&MI
, Register Reg
) const {
5581 unsigned Opcode
= MI
.getOpcode();
5584 // TODO: Handle cases where Reg is a super- or sub-register of the
5585 // destination register.
5586 const MachineOperand
&Op0
= MI
.getOperand(0);
5587 if (!Op0
.isReg() || Reg
!= Op0
.getReg())
5588 return std::nullopt
;
5590 // We describe SUBri or ADDri instructions.
5591 if (Opcode
== ARM::SUBri
)
5593 else if (Opcode
!= ARM::ADDri
)
5594 return std::nullopt
;
5596 // TODO: Third operand can be global address (usually some string). Since
5597 // strings can be relocated we cannot calculate their offsets for
5599 if (!MI
.getOperand(1).isReg() || !MI
.getOperand(2).isImm())
5600 return std::nullopt
;
5602 Offset
= MI
.getOperand(2).getImm() * Sign
;
5603 return RegImmPair
{MI
.getOperand(1).getReg(), Offset
};
5606 bool llvm::registerDefinedBetween(unsigned Reg
,
5607 MachineBasicBlock::iterator From
,
5608 MachineBasicBlock::iterator To
,
5609 const TargetRegisterInfo
*TRI
) {
5610 for (auto I
= From
; I
!= To
; ++I
)
5611 if (I
->modifiesRegister(Reg
, TRI
))
5616 MachineInstr
*llvm::findCMPToFoldIntoCBZ(MachineInstr
*Br
,
5617 const TargetRegisterInfo
*TRI
) {
5618 // Search backwards to the instruction that defines CSPR. This may or not
5619 // be a CMP, we check that after this loop. If we find another instruction
5620 // that reads cpsr, we return nullptr.
5621 MachineBasicBlock::iterator CmpMI
= Br
;
5622 while (CmpMI
!= Br
->getParent()->begin()) {
5624 if (CmpMI
->modifiesRegister(ARM::CPSR
, TRI
))
5626 if (CmpMI
->readsRegister(ARM::CPSR
, TRI
))
5630 // Check that this inst is a CMP r[0-7], #0 and that the register
5631 // is not redefined between the cmp and the br.
5632 if (CmpMI
->getOpcode() != ARM::tCMPi8
&& CmpMI
->getOpcode() != ARM::t2CMPri
)
5634 Register Reg
= CmpMI
->getOperand(0).getReg();
5636 ARMCC::CondCodes Pred
= getInstrPredicate(*CmpMI
, PredReg
);
5637 if (Pred
!= ARMCC::AL
|| CmpMI
->getOperand(1).getImm() != 0)
5639 if (!isARMLowRegister(Reg
))
5641 if (registerDefinedBetween(Reg
, CmpMI
->getNextNode(), Br
, TRI
))
unsigned llvm::ConstantMaterializationCost(unsigned Val,
                                           const ARMSubtarget *Subtarget,
                                           bool ForCodesize) {
  if (Subtarget->isThumb()) {
    if (Val <= 255) // MOV
      return ForCodesize ? 2 : 1;
    if (Subtarget->hasV6T2Ops() && (Val <= 0xffff ||                    // MOV
                                    ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
                                    ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
      return ForCodesize ? 4 : 1;
    if (Val <= 510) // MOV + ADDi8
      return ForCodesize ? 4 : 2;
    if (~Val <= 255) // MOV + MVN
      return ForCodesize ? 4 : 2;
    if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
      return ForCodesize ? 4 : 2;
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) // MOV
      return ForCodesize ? 4 : 1;
    if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
      return ForCodesize ? 4 : 1;
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
      return ForCodesize ? 4 : 1;
    if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
      return ForCodesize ? 8 : 2;
    if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
      return ForCodesize ? 8 : 2;
  }
  if (Subtarget->useMovt()) // MOVW + MOVT
    return ForCodesize ? 8 : 2;
  return ForCodesize ? 8 : 3; // Literal pool load
}

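// Rough examples of the costs computed above (sketch, assuming a Thumb2
// subtarget with MOVW/MOVT available):
//   Val = 42         -> single MOV:  cost 2 (bytes) or 1 (instructions)
//   Val = 0x12340000 -> MOVW + MOVT: cost 8 (bytes) or 2 (instructions)
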
bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
                                               const ARMSubtarget *Subtarget,
                                               bool ForCodesize) {
  // Check with ForCodesize
  unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
  unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
  if (Cost1 < Cost2)
    return true;
  if (Cost1 > Cost2)
    return false;

  // If they are equal, try with !ForCodesize
  return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
         ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
}

/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind
/// of frame should be emitted for that outlined function.
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION        I1
/// BX LR                             I2
///                                   BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      4 |   4 |
/// | Frame overhead in Bytes |      0 |   0 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// BL f                              I2
///                                   B f
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      4 |   4 |
/// | Frame overhead in Bytes |      0 |   0 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      4 |   4 |
/// | Frame overhead in Bytes |      2 |   4 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      8 |  12 |
/// | Frame overhead in Bytes |      2 |   4 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      8 |  12 |
/// | Frame overhead in Bytes |      2 |   4 |
/// | Stack fixup required    |    Yes | Yes |
/// +-------------------------+--------+-----+
enum MachineOutlinerClass {
  MachineOutlinerTailCall,
  MachineOutlinerThunk,
  MachineOutlinerNoLRSave,
  MachineOutlinerRegSave,
  MachineOutlinerDefault
};

enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8
};

struct OutlinerCosts {
  int CallTailCall;
  int FrameTailCall;
  int CallThunk;
  int FrameThunk;
  int CallNoLRSave;
  int FrameNoLRSave;
  int CallRegSave;
  int FrameRegSave;
  int CallDefault;
  int FrameDefault;
  int SaveRestoreLROnStack;

  OutlinerCosts(const ARMSubtarget &target)
      : CallTailCall(target.isThumb() ? 4 : 4),
        FrameTailCall(target.isThumb() ? 0 : 0),
        CallThunk(target.isThumb() ? 4 : 4),
        FrameThunk(target.isThumb() ? 0 : 0),
        CallNoLRSave(target.isThumb() ? 4 : 4),
        FrameNoLRSave(target.isThumb() ? 2 : 4),
        CallRegSave(target.isThumb() ? 8 : 12),
        FrameRegSave(target.isThumb() ? 2 : 4),
        CallDefault(target.isThumb() ? 8 : 12),
        FrameDefault(target.isThumb() ? 2 : 4),
        SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
};

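// These values mirror the overhead tables in the comment block above: e.g. on
// Thumb2 a MachineOutlinerDefault call costs 8 bytes at each call site and a
// 2-byte frame, while the plain ARM encodings cost 12 and 4 bytes.
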
Register
ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
  MachineFunction *MF = C.getMF();
  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
  const ARMBaseRegisterInfo *ARI =
      static_cast<const ARMBaseRegisterInfo *>(&TRI);

  BitVector regsReserved = ARI->getReservedRegs(*MF);
  // Check if there is an available register across the sequence that we can
  // use.
  for (Register Reg : ARM::rGPRRegClass) {
    if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
        Reg != ARM::LR &&  // LR is not reserved, but don't use it.
        Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
        C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
        C.isAvailableInsideSeq(Reg, TRI))
      return Reg;
  }
  return Register();
}

// Compute liveness of LR at the point after the interval [I, E), which
// denotes a *backward* iteration through instructions. Used only for return
// basic blocks, which do not end with a tail call.
static bool isLRAvailable(const TargetRegisterInfo &TRI,
                          MachineBasicBlock::reverse_iterator I,
                          MachineBasicBlock::reverse_iterator E) {
  // At the end of the function LR is dead.
  bool Live = false;
  for (; I != E; ++I) {
    const MachineInstr &MI = *I;

    // Check defs of LR.
    if (MI.modifiesRegister(ARM::LR, &TRI))
      Live = false;

    // Check uses of LR.
    unsigned Opcode = MI.getOpcode();
    if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
        Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
        Opcode == ARM::tBXNS_RET) {
      // These instructions use LR, but it's not an (explicit or implicit)
      // operand.
      Live = true;
      continue;
    }
    if (MI.readsRegister(ARM::LR, &TRI))
      Live = true;
  }
  return !Live;
}

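// Example of the backward scan above (illustration only): in a return block
// ending in tBX_RET, the return marks LR live; if an earlier instruction in
// the scanned interval writes LR, Live is cleared again at that point, so the
// interval as a whole still reports LR as available.
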
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
ARMBaseInstrInfo::getOutliningCandidateInfo(
    const MachineModuleInfo &MMI,
    std::vector<outliner::Candidate> &RepeatedSequenceLocs,
    unsigned MinRepeats) const {
  unsigned SequenceSize = 0;
  for (auto &MI : RepeatedSequenceLocs[0])
    SequenceSize += getInstSizeInBytes(MI);

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  for (outliner::Candidate &C : RepeatedSequenceLocs)
    FlagsSetInAll &= C.Flags;

  // According to the ARM Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Register R12(IP),
  // * Condition codes (and thus the CPSR register)
  //
  // Since we control the instructions which are part of the outlined regions
  // we don't need to be fully compliant with the AAPCS, but we have to
  // guarantee that if a veneer is inserted at link time the code is still
  // correct. Because of this, we can't outline any sequence of instructions
  // where one of these registers is live into/across it. Thus, we need to
  // delete those candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
  };

  // Are there any candidates where those registers are live?
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
    // Erase every candidate that violates the restrictions above. (It could be
    // true that we have viable candidates, so it's not worth bailing out in
    // the case that, say, 1 out of 20 candidates violate the restrictions.)
    llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < MinRepeats)
      return std::nullopt;
  }

  // We expect the majority of the outlining candidates to be in consensus with
  // regard to return address sign and authentication, and branch target
  // enforcement; in other words, partitioning according to all the four
  // possible combinations of PAC-RET and BTI is going to yield one big subset
  // and three small (likely empty) subsets. That allows us to cull incompatible
  // candidates separately for PAC-RET and BTI.

  // Partition the candidates in two sets: one with BTI enabled and one with BTI
  // disabled. Remove the candidates from the smaller set. If they are the same
  // number, prefer the non-BTI ones for outlining, since they have less
  // overhead.
  auto NoBTI =
      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
        return AFI.branchTargetEnforcement();
      });
  if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
      std::distance(NoBTI, RepeatedSequenceLocs.end()))
    RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
  else
    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);

  if (RepeatedSequenceLocs.size() < MinRepeats)
    return std::nullopt;

  // Likewise, partition the candidates according to PAC-RET enablement.
  auto NoPAC =
      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
        // If the function happens to not spill the LR, do not disqualify it
        // from the outlining.
        return AFI.shouldSignReturnAddress(true);
      });
  if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
      std::distance(NoPAC, RepeatedSequenceLocs.end()))
    RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
  else
    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);

  if (RepeatedSequenceLocs.size() < MinRepeats)
    return std::nullopt;

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  OutlinerCosts Costs(Subtarget);

  const auto &SomeMFI =
      *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
  // Adjust costs to account for the BTI instructions.
  if (SomeMFI.branchTargetEnforcement()) {
    Costs.FrameDefault += 4;
    Costs.FrameNoLRSave += 4;
    Costs.FrameRegSave += 4;
    Costs.FrameTailCall += 4;
    Costs.FrameThunk += 4;
  }

  // Adjust costs to account for sign and authentication instructions.
  if (SomeMFI.shouldSignReturnAddress(true)) {
    Costs.CallDefault += 8;          // +PAC instr, +AUT instr
    Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
  }

  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumBytesToCreateFrame = Costs.FrameDefault;

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back().isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = Costs.FrameTailCall;
    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
             LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
             LastInstrOpcode == ARM::tBLXr ||
             LastInstrOpcode == ARM::tBLXr_noip ||
             LastInstrOpcode == ARM::tBLXi) {
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = Costs.FrameThunk;
    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
  } else {
    // We need to decide how to emit calls + frames. We can always emit the same
    // frame if we don't need to save to the stack. If we have to save to the
    // stack, then we need a different frame.
    unsigned NumBytesNoStackCalls = 0;
    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;

    for (outliner::Candidate &C : RepeatedSequenceLocs) {
      // LR liveness is overestimated in return blocks, unless they end with a
      // tail call.
      const auto Last = C.getMBB()->rbegin();
      const bool LRIsAvailable =
          C.getMBB()->isReturnBlock() && !Last->isCall()
              ? isLRAvailable(TRI, Last,
                              (MachineBasicBlock::reverse_iterator)C.begin())
              : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
      if (LRIsAvailable) {
        FrameID = MachineOutlinerNoLRSave;
        NumBytesNoStackCalls += Costs.CallNoLRSave;
        C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is an unused register available? If so, we won't modify the stack, so
      // we can outline with the same frame type as those that don't save LR.
      else if (findRegisterToSaveLRTo(C)) {
        FrameID = MachineOutlinerRegSave;
        NumBytesNoStackCalls += Costs.CallRegSave;
        C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
      else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
        NumBytesNoStackCalls += Costs.CallDefault;
        C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // If we outline this, we need to modify the stack. Pretend we don't
      // outline this by saving all of its bytes.
      else
        NumBytesNoStackCalls += SequenceSize;
    }

    // If there are no places where we have to save LR, then note that we don't
    // have to update the stack. Otherwise, give every candidate the default
    // call type.
    if (NumBytesNoStackCalls <=
        RepeatedSequenceLocs.size() * Costs.CallDefault) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
      if (RepeatedSequenceLocs.size() < MinRepeats)
        return std::nullopt;
    } else
      SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
  }

  // Does every candidate's MBB contain a call? If so, then we might have a
  // call in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // Check if the range contains a call. These require a save + restore of
    // the link register.
    outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
    if (any_of(drop_end(FirstCand),
               [](const MachineInstr &MI) { return MI.isCall(); }))
      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;

    // Handle the last instruction separately. If it is a tail call, then the
    // last instruction is a call, and we don't want to save + restore in this
    // case. However, it could be possible that the last instruction is a
    // call without it being valid to tail call this sequence. We should
    // consider this as well.
    else if (FrameID != MachineOutlinerThunk &&
             FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
  }

  return std::make_unique<outliner::OutlinedFunction>(
      RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
}

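// Putting the decisions above together (rough summary): a candidate whose
// block has LR free and contains no calls is outlined as
// MachineOutlinerNoLRSave at Costs.CallNoLRSave per call site; only when LR
// must be preserved and neither a spare rGPR nor an SP-free sequence is
// available does it fall back to MachineOutlinerDefault with stack fixups.
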
bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
                                                 int64_t Fixup,
                                                 bool Updt) const {
  int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
  unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
  if (SPIdx < 0)
    // No SP operand
    return true;
  else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
    // If SP is not the base register we can't do much
    return false;

  // Stack might be involved but addressing mode doesn't handle any offset.
  // Rq: AddrModeT1_[1|2|4] don't operate on SP
  if (AddrMode == ARMII::AddrMode1 ||       // Arithmetic instructions
      AddrMode == ARMII::AddrMode4 ||       // Load/Store Multiple
      AddrMode == ARMII::AddrMode6 ||       // Neon Load/Store Multiple
      AddrMode == ARMII::AddrModeT2_so ||   // SP can't be used as base register
      AddrMode == ARMII::AddrModeT2_pc ||   // PCrel access
      AddrMode == ARMII::AddrMode2 ||       // Used by PRE and POST indexed LD/ST
      AddrMode == ARMII::AddrModeT2_i7 ||   // v8.1-M MVE
      AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
      AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
      AddrMode == ARMII::AddrModeNone ||
      AddrMode == ARMII::AddrModeT2_i8 ||   // Pre/Post inc instructions
      AddrMode == ARMII::AddrModeT2_i8neg)  // Always negative imm
    return false;

  unsigned NumOps = MI->getDesc().getNumOperands();
  unsigned ImmIdx = NumOps - 3;

  const MachineOperand &Offset = MI->getOperand(ImmIdx);
  assert(Offset.isImm() && "Is not an immediate");
  int64_t OffVal = Offset.getImm();

  if (OffVal < 0)
    // Don't override data if they are below SP.
    return false;

  unsigned NumBits = 0;
  unsigned Scale = 1;

  switch (AddrMode) {
  case ARMII::AddrMode3:
    if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
      return false;
    OffVal = ARM_AM::getAM3Offset(OffVal);
    NumBits = 8;
    break;
  case ARMII::AddrMode5:
    if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
      return false;
    OffVal = ARM_AM::getAM5Offset(OffVal);
    NumBits = 8;
    Scale = 4;
    break;
  case ARMII::AddrMode5FP16:
    if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
      return false;
    OffVal = ARM_AM::getAM5FP16Offset(OffVal);
    NumBits = 8;
    Scale = 2;
    break;
  case ARMII::AddrModeT2_i8pos:
    NumBits = 8;
    break;
  case ARMII::AddrModeT2_i8s4:
    // FIXME: Values are already scaled in this addressing mode.
    assert((Fixup & 3) == 0 && "Can't encode this offset!");
    NumBits = 10;
    break;
  case ARMII::AddrModeT2_ldrex:
    NumBits = 8;
    Scale = 4;
    break;
  case ARMII::AddrModeT2_i12:
  case ARMII::AddrMode_i12:
    NumBits = 12;
    break;
  case ARMII::AddrModeT1_s: // SP-relative LD/ST
    NumBits = 8;
    Scale = 4;
    break;
  default:
    llvm_unreachable("Unsupported addressing mode!");
  }
  // Make sure the offset is encodable for instructions that scale the
  // immediate.
  assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
         "Can't encode this offset!");
  OffVal += Fixup / Scale;

  unsigned Mask = (1 << NumBits) - 1;

  if (OffVal <= Mask) {
    if (Updt)
      MI->getOperand(ImmIdx).setImm(OffVal);
    return true;
  }

  return false;
}

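// Worked example (illustration, not from the upstream sources): for a
// t2LDRi12 of the form "ldr r0, [sp, #8]" inside a region where the outliner
// has pushed LR into an 8-byte slot (Fixup = 8), the immediate is rewritten
// to 16, which still fits the 12-bit field, so the function returns true; an
// offset that would overflow the field makes it return false instead.
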
void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
    Function &F, std::vector<outliner::Candidate> &Candidates) const {
  outliner::Candidate &C = Candidates.front();
  // branch-target-enforcement is guaranteed to be consistent between all
  // candidates, so we only need to look at one.
  const Function &CFn = C.getMF()->getFunction();
  if (CFn.hasFnAttribute("branch-target-enforcement"))
    F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));

  if (CFn.hasFnAttribute("sign-return-address"))
    F.addFnAttr(CFn.getFnAttribute("sign-return-address"));

  ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
}

bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  // Don't outline from functions with section markings; the program could
  // expect that all the code is in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (F.hasSection())
    return false;

  // FIXME: Thumb1 outlining is not handled
  if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
    return false;

  // It's safe to outline from MF.
  return true;
}

bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                              unsigned &Flags) const {
  // Check if LR is available through all of the MBB. If it's not, then set
  // a flag.
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");

  LiveRegUnits LRU(getRegisterInfo());

  for (MachineInstr &MI : llvm::reverse(MBB))
    LRU.accumulate(MI);

  // Check if each of the unsafe registers are available...
  bool R12AvailableInBlock = LRU.available(ARM::R12);
  bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);

  // If all of these are dead (and not live out), we know we don't have to check
  // them later.
  if (R12AvailableInBlock && CPSRAvailableInBlock)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;

  // Now, add the live outs to the set.
  LRU.addLiveOuts(MBB);

  // If any of these registers is available in the MBB, but also a live out of
  // the block, then we know outlining is unsafe.
  if (R12AvailableInBlock && !LRU.available(ARM::R12))
    return false;
  if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
    return false;

  // Check if there's a call inside this MachineBasicBlock. If there is, then
  // set a flag.
  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
    Flags |= MachineOutlinerMBBFlags::HasCalls;

  // LR liveness is overestimated in return blocks.
  bool LRIsAvailable =
      MBB.isReturnBlock() && !MBB.back().isCall()
          ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
          : LRU.available(ARM::LR);
  if (!LRIsAvailable)
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return true;
}

outliner::InstrType
ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
                                       MachineBasicBlock::iterator &MIT,
                                       unsigned Flags) const {
  MachineInstr &MI = *MIT;
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // PIC instructions contain labels; outlining them would break offset
  // computing.
  unsigned Opc = MI.getOpcode();
  if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
      Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
      Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
      Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
      Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
      Opc == ARM::t2MOV_ga_pcrel)
    return outliner::InstrType::Illegal;

  // Be conservative with ARMv8.1 MVE instructions.
  if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
      Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
      Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
      Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
      Opc == ARM::t2LoopEndDec)
    return outliner::InstrType::Illegal;

  const MCInstrDesc &MCID = MI.getDesc();
  uint64_t MIFlags = MCID.TSFlags;
  if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
    return outliner::InstrType::Illegal;

  // Is this a terminator for a basic block?
  if (MI.isTerminator())
    // TargetInstrInfo::getOutliningType has already filtered out anything
    // that would break this, so we can allow it here.
    return outliner::InstrType::Legal;

  // Don't outline if link register or program counter value are used.
  if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
    return outliner::InstrType::Illegal;

  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Don't outline calls to "mcount"-like functions; in particular, Linux
    // kernel function tracing relies on them.
    if (Callee &&
        (Callee->getName() == "\01__gnu_mcount_nc" ||
         Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Explicitly list the call instructions we know about so
    // we don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
        Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
        Opc == ARM::tBLXi)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something we
    // can safely outline.
    MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If we
    // don't, then don't touch it, since that implies that we haven't computed
    // anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought to not pass anything on the
    // stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Since calls are handled, don't touch LR or PC
  if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
    return outliner::InstrType::Illegal;

  // Does this use the stack?
  if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
    // True if there is no chance that any outlined candidate from this range
    // could require stack fixups. That is, both
    // * LR is available in the range (No save/restore around call)
    // * The range doesn't include calls (No save/restore in outlined frame)
    //
    // These conditions also ensure correctness of the return address
    // authentication - we insert sign and authentication instructions only if
    // we save/restore LR on stack, but then this condition ensures that the
    // outlined range does not modify the SP, therefore the SP value used for
    // signing is the same as the one used for authentication.
    // FIXME: This is very restrictive; the flags check the whole block,
    // not just the bit we will try to outline.
    bool MightNeedStackFixUp =
        (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
                  MachineOutlinerMBBFlags::HasCalls));

    if (!MightNeedStackFixUp)
      return outliner::InstrType::Legal;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant offset to
    // SP, with a bit more work.
    if (MI.modifiesRegister(ARM::SP, TRI))
      return outliner::InstrType::Illegal;

    // At this point, we have a stack instruction that we might need to fix
    // up. We'll handle it if it's a load or store.
    if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
                                  false))
      return outliner::InstrType::Legal;

    // We can't fix it up, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  // Be conservative with IT blocks.
  if (MI.readsRegister(ARM::ITSTATE, TRI) ||
      MI.modifiesRegister(ARM::ITSTATE, TRI))
    return outliner::InstrType::Illegal;

  // Don't outline CFI instructions.
  if (MI.isCFIInstruction())
    return outliner::InstrType::Illegal;

  return outliner::InstrType::Legal;
}

void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
  }
}

void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator It, bool CFI,
                                     bool Auth) const {
  int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
  unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
  assert(Align >= 8 && Align <= 256);
  if (Auth) {
    assert(Subtarget.isThumb2());
    // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
    // sequence.
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
        .addReg(ARM::R12, RegState::Kill)
        .addReg(ARM::LR, RegState::Kill)
        .addReg(ARM::SP)
        .addImm(-Align)
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
  } else {
    unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
    BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
        .addReg(ARM::LR, RegState::Kill)
        .addReg(ARM::SP)
        .addImm(-Align)
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
  }

  if (!CFI)
    return;

  MachineFunction &MF = *MBB.getParent();

  // Add a CFI, saying CFA is offset by Align bytes from SP.
  int64_t StackPosEntry =
      MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(StackPosEntry)
      .setMIFlags(MachineInstr::FrameSetup);

  // Add a CFI saying that the LR that we want to find is now higher than
  // before.
  int LROffset = Auth ? Align - 4 : Align;
  const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
  unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
  int64_t LRPosEntry = MF.addFrameInst(
      MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(LRPosEntry)
      .setMIFlags(MachineInstr::FrameSetup);
  if (Auth) {
    // Add a CFI for the location of the return address PAC.
    unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
    int64_t RACPosEntry = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
    BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
        .addCFIIndex(RACPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

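// The emitted sequence is roughly (sketch, Auth case on Thumb2, Align == 8):
//   pac                        ; R12 := PAC for the current LR/SP
//   strd r12, lr, [sp, #-8]!   ; push the PAC and LR
// followed by CFI directives describing the new CFA offset and the saved
// LR / RA_AUTH_CODE locations.
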
void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator It,
                                             Register Reg) const {
  MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
  unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

  int64_t LRPosEntry = MF.addFrameInst(
      MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(LRPosEntry)
      .setMIFlags(MachineInstr::FrameSetup);
}

void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator It,
                                          bool CFI, bool Auth) const {
  int Align = Subtarget.getStackAlignment().value();
  unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
  if (Auth) {
    assert(Subtarget.isThumb2());
    // Restore return address PAC and LR.
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
        .addReg(ARM::R12, RegState::Define)
        .addReg(ARM::LR, RegState::Define)
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .addImm(Align)
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
    // LR authentication is after the CFI instructions, below.
  } else {
    unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
    MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
                                  .addReg(ARM::SP, RegState::Define)
                                  .addReg(ARM::SP);
    if (!Subtarget.isThumb())
      MIB.addReg(0);
    MIB.addImm(Subtarget.getStackAlignment().value())
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
  }

  if (CFI) {
    // Now stack has moved back up...
    MachineFunction &MF = *MBB.getParent();
    const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
    unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameDestroy);

    // ... and we have restored LR.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
    BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameDestroy);

    if (Auth) {
      unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
      int64_t RACPosEntry = MF.addFrameInst(
          MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
      BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
          .addCFIIndex(RACPosEntry)
          .setMIFlags(MachineInstr::FrameDestroy);
    }
  }

  if (Auth)
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
}

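// Mirror image of saveLROnStack (sketch, Auth case on Thumb2, Align == 8):
//   ldrd r12, lr, [sp], #8     ; pop the PAC and LR
//   ...CFI: CFA back to offset 0, LR restored, RA_AUTH_CODE undefined...
//   aut                        ; authenticate LR against the reloaded PAC
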
void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
  MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
  unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);

  int64_t LRPosEntry =
      MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(LRPosEntry)
      .setMIFlags(MachineInstr::FrameDestroy);
}

void ARMBaseInstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::OutlinedFunction &OF) const {
  // For thunk outlining, rewrite the last instruction from a call to a
  // tail-call.
  if (OF.FrameConstructionID == MachineOutlinerThunk) {
    MachineInstr *Call = &*--MBB.instr_end();
    bool isThumb = Subtarget.isThumb();
    unsigned FuncOp = isThumb ? 2 : 0;
    unsigned Opc = Call->getOperand(FuncOp).isReg()
                       ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
                       : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
                                                             : ARM::tTAILJMPdND
                                 : ARM::TAILJMPd;
    MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
                                  .add(Call->getOperand(FuncOp));
    if (isThumb && !Call->getOperand(FuncOp).isReg())
      MIB.add(predOps(ARMCC::AL));
    Call->eraseFromParent();
  }

  // Is there a call in the outlined range?
  auto IsNonTailCall = [](MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };
  if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
        OF.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // We have to save and restore LR, so we need to add it to the liveins if it
    // is not already part of the set. This is sufficient since outlined
    // functions only have one block.
    if (!MBB.isLiveIn(ARM::LR))
      MBB.addLiveIn(ARM::LR);

    // Insert a save before the outlined region
    bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
    saveLROnStack(MBB, It, true, Auth);

    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    // Insert a restore before the terminator for the function. Restore LR.
    restoreLRFromStack(MBB, Et, true, Auth);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
      OF.FrameConstructionID == MachineOutlinerThunk)
    return;

  // Here we have to insert the return ourselves. Get the correct opcode from
  // current feature set.
  BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
      .add(predOps(ARMCC::AL));

  // Did we have to modify the stack by saving the link register?
  if (OF.FrameConstructionID != MachineOutlinerDefault &&
      OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}

MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, outliner::Candidate &C) const {
  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator CallPt;
  unsigned Opc;
  bool isThumb = Subtarget.isThumb();

  // Are we tail calling?
  if (C.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    Opc = isThumb
              ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
              : ARM::TAILJMPd;
    MIB = BuildMI(MF, DebugLoc(), get(Opc))
              .addGlobalAddress(M.getNamedValue(MF.getName()));
    if (isThumb)
      MIB.add(predOps(ARMCC::AL));
    It = MBB.insert(It, MIB);
    return It;
  }

  // Create the call instruction.
  Opc = isThumb ? ARM::tBL : ARM::BL;
  MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
  if (isThumb)
    CallMIB.add(predOps(ARMCC::AL));
  CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));

  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
      C.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
    It = MBB.insert(It, CallMIB);
    return It;
  }

  const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
  // Can we save to a register?
  if (C.CallConstructionID == MachineOutlinerRegSave) {
    Register Reg = findRegisterToSaveLRTo(C);
    assert(Reg != 0 && "No callee-saved register available?");

    // Save and restore LR from that register.
    copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
    if (!AFI.isLRSpilled())
      emitCFIForLRSaveToReg(MBB, It, Reg);
    CallPt = MBB.insert(It, CallMIB);
    copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
    if (!AFI.isLRSpilled())
      emitCFIForLRRestoreFromReg(MBB, It);
    It--;
    return CallPt;
  }
  // We have the default case. Save and restore from SP.
  if (!MBB.isLiveIn(ARM::LR))
    MBB.addLiveIn(ARM::LR);
  bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
  saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
  CallPt = MBB.insert(It, CallMIB);
  restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
  It--;
  return CallPt;
}

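// Example of the MachineOutlinerRegSave path above (r4 is only an example of
// what findRegisterToSaveLRTo might pick):
//   mov r4, lr
//   bl  OUTLINED_FUNCTION_N
//   mov lr, r4
// The default path instead brackets the BL with saveLROnStack /
// restoreLRFromStack, exactly as in buildOutlinedFrame.
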
bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return Subtarget.isMClass() && MF.getFunction().hasMinSize();
}

bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  // Try hard to rematerialize any VCTPs because if we spill P0, it will block
  // the tail predication conversion. This means that the element count
  // register has to be live for longer, but that has to be better than
  // spill/restore and VPT predication.
  return (isVCTP(&MI) && !isPredicated(MI)) ||
         TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}

unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
  return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
                                                          : ARM::BLX;
}

unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
  return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
                                                          : ARM::tBLXr;
}

unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
  return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
                                                          : ARM::BLX_pred;
}

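// Usage sketch (illustration only): code that materializes an indirect call
// can ask these helpers for the opcode, e.g.
//   BuildMI(MBB, MI, DL, TII->get(gettBLXrOpcode(MF)))...
// so that a subtarget with hardenSlsBlr() set transparently gets the *_noip
// variant.
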
class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  MachineInstr *EndLoop, *LoopCount;
  MachineFunction *MF;
  const TargetInstrInfo *TII;

  // Bitset[0 .. MAX_STAGES-1] ... iterations needed
  //       [LAST_IS_USE] : last reference to register in schedule is a use
  //       [SEEN_AS_LIVE] : normal pressure algorithm believes register is live
  static int constexpr MAX_STAGES = 30;
  static int constexpr LAST_IS_USE = MAX_STAGES;
  static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
  typedef std::bitset<MAX_STAGES + 2> IterNeed;
  typedef std::map<unsigned, IterNeed> IterNeeds;

  void bumpCrossIterationPressure(RegPressureTracker &RPT,
                                  const IterNeeds &CIN);
  bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);

  // Meaning of EndLoop and LoopCount for the two supported loop types:
  // t2Bcc:
  //   EndLoop = branch at end of original BB that will become a kernel
  //   LoopCount = CC setter live into branch
  // t2LoopEnd:
  //   EndLoop = branch at end of original BB
  //   LoopCount = t2LoopDec
public:
  ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
      : EndLoop(EndLoop), LoopCount(LoopCount),
        MF(EndLoop->getParent()->getParent()),
        TII(MF->getSubtarget().getInstrInfo()) {}

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Only ignore the terminator.
    return MI == EndLoop || MI == LoopCount;
  }

  bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
    if (tooMuchRegisterPressure(SSD, SMS))
      return false;

    return true;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &Cond) override {

    if (isCondBranchOpcode(EndLoop->getOpcode())) {
      Cond.push_back(EndLoop->getOperand(1));
      Cond.push_back(EndLoop->getOperand(2));
      if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
        TII->reverseBranchCondition(Cond);
      }
      return {};
    } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
      // General case just lets the unrolled t2LoopDec do the subtraction and
      // therefore just needs to check if zero has been reached.
      MachineInstr *LoopDec = nullptr;
      for (auto &I : MBB.instrs())
        if (I.getOpcode() == ARM::t2LoopDec)
          LoopDec = &I;
      assert(LoopDec && "Unable to find copied LoopDec");
      // Check if we're done with the loop.
      BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
          .addReg(LoopDec->getOperand(0).getReg())
          .addImm(0)
          .addImm(ARMCC::AL)
          .addReg(ARM::NoRegister);
      Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
      Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
      return {};
    } else
      llvm_unreachable("Unknown EndLoop");
  }

  void setPreheader(MachineBasicBlock *NewPreheader) override {}

  void adjustTripCount(int TripCountAdjust) override {}

  void disposed() override {}
};

void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
                                                      const IterNeeds &CIN) {
  // Increase pressure by the amounts in CrossIterationNeeds
  for (const auto &N : CIN) {
    int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
    for (int I = 0; I < Cnt; ++I)
      RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
                              LaneBitmask::getAll());
  }
  // Decrease pressure by the amounts in CrossIterationNeeds
  for (const auto &N : CIN) {
    int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
    for (int I = 0; I < Cnt; ++I)
      RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
                              LaneBitmask::getNone());
  }
}

bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
                                                   SMSchedule &SMS) {
  IterNeeds CrossIterationNeeds;

  // Determine which values will be loop-carried after the schedule is
  // applied.
  for (auto &SU : SSD.SUnits) {
    const MachineInstr *MI = SU.getInstr();
    int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
    for (auto &S : SU.Succs)
      if (MI->isPHI() && S.getKind() == SDep::Anti) {
        Register Reg = S.getReg();
        if (Reg.isVirtual())
          CrossIterationNeeds[Reg.id()].set(0);
      } else if (S.isAssignedRegDep()) {
        int OStg = SMS.stageScheduled(S.getSUnit());
        if (OStg >= 0 && OStg != Stg) {
          Register Reg = S.getReg();
          if (Reg.isVirtual())
            CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
        }
      }
  }

  // Determine more-or-less what the proposed schedule (reversed) is going to
  // be; it might not be quite the same because the within-cycle ordering
  // created by SMSchedule depends upon changes to help with address offsets and
  // the like.
  std::vector<SUnit *> ProposedSchedule;
  for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
    for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
         ++Stage) {
      std::deque<SUnit *> Instrs =
          SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
      std::sort(Instrs.begin(), Instrs.end(),
                [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
      for (SUnit *SU : Instrs)
        ProposedSchedule.push_back(SU);
    }

  // Learn whether the last use/def of each cross-iteration register is a use or
  // def. If it is a def, RegisterPressure will implicitly increase max pressure
  // and we do not have to add the pressure.
  for (auto *SU : ProposedSchedule)
    for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
         ++OperI) {
      const MachineOperand &MO = *OperI;
      if (!MO.isReg() || !MO.getReg())
        continue;
      Register Reg = MO.getReg();
      auto CIter = CrossIterationNeeds.find(Reg.id());
      if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
          CIter->second[SEEN_AS_LIVE])
        continue;
      if (MO.isDef() && !MO.isDead())
        CIter->second.set(SEEN_AS_LIVE);
      else if (MO.isUse())
        CIter->second.set(LAST_IS_USE);
    }
  for (auto &CI : CrossIterationNeeds)
    CI.second.reset(LAST_IS_USE);

  RegionPressure RecRegPressure;
  RegPressureTracker RPTracker(RecRegPressure);
  RegisterClassInfo RegClassInfo;
  RegClassInfo.runOnMachineFunction(*MF);
  RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
                 EndLoop->getParent()->end(), false, false);

  bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);

  for (auto *SU : ProposedSchedule) {
    MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
    RPTracker.setPos(std::next(CurInstI));
    RPTracker.recede();

    // Track what cross-iteration registers would be seen as live
    for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
      const MachineOperand &MO = *OperI;
      if (!MO.isReg() || !MO.getReg())
        continue;
      Register Reg = MO.getReg();
      if (MO.isDef() && !MO.isDead()) {
        auto CIter = CrossIterationNeeds.find(Reg.id());
        if (CIter != CrossIterationNeeds.end()) {
          CIter->second.reset(0);
          CIter->second.reset(SEEN_AS_LIVE);
        }
      }
    }
    for (auto &S : SU->Preds) {
      auto Stg = SMS.stageScheduled(SU);
      if (S.isAssignedRegDep()) {
        Register Reg = S.getReg();
        auto CIter = CrossIterationNeeds.find(Reg.id());
        if (CIter != CrossIterationNeeds.end()) {
          auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
          assert(Stg2 <= Stg && "Data dependence upon earlier stage");
          if (Stg - Stg2 < MAX_STAGES)
            CIter->second.set(Stg - Stg2);
          CIter->second.set(SEEN_AS_LIVE);
        }
      }
    }

    bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
  }

  auto &P = RPTracker.getPressure().MaxSetPressure;
  for (unsigned I = 0, E = P.size(); I < E; ++I) {
    // Exclude some Neon register classes.
    if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
        I == ARM::DTriple_with_qsub_0_in_QPR)
      continue;

    if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
      return true;
    }
  }
  return false;
}

std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
  if (Preheader == LoopBB)
    Preheader = *std::next(LoopBB->pred_begin());

  if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
    // If the branch is a Bcc, then the CPSR should be set somewhere within the
    // block. We need to determine the reaching definition of CPSR so that
    // it can be marked as non-pipelineable, allowing the pipeliner to force
    // it into stage 0 or give up if it cannot or will not do so.
    MachineInstr *CCSetter = nullptr;
    for (auto &L : LoopBB->instrs()) {
      if (L.isCall())
        return nullptr;
      if (isCPSRDefined(L))
        CCSetter = &L;
    }
    if (CCSetter)
      return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
    else
      return nullptr; // Unable to find the CC setter, so unable to guarantee
                      // that pipelining will work.
  }

  // Recognize:
  //   preheader:
  //     %1 = t2DoLoopStart %0
  //   loop:
  //     %2 = phi %1, <not loop>, %..., %loop
  //     %3 = t2LoopDec %2, <imm>
  //     t2LoopEnd %3, %loop
  if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
    for (auto &L : LoopBB->instrs())
      if (L.isCall())
        return nullptr;
      else if (isVCTP(&L))
        return nullptr;
    Register LoopDecResult = I->getOperand(0).getReg();
    MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
    MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
    if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
      return nullptr;
    MachineInstr *LoopStart = nullptr;
    for (auto &J : Preheader->instrs())
      if (J.getOpcode() == ARM::t2DoLoopStart)
        LoopStart = &J;
    if (!LoopStart)
      return nullptr;
    return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);