1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //===----------------------------------------------------------------------===//
13 #include "AArch64InstrInfo.h"
14 #include "AArch64MachineFunctionInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineMemOperand.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineModuleInfo.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/MC/MCInst.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/CodeGen.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Compiler.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/MathExtras.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetOptions.h"
52 #define GET_INSTRINFO_CTOR_DTOR
53 #include "AArch64GenInstrInfo.inc"
// Hidden command-line option "aarch64-tbz-offset-bits" (default 14): the
// number of displacement bits assumed for TB[N]Z branches. Per its own
// description it exists to restrict the branch range for debugging.
55 static cl::opt
<unsigned> TBZDisplacementBits(
56 "aarch64-tbz-offset-bits", cl::Hidden
, cl::init(14),
57 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
// Hidden command-line option "aarch64-cbz-offset-bits" (default 19): the
// number of displacement bits assumed for CB[N]Z branches. Per its own
// description it exists to restrict the branch range for debugging.
59 static cl::opt
<unsigned> CBZDisplacementBits(
60 "aarch64-cbz-offset-bits", cl::Hidden
, cl::init(19),
61 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
// Hidden command-line option "aarch64-bcc-offset-bits" (default 19): the
// number of displacement bits assumed for Bcc branches. Per its own
// description it exists to restrict the branch range for debugging.
63 static cl::opt
<unsigned>
64 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden
, cl::init(19),
65 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget
&STI
)
68 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN
, AArch64::ADJCALLSTACKUP
,
70 RI(STI
.getTargetTriple()), Subtarget(STI
) {}
72 /// getInstSizeInBytes - Return the number of bytes of code the specified
73 /// instruction may be. This returns the maximum number of bytes.
74 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr
&MI
) const {
75 const MachineBasicBlock
&MBB
= *MI
.getParent();
76 const MachineFunction
*MF
= MBB
.getParent();
77 const MCAsmInfo
*MAI
= MF
->getTarget().getMCAsmInfo();
79 if (MI
.getOpcode() == AArch64::INLINEASM
)
80 return getInlineAsmLength(MI
.getOperand(0).getSymbolName(), *MAI
);
82 // FIXME: We currently only handle pseudoinstructions that don't get expanded
83 // before the assembly printer.
84 unsigned NumBytes
= 0;
85 const MCInstrDesc
&Desc
= MI
.getDesc();
86 switch (Desc
.getOpcode()) {
88 // Anything not explicitly designated otherwise is a normal 4-byte insn.
91 case TargetOpcode::DBG_VALUE
:
92 case TargetOpcode::EH_LABEL
:
93 case TargetOpcode::IMPLICIT_DEF
:
94 case TargetOpcode::KILL
:
97 case TargetOpcode::STACKMAP
:
98 // The upper bound for a stackmap intrinsic is the full length of its shadow
99 NumBytes
= StackMapOpers(&MI
).getNumPatchBytes();
100 assert(NumBytes
% 4 == 0 && "Invalid number of NOP bytes requested!");
102 case TargetOpcode::PATCHPOINT
:
103 // The size of the patchpoint intrinsic is the number of bytes requested
104 NumBytes
= PatchPointOpers(&MI
).getNumPatchBytes();
105 assert(NumBytes
% 4 == 0 && "Invalid number of NOP bytes requested!");
107 case AArch64::TLSDESC_CALLSEQ
:
108 // This gets lowered to an instruction sequence which takes 16 bytes
111 case AArch64::JumpTableDest32
:
112 case AArch64::JumpTableDest16
:
113 case AArch64::JumpTableDest8
:
117 NumBytes
= MI
.getOperand(1).getImm();
124 static void parseCondBranch(MachineInstr
*LastInst
, MachineBasicBlock
*&Target
,
125 SmallVectorImpl
<MachineOperand
> &Cond
) {
126 // Block ends with fall-through condbranch.
127 switch (LastInst
->getOpcode()) {
129 llvm_unreachable("Unknown branch instruction?");
131 Target
= LastInst
->getOperand(1).getMBB();
132 Cond
.push_back(LastInst
->getOperand(0));
138 Target
= LastInst
->getOperand(1).getMBB();
139 Cond
.push_back(MachineOperand::CreateImm(-1));
140 Cond
.push_back(MachineOperand::CreateImm(LastInst
->getOpcode()));
141 Cond
.push_back(LastInst
->getOperand(0));
147 Target
= LastInst
->getOperand(2).getMBB();
148 Cond
.push_back(MachineOperand::CreateImm(-1));
149 Cond
.push_back(MachineOperand::CreateImm(LastInst
->getOpcode()));
150 Cond
.push_back(LastInst
->getOperand(0));
151 Cond
.push_back(LastInst
->getOperand(1));
155 static unsigned getBranchDisplacementBits(unsigned Opc
) {
158 llvm_unreachable("unexpected opcode!");
165 return TBZDisplacementBits
;
170 return CBZDisplacementBits
;
172 return BCCDisplacementBits
;
/// Returns true when \p BrOffset, divided by 4, passes isIntN for the
/// displacement width that getBranchDisplacementBits reports for
/// \p BranchOp.
///
/// \param BranchOp  opcode of the branch instruction being checked.
/// \param BrOffset  branch offset to test. NOTE(review): presumably a byte
///                  offset that is scaled down to 4-byte instruction units
///                  before the range test - confirm against callers.
176 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp
,
177 int64_t BrOffset
) const {
// Displacement width for this opcode; it can be overridden by the
// aarch64-*-offset-bits options declared at the top of the file.
178 unsigned Bits
= getBranchDisplacementBits(BranchOp
);
// NOTE(review): the two adjacent string literals below concatenate without a
// separating space, so the assertion message reads "...to jumpover...".
179 assert(Bits
>= 3 && "max branch displacement must be enough to jump"
180 "over conditional branch expansion");
// isIntN (llvm/Support/MathExtras.h) tests whether the value fits in a
// signed integer that is Bits wide.
181 return isIntN(Bits
, BrOffset
/ 4);
185 AArch64InstrInfo::getBranchDestBlock(const MachineInstr
&MI
) const {
186 switch (MI
.getOpcode()) {
188 llvm_unreachable("unexpected opcode!");
190 return MI
.getOperand(0).getMBB();
195 return MI
.getOperand(2).getMBB();
201 return MI
.getOperand(1).getMBB();
206 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock
&MBB
,
207 MachineBasicBlock
*&TBB
,
208 MachineBasicBlock
*&FBB
,
209 SmallVectorImpl
<MachineOperand
> &Cond
,
210 bool AllowModify
) const {
211 // If the block has no terminators, it just falls into the block after it.
212 MachineBasicBlock::iterator I
= MBB
.getLastNonDebugInstr();
216 if (!isUnpredicatedTerminator(*I
))
219 // Get the last instruction in the block.
220 MachineInstr
*LastInst
= &*I
;
222 // If there is only one terminator instruction, process it.
223 unsigned LastOpc
= LastInst
->getOpcode();
224 if (I
== MBB
.begin() || !isUnpredicatedTerminator(*--I
)) {
225 if (isUncondBranchOpcode(LastOpc
)) {
226 TBB
= LastInst
->getOperand(0).getMBB();
229 if (isCondBranchOpcode(LastOpc
)) {
230 // Block ends with fall-through condbranch.
231 parseCondBranch(LastInst
, TBB
, Cond
);
234 return true; // Can't handle indirect branch.
237 // Get the instruction before it if it is a terminator.
238 MachineInstr
*SecondLastInst
= &*I
;
239 unsigned SecondLastOpc
= SecondLastInst
->getOpcode();
241 // If AllowModify is true and the block ends with two or more unconditional
242 // branches, delete all but the first unconditional branch.
243 if (AllowModify
&& isUncondBranchOpcode(LastOpc
)) {
244 while (isUncondBranchOpcode(SecondLastOpc
)) {
245 LastInst
->eraseFromParent();
246 LastInst
= SecondLastInst
;
247 LastOpc
= LastInst
->getOpcode();
248 if (I
== MBB
.begin() || !isUnpredicatedTerminator(*--I
)) {
249 // Return now the only terminator is an unconditional branch.
250 TBB
= LastInst
->getOperand(0).getMBB();
253 SecondLastInst
= &*I
;
254 SecondLastOpc
= SecondLastInst
->getOpcode();
259 // If there are three terminators, we don't know what sort of block this is.
260 if (SecondLastInst
&& I
!= MBB
.begin() && isUnpredicatedTerminator(*--I
))
263 // If the block ends with a B and a Bcc, handle it.
264 if (isCondBranchOpcode(SecondLastOpc
) && isUncondBranchOpcode(LastOpc
)) {
265 parseCondBranch(SecondLastInst
, TBB
, Cond
);
266 FBB
= LastInst
->getOperand(0).getMBB();
270 // If the block ends with two unconditional branches, handle it. The second
271 // one is not executed, so remove it.
272 if (isUncondBranchOpcode(SecondLastOpc
) && isUncondBranchOpcode(LastOpc
)) {
273 TBB
= SecondLastInst
->getOperand(0).getMBB();
276 I
->eraseFromParent();
280 // ...likewise if it ends with an indirect branch followed by an unconditional branch.
282 if (isIndirectBranchOpcode(SecondLastOpc
) && isUncondBranchOpcode(LastOpc
)) {
285 I
->eraseFromParent();
289 // Otherwise, can't handle this.
293 bool AArch64InstrInfo::reverseBranchCondition(
294 SmallVectorImpl
<MachineOperand
> &Cond
) const {
295 if (Cond
[0].getImm() != -1) {
297 AArch64CC::CondCode CC
= (AArch64CC::CondCode
)(int)Cond
[0].getImm();
298 Cond
[0].setImm(AArch64CC::getInvertedCondCode(CC
));
300 // Folded compare-and-branch
301 switch (Cond
[1].getImm()) {
303 llvm_unreachable("Unknown conditional branch!");
305 Cond
[1].setImm(AArch64::CBNZW
);
308 Cond
[1].setImm(AArch64::CBZW
);
311 Cond
[1].setImm(AArch64::CBNZX
);
314 Cond
[1].setImm(AArch64::CBZX
);
317 Cond
[1].setImm(AArch64::TBNZW
);
320 Cond
[1].setImm(AArch64::TBZW
);
323 Cond
[1].setImm(AArch64::TBNZX
);
326 Cond
[1].setImm(AArch64::TBZX
);
334 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock
&MBB
,
335 int *BytesRemoved
) const {
336 MachineBasicBlock::iterator I
= MBB
.getLastNonDebugInstr();
340 if (!isUncondBranchOpcode(I
->getOpcode()) &&
341 !isCondBranchOpcode(I
->getOpcode()))
344 // Remove the branch.
345 I
->eraseFromParent();
349 if (I
== MBB
.begin()) {
355 if (!isCondBranchOpcode(I
->getOpcode())) {
361 // Remove the branch.
362 I
->eraseFromParent();
369 void AArch64InstrInfo::instantiateCondBranch(
370 MachineBasicBlock
&MBB
, const DebugLoc
&DL
, MachineBasicBlock
*TBB
,
371 ArrayRef
<MachineOperand
> Cond
) const {
372 if (Cond
[0].getImm() != -1) {
374 BuildMI(&MBB
, DL
, get(AArch64::Bcc
)).addImm(Cond
[0].getImm()).addMBB(TBB
);
376 // Folded compare-and-branch
377 // Note that we use addOperand instead of addReg to keep the flags.
378 const MachineInstrBuilder MIB
=
379 BuildMI(&MBB
, DL
, get(Cond
[1].getImm())).add(Cond
[2]);
381 MIB
.addImm(Cond
[3].getImm());
386 unsigned AArch64InstrInfo::insertBranch(
387 MachineBasicBlock
&MBB
, MachineBasicBlock
*TBB
, MachineBasicBlock
*FBB
,
388 ArrayRef
<MachineOperand
> Cond
, const DebugLoc
&DL
, int *BytesAdded
) const {
389 // Shouldn't be a fall through.
390 assert(TBB
&& "insertBranch must not be told to insert a fallthrough");
393 if (Cond
.empty()) // Unconditional branch?
394 BuildMI(&MBB
, DL
, get(AArch64::B
)).addMBB(TBB
);
396 instantiateCondBranch(MBB
, DL
, TBB
, Cond
);
404 // Two-way conditional branch.
405 instantiateCondBranch(MBB
, DL
, TBB
, Cond
);
406 BuildMI(&MBB
, DL
, get(AArch64::B
)).addMBB(FBB
);
414 // Find the original register that VReg is copied from.
// While VReg is a virtual register, its defining instruction is looked up in
// MRI; for a full copy, VReg is replaced by the register in operand 1 of that
// definition and the walk repeats.
// NOTE(review): the statement guarded by the isFullCopy() test and the final
// return are not visible in this listing; presumably the walk stops at the
// first non-copy definition and the current VReg is returned - confirm.
415 static unsigned removeCopies(const MachineRegisterInfo
&MRI
, unsigned VReg
) {
416 while (TargetRegisterInfo::isVirtualRegister(VReg
)) {
417 const MachineInstr
*DefMI
= MRI
.getVRegDef(VReg
);
418 if (!DefMI
->isFullCopy())
// Step to the copy's source register (operand 1 of the defining copy).
420 VReg
= DefMI
->getOperand(1).getReg();
425 // Determine if VReg is defined by an instruction that can be folded into a
426 // csel instruction. If so, return the folded opcode, and the replacement
// register (stored through NewVReg).
428 static unsigned canFoldIntoCSel(const MachineRegisterInfo
&MRI
, unsigned VReg
,
429 unsigned *NewVReg
= nullptr) {
430 VReg
= removeCopies(MRI
, VReg
);
431 if (!TargetRegisterInfo::isVirtualRegister(VReg
))
434 bool Is64Bit
= AArch64::GPR64allRegClass
.hasSubClassEq(MRI
.getRegClass(VReg
));
435 const MachineInstr
*DefMI
= MRI
.getVRegDef(VReg
);
437 unsigned SrcOpNum
= 0;
438 switch (DefMI
->getOpcode()) {
439 case AArch64::ADDSXri
:
440 case AArch64::ADDSWri
:
441 // if NZCV is used, do not fold.
442 if (DefMI
->findRegisterDefOperandIdx(AArch64::NZCV
, true) == -1)
444 // fall-through to ADDXri and ADDWri.
446 case AArch64::ADDXri
:
447 case AArch64::ADDWri
:
448 // add x, 1 -> csinc.
449 if (!DefMI
->getOperand(2).isImm() || DefMI
->getOperand(2).getImm() != 1 ||
450 DefMI
->getOperand(3).getImm() != 0)
453 Opc
= Is64Bit
? AArch64::CSINCXr
: AArch64::CSINCWr
;
456 case AArch64::ORNXrr
:
457 case AArch64::ORNWrr
: {
458 // not x -> csinv, represented as orn dst, xzr, src.
459 unsigned ZReg
= removeCopies(MRI
, DefMI
->getOperand(1).getReg());
460 if (ZReg
!= AArch64::XZR
&& ZReg
!= AArch64::WZR
)
463 Opc
= Is64Bit
? AArch64::CSINVXr
: AArch64::CSINVWr
;
467 case AArch64::SUBSXrr
:
468 case AArch64::SUBSWrr
:
469 // if NZCV is used, do not fold.
470 if (DefMI
->findRegisterDefOperandIdx(AArch64::NZCV
, true) == -1)
472 // fall-through to SUBXrr and SUBWrr.
474 case AArch64::SUBXrr
:
475 case AArch64::SUBWrr
: {
476 // neg x -> csneg, represented as sub dst, xzr, src.
477 unsigned ZReg
= removeCopies(MRI
, DefMI
->getOperand(1).getReg());
478 if (ZReg
!= AArch64::XZR
&& ZReg
!= AArch64::WZR
)
481 Opc
= Is64Bit
? AArch64::CSNEGXr
: AArch64::CSNEGWr
;
487 assert(Opc
&& SrcOpNum
&& "Missing parameters");
490 *NewVReg
= DefMI
->getOperand(SrcOpNum
).getReg();
494 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock
&MBB
,
495 ArrayRef
<MachineOperand
> Cond
,
496 unsigned TrueReg
, unsigned FalseReg
,
497 int &CondCycles
, int &TrueCycles
,
498 int &FalseCycles
) const {
499 // Check register classes.
500 const MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
501 const TargetRegisterClass
*RC
=
502 RI
.getCommonSubClass(MRI
.getRegClass(TrueReg
), MRI
.getRegClass(FalseReg
));
506 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
507 unsigned ExtraCondLat
= Cond
.size() != 1;
509 // GPRs are handled by csel.
510 // FIXME: Fold in x+1, -x, and ~x when applicable.
511 if (AArch64::GPR64allRegClass
.hasSubClassEq(RC
) ||
512 AArch64::GPR32allRegClass
.hasSubClassEq(RC
)) {
513 // Single-cycle csel, csinc, csinv, and csneg.
514 CondCycles
= 1 + ExtraCondLat
;
515 TrueCycles
= FalseCycles
= 1;
516 if (canFoldIntoCSel(MRI
, TrueReg
))
518 else if (canFoldIntoCSel(MRI
, FalseReg
))
523 // Scalar floating point is handled by fcsel.
524 // FIXME: Form fabs, fmin, and fmax when applicable.
525 if (AArch64::FPR64RegClass
.hasSubClassEq(RC
) ||
526 AArch64::FPR32RegClass
.hasSubClassEq(RC
)) {
527 CondCycles
= 5 + ExtraCondLat
;
528 TrueCycles
= FalseCycles
= 2;
536 void AArch64InstrInfo::insertSelect(MachineBasicBlock
&MBB
,
537 MachineBasicBlock::iterator I
,
538 const DebugLoc
&DL
, unsigned DstReg
,
539 ArrayRef
<MachineOperand
> Cond
,
540 unsigned TrueReg
, unsigned FalseReg
) const {
541 MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
543 // Parse the condition code, see parseCondBranch() above.
544 AArch64CC::CondCode CC
;
545 switch (Cond
.size()) {
547 llvm_unreachable("Unknown condition opcode in Cond");
549 CC
= AArch64CC::CondCode(Cond
[0].getImm());
551 case 3: { // cbz/cbnz
552 // We must insert a compare against 0.
554 switch (Cond
[1].getImm()) {
556 llvm_unreachable("Unknown branch opcode in Cond");
574 unsigned SrcReg
= Cond
[2].getReg();
576 // cmp reg, #0 is actually subs xzr, reg, #0.
577 MRI
.constrainRegClass(SrcReg
, &AArch64::GPR64spRegClass
);
578 BuildMI(MBB
, I
, DL
, get(AArch64::SUBSXri
), AArch64::XZR
)
583 MRI
.constrainRegClass(SrcReg
, &AArch64::GPR32spRegClass
);
584 BuildMI(MBB
, I
, DL
, get(AArch64::SUBSWri
), AArch64::WZR
)
591 case 4: { // tbz/tbnz
592 // We must insert a tst instruction.
593 switch (Cond
[1].getImm()) {
595 llvm_unreachable("Unknown branch opcode in Cond");
605 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
606 if (Cond
[1].getImm() == AArch64::TBZW
|| Cond
[1].getImm() == AArch64::TBNZW
)
607 BuildMI(MBB
, I
, DL
, get(AArch64::ANDSWri
), AArch64::WZR
)
608 .addReg(Cond
[2].getReg())
610 AArch64_AM::encodeLogicalImmediate(1ull << Cond
[3].getImm(), 32));
612 BuildMI(MBB
, I
, DL
, get(AArch64::ANDSXri
), AArch64::XZR
)
613 .addReg(Cond
[2].getReg())
615 AArch64_AM::encodeLogicalImmediate(1ull << Cond
[3].getImm(), 64));
621 const TargetRegisterClass
*RC
= nullptr;
622 bool TryFold
= false;
623 if (MRI
.constrainRegClass(DstReg
, &AArch64::GPR64RegClass
)) {
624 RC
= &AArch64::GPR64RegClass
;
625 Opc
= AArch64::CSELXr
;
627 } else if (MRI
.constrainRegClass(DstReg
, &AArch64::GPR32RegClass
)) {
628 RC
= &AArch64::GPR32RegClass
;
629 Opc
= AArch64::CSELWr
;
631 } else if (MRI
.constrainRegClass(DstReg
, &AArch64::FPR64RegClass
)) {
632 RC
= &AArch64::FPR64RegClass
;
633 Opc
= AArch64::FCSELDrrr
;
634 } else if (MRI
.constrainRegClass(DstReg
, &AArch64::FPR32RegClass
)) {
635 RC
= &AArch64::FPR32RegClass
;
636 Opc
= AArch64::FCSELSrrr
;
638 assert(RC
&& "Unsupported regclass");
640 // Try folding simple instructions into the csel.
642 unsigned NewVReg
= 0;
643 unsigned FoldedOpc
= canFoldIntoCSel(MRI
, TrueReg
, &NewVReg
);
645 // The folded opcodes csinc, csinv and csneg apply the operation to
646 // FalseReg, so we need to invert the condition.
647 CC
= AArch64CC::getInvertedCondCode(CC
);
650 FoldedOpc
= canFoldIntoCSel(MRI
, FalseReg
, &NewVReg
);
652 // Fold the operation. Leave any dead instructions for DCE to clean up.
656 // This extends the live range of NewVReg.
657 MRI
.clearKillFlags(NewVReg
);
661 // Pull all virtual register into the appropriate class.
662 MRI
.constrainRegClass(TrueReg
, RC
);
663 MRI
.constrainRegClass(FalseReg
, RC
);
666 BuildMI(MBB
, I
, DL
, get(Opc
), DstReg
)
672 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
///
/// \param MI       instruction whose operand 1 holds the immediate to test.
/// \param BitSize  width of the move in bits; the callers in this file pass
///                 32 for MOVi32imm and 64 for MOVi64imm.
/// \returns the result of AArch64_AM::processLogicalImmediate for the
///          immediate truncated to \p BitSize bits.
673 static bool canBeExpandedToORR(const MachineInstr
&MI
, unsigned BitSize
) {
674 uint64_t Imm
= MI
.getOperand(1).getImm();
// Shift left and then right by (64 - BitSize) to clear every bit above the low
// BitSize bits of the unsigned value.
675 uint64_t UImm
= Imm
<< (64 - BitSize
) >> (64 - BitSize
);
// NOTE(review): the declaration of Encoding is not visible in this listing;
// its value is not used by the visible code after this call.
677 return AArch64_AM::processLogicalImmediate(UImm
, BitSize
, Encoding
);
680 // FIXME: this implementation should be micro-architecture dependent, so a
681 // micro-architecture target hook should be introduced here in future.
682 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr
&MI
) const {
683 if (!Subtarget
.hasCustomCheapAsMoveHandling())
684 return MI
.isAsCheapAsAMove();
686 const unsigned Opcode
= MI
.getOpcode();
688 // Firstly, check cases gated by features.
690 if (Subtarget
.hasZeroCycleZeroingFP()) {
691 if (Opcode
== AArch64::FMOVH0
||
692 Opcode
== AArch64::FMOVS0
||
693 Opcode
== AArch64::FMOVD0
)
697 if (Subtarget
.hasZeroCycleZeroingGP()) {
698 if (Opcode
== TargetOpcode::COPY
&&
699 (MI
.getOperand(1).getReg() == AArch64::WZR
||
700 MI
.getOperand(1).getReg() == AArch64::XZR
))
704 // Secondly, check cases specific to sub-targets.
706 if (Subtarget
.hasExynosCheapAsMoveHandling()) {
707 if (isExynosCheapAsMove(MI
))
710 return MI
.isAsCheapAsAMove();
713 // Finally, check generic cases.
719 // add/sub on register without shift
720 case AArch64::ADDWri
:
721 case AArch64::ADDXri
:
722 case AArch64::SUBWri
:
723 case AArch64::SUBXri
:
724 return (MI
.getOperand(3).getImm() == 0);
726 // logical ops on immediate
727 case AArch64::ANDWri
:
728 case AArch64::ANDXri
:
729 case AArch64::EORWri
:
730 case AArch64::EORXri
:
731 case AArch64::ORRWri
:
732 case AArch64::ORRXri
:
735 // logical ops on register without shift
736 case AArch64::ANDWrr
:
737 case AArch64::ANDXrr
:
738 case AArch64::BICWrr
:
739 case AArch64::BICXrr
:
740 case AArch64::EONWrr
:
741 case AArch64::EONXrr
:
742 case AArch64::EORWrr
:
743 case AArch64::EORXrr
:
744 case AArch64::ORNWrr
:
745 case AArch64::ORNXrr
:
746 case AArch64::ORRWrr
:
747 case AArch64::ORRXrr
:
750 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
751 // ORRXri, it is as cheap as MOV
752 case AArch64::MOVi32imm
:
753 return canBeExpandedToORR(MI
, 32);
754 case AArch64::MOVi64imm
:
755 return canBeExpandedToORR(MI
, 64);
758 llvm_unreachable("Unknown opcode to check as cheap as a move!");
761 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr
&MI
) {
762 switch (MI
.getOpcode()) {
766 case AArch64::ADDWrs
:
767 case AArch64::ADDXrs
:
768 case AArch64::ADDSWrs
:
769 case AArch64::ADDSXrs
: {
770 unsigned Imm
= MI
.getOperand(3).getImm();
771 unsigned ShiftVal
= AArch64_AM::getShiftValue(Imm
);
774 return AArch64_AM::getShiftType(Imm
) == AArch64_AM::LSL
&& ShiftVal
<= 5;
777 case AArch64::ADDWrx
:
778 case AArch64::ADDXrx
:
779 case AArch64::ADDXrx64
:
780 case AArch64::ADDSWrx
:
781 case AArch64::ADDSXrx
:
782 case AArch64::ADDSXrx64
: {
783 unsigned Imm
= MI
.getOperand(3).getImm();
784 switch (AArch64_AM::getArithExtendType(Imm
)) {
787 case AArch64_AM::UXTB
:
788 case AArch64_AM::UXTH
:
789 case AArch64_AM::UXTW
:
790 case AArch64_AM::UXTX
:
791 return AArch64_AM::getArithShiftValue(Imm
) <= 4;
795 case AArch64::SUBWrs
:
796 case AArch64::SUBSWrs
: {
797 unsigned Imm
= MI
.getOperand(3).getImm();
798 unsigned ShiftVal
= AArch64_AM::getShiftValue(Imm
);
799 return ShiftVal
== 0 ||
800 (AArch64_AM::getShiftType(Imm
) == AArch64_AM::ASR
&& ShiftVal
== 31);
803 case AArch64::SUBXrs
:
804 case AArch64::SUBSXrs
: {
805 unsigned Imm
= MI
.getOperand(3).getImm();
806 unsigned ShiftVal
= AArch64_AM::getShiftValue(Imm
);
807 return ShiftVal
== 0 ||
808 (AArch64_AM::getShiftType(Imm
) == AArch64_AM::ASR
&& ShiftVal
== 63);
811 case AArch64::SUBWrx
:
812 case AArch64::SUBXrx
:
813 case AArch64::SUBXrx64
:
814 case AArch64::SUBSWrx
:
815 case AArch64::SUBSXrx
:
816 case AArch64::SUBSXrx64
: {
817 unsigned Imm
= MI
.getOperand(3).getImm();
818 switch (AArch64_AM::getArithExtendType(Imm
)) {
821 case AArch64_AM::UXTB
:
822 case AArch64_AM::UXTH
:
823 case AArch64_AM::UXTW
:
824 case AArch64_AM::UXTX
:
825 return AArch64_AM::getArithShiftValue(Imm
) == 0;
829 case AArch64::LDRBBroW
:
830 case AArch64::LDRBBroX
:
831 case AArch64::LDRBroW
:
832 case AArch64::LDRBroX
:
833 case AArch64::LDRDroW
:
834 case AArch64::LDRDroX
:
835 case AArch64::LDRHHroW
:
836 case AArch64::LDRHHroX
:
837 case AArch64::LDRHroW
:
838 case AArch64::LDRHroX
:
839 case AArch64::LDRQroW
:
840 case AArch64::LDRQroX
:
841 case AArch64::LDRSBWroW
:
842 case AArch64::LDRSBWroX
:
843 case AArch64::LDRSBXroW
:
844 case AArch64::LDRSBXroX
:
845 case AArch64::LDRSHWroW
:
846 case AArch64::LDRSHWroX
:
847 case AArch64::LDRSHXroW
:
848 case AArch64::LDRSHXroX
:
849 case AArch64::LDRSWroW
:
850 case AArch64::LDRSWroX
:
851 case AArch64::LDRSroW
:
852 case AArch64::LDRSroX
:
853 case AArch64::LDRWroW
:
854 case AArch64::LDRWroX
:
855 case AArch64::LDRXroW
:
856 case AArch64::LDRXroX
:
857 case AArch64::PRFMroW
:
858 case AArch64::PRFMroX
:
859 case AArch64::STRBBroW
:
860 case AArch64::STRBBroX
:
861 case AArch64::STRBroW
:
862 case AArch64::STRBroX
:
863 case AArch64::STRDroW
:
864 case AArch64::STRDroX
:
865 case AArch64::STRHHroW
:
866 case AArch64::STRHHroX
:
867 case AArch64::STRHroW
:
868 case AArch64::STRHroX
:
869 case AArch64::STRQroW
:
870 case AArch64::STRQroX
:
871 case AArch64::STRSroW
:
872 case AArch64::STRSroX
:
873 case AArch64::STRWroW
:
874 case AArch64::STRWroX
:
875 case AArch64::STRXroW
:
876 case AArch64::STRXroX
: {
877 unsigned IsSigned
= MI
.getOperand(3).getImm();
883 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr
&MI
) {
884 unsigned Opc
= MI
.getOpcode();
888 case AArch64::SEH_StackAlloc
:
889 case AArch64::SEH_SaveFPLR
:
890 case AArch64::SEH_SaveFPLR_X
:
891 case AArch64::SEH_SaveReg
:
892 case AArch64::SEH_SaveReg_X
:
893 case AArch64::SEH_SaveRegP
:
894 case AArch64::SEH_SaveRegP_X
:
895 case AArch64::SEH_SaveFReg
:
896 case AArch64::SEH_SaveFReg_X
:
897 case AArch64::SEH_SaveFRegP
:
898 case AArch64::SEH_SaveFRegP_X
:
899 case AArch64::SEH_SetFP
:
900 case AArch64::SEH_AddFP
:
901 case AArch64::SEH_Nop
:
902 case AArch64::SEH_PrologEnd
:
903 case AArch64::SEH_EpilogStart
:
904 case AArch64::SEH_EpilogEnd
:
/// Recognizes SBFMXri/UBFMXri instructions that perform a 32 -> 64 bit
/// extension and reports the registers involved.
///
/// \param MI      instruction to inspect.
/// \param SrcReg  set to the register in operand 1 of \p MI on a match.
/// \param DstReg  set to the register in operand 0 of \p MI on a match.
/// \param SubIdx  set to AArch64::sub_32 on a match.
/// NOTE(review): the return statements are not visible in this listing;
/// presumably true is returned only after the outputs have been filled in.
909 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr
&MI
,
910 unsigned &SrcReg
, unsigned &DstReg
,
911 unsigned &SubIdx
) const {
912 switch (MI
.getOpcode()) {
915 case AArch64::SBFMXri
: // aka sxtw
916 case AArch64::UBFMXri
: // aka uxtw
917 // Check for the 32 -> 64 bit extension case; these instructions can do
// more than that, so only the form with operand 2 == 0 and operand 3 == 31
// is treated as an extension.
919 if (MI
.getOperand(2).getImm() != 0 || MI
.getOperand(3).getImm() != 31)
921 // This is a signed or unsigned 32 -> 64 bit extension.
922 SrcReg
= MI
.getOperand(1).getReg();
923 DstReg
= MI
.getOperand(0).getReg();
924 SubIdx
= AArch64::sub_32
;
/// Tests whether two memory instructions access non-overlapping ranges off
/// the same base operand.
///
/// \param MIa  first instruction; asserted to be a load or store.
/// \param MIb  second instruction; asserted to be a load or store.
/// \param AA   alias analysis handle; not referenced by the visible code.
/// NOTE(review): the return statements are not visible in this listing;
/// presumably true is returned only when the final range test succeeds and
/// false on every other path - confirm.
929 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
930 MachineInstr
&MIa
, MachineInstr
&MIb
, AliasAnalysis
*AA
) const {
931 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
932 MachineOperand
*BaseOpA
= nullptr, *BaseOpB
= nullptr;
933 int64_t OffsetA
= 0, OffsetB
= 0;
934 unsigned WidthA
= 0, WidthB
= 0;
936 assert(MIa
.mayLoadOrStore() && "MIa must be a load or store.");
937 assert(MIb
.mayLoadOrStore() && "MIb must be a load or store.");
// Instructions with unmodeled side effects or ordered memory references are
// singled out before any offset reasoning is attempted.
939 if (MIa
.hasUnmodeledSideEffects() || MIb
.hasUnmodeledSideEffects() ||
940 MIa
.hasOrderedMemoryRef() || MIb
.hasOrderedMemoryRef())
943 // Retrieve the base, the offset from the base, and the width. Width
944 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
945 // the bases are identical, and the offset of the lower memory access plus
946 // its width doesn't overlap the offset of the higher memory access,
947 // then the memory accesses are different.
948 if (getMemOperandWithOffsetWidth(MIa
, BaseOpA
, OffsetA
, WidthA
, TRI
) &&
949 getMemOperandWithOffsetWidth(MIb
, BaseOpB
, OffsetB
, WidthB
, TRI
)) {
950 if (BaseOpA
->isIdenticalTo(*BaseOpB
)) {
// NOTE(review): OffsetA and OffsetB are int64_t but are narrowed to int in
// the three declarations below; offsets outside the int range would be
// truncated before the comparison.
951 int LowOffset
= OffsetA
< OffsetB
? OffsetA
: OffsetB
;
952 int HighOffset
= OffsetA
< OffsetB
? OffsetB
: OffsetA
;
// Width of whichever access starts at the lower offset.
953 int LowWidth
= (LowOffset
== OffsetA
) ? WidthA
: WidthB
;
// The lower access ends at or before the start of the higher one.
954 if (LowOffset
+ LowWidth
<= HighOffset
)
961 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr
&MI
,
962 const MachineBasicBlock
*MBB
,
963 const MachineFunction
&MF
) const {
964 if (TargetInstrInfo::isSchedulingBoundary(MI
, MBB
, MF
))
966 switch (MI
.getOpcode()) {
968 // CSDB hints are scheduling barriers.
969 if (MI
.getOperand(0).getImm() == 0x14)
974 // DSB and ISB also are scheduling barriers.
978 return isSEHInstruction(MI
);
981 /// analyzeCompare - For a comparison instruction, return the source registers
982 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
983 /// Return true if the comparison instruction can be analyzed.
984 bool AArch64InstrInfo::analyzeCompare(const MachineInstr
&MI
, unsigned &SrcReg
,
985 unsigned &SrcReg2
, int &CmpMask
,
986 int &CmpValue
) const {
987 // The first operand can be a frame index where we'd normally expect a register.
989 assert(MI
.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
990 if (!MI
.getOperand(1).isReg())
993 switch (MI
.getOpcode()) {
996 case AArch64::SUBSWrr
:
997 case AArch64::SUBSWrs
:
998 case AArch64::SUBSWrx
:
999 case AArch64::SUBSXrr
:
1000 case AArch64::SUBSXrs
:
1001 case AArch64::SUBSXrx
:
1002 case AArch64::ADDSWrr
:
1003 case AArch64::ADDSWrs
:
1004 case AArch64::ADDSWrx
:
1005 case AArch64::ADDSXrr
:
1006 case AArch64::ADDSXrs
:
1007 case AArch64::ADDSXrx
:
1008 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1009 SrcReg
= MI
.getOperand(1).getReg();
1010 SrcReg2
= MI
.getOperand(2).getReg();
1014 case AArch64::SUBSWri
:
1015 case AArch64::ADDSWri
:
1016 case AArch64::SUBSXri
:
1017 case AArch64::ADDSXri
:
1018 SrcReg
= MI
.getOperand(1).getReg();
1021 // FIXME: In order to convert CmpValue to 0 or 1
1022 CmpValue
= MI
.getOperand(2).getImm() != 0;
1024 case AArch64::ANDSWri
:
1025 case AArch64::ANDSXri
:
1026 // ANDS does not use the same encoding scheme as the other xxxS instructions.
1028 SrcReg
= MI
.getOperand(1).getReg();
1031 // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
1032 // while the type of CmpValue is int. When converting uint64_t to int,
1033 // the high 32 bits of uint64_t will be lost.
1034 // In fact it causes a bug in spec2006-483.xalancbmk.
1035 // CmpValue is only used to compare with zero in optimizeCompareInstr.
1036 CmpValue
= AArch64_AM::decodeLogicalImmediate(
1037 MI
.getOperand(2).getImm(),
1038 MI
.getOpcode() == AArch64::ANDSWri
? 32 : 64) != 0;
/// Checks every operand of \p Instr against the register class constraint
/// that getRegClassConstraint reports for that operand position.
///
/// Physical registers are tested with contains(). Virtual registers whose
/// current class is not already a sub-class of the constraint are narrowed
/// with MachineRegisterInfo::constrainRegClass.
///
/// \param Instr  instruction whose operands are checked; it must have a
///               parent basic block and function (both are asserted).
/// NOTE(review): the return statements are not visible in this listing. The
/// caller in this file asserts that the result is true ("Some operands reg
/// class are incompatible!"), so presumably false is returned as soon as one
/// operand cannot satisfy its constraint - confirm.
1045 static bool UpdateOperandRegClass(MachineInstr
&Instr
) {
1046 MachineBasicBlock
*MBB
= Instr
.getParent();
1047 assert(MBB
&& "Can't get MachineBasicBlock here");
1048 MachineFunction
*MF
= MBB
->getParent();
1049 assert(MF
&& "Can't get MachineFunction here");
// Target hooks and register info are taken from the function's subtarget.
1050 const TargetInstrInfo
*TII
= MF
->getSubtarget().getInstrInfo();
1051 const TargetRegisterInfo
*TRI
= MF
->getSubtarget().getRegisterInfo();
1052 MachineRegisterInfo
*MRI
= &MF
->getRegInfo();
// Walk the operands by index; the count is read once before the loop.
1054 for (unsigned OpIdx
= 0, EndIdx
= Instr
.getNumOperands(); OpIdx
< EndIdx
;
1056 MachineOperand
&MO
= Instr
.getOperand(OpIdx
);
1057 const TargetRegisterClass
*OpRegCstraints
=
1058 Instr
.getRegClassConstraint(OpIdx
, TII
, TRI
);
1060 // If there's no constraint, there's nothing to do.
1061 if (!OpRegCstraints
)
1063 // If the operand is a frame index, there's nothing to do here.
1064 // A frame index operand will resolve correctly during PEI.
1068 assert(MO
.isReg() &&
1069 "Operand has register constraints without being a register!");
1071 unsigned Reg
= MO
.getReg();
// Physical register: it must itself be a member of the constraint class.
1072 if (TargetRegisterInfo::isPhysicalRegister(Reg
)) {
1073 if (!OpRegCstraints
->contains(Reg
))
// Virtual register: accept it if its class already fits, otherwise try to
// constrain it; constrainRegClass is only called when the first test fails.
1075 } else if (!OpRegCstraints
->hasSubClassEq(MRI
->getRegClass(Reg
)) &&
1076 !MRI
->constrainRegClass(Reg
, OpRegCstraints
))
1083 /// Return the opcode that does not set flags when possible - otherwise
1084 /// return the original opcode. The caller is responsible for performing the
1085 /// actual substitution and legality checking.
///
/// Mapping visible below: the ADDS/SUBS "rr" and "rx" forms always map to
/// their ADD/SUB counterparts, while the "ri" and "rs" forms keep the
/// flag-setting opcode when \p MI defines WZR or XZR.
1086 static unsigned convertToNonFlagSettingOpc(const MachineInstr
&MI
) {
1087 // Don't convert all compare instructions, because for some the zero register
1088 // encoding becomes the sp register.
1089 bool MIDefinesZeroReg
= false;
1090 if (MI
.definesRegister(AArch64::WZR
) || MI
.definesRegister(AArch64::XZR
))
1091 MIDefinesZeroReg
= true;
1093 switch (MI
.getOpcode()) {
// NOTE(review): the label for the return below is not visible in this
// listing; presumably it is the default arm, leaving other opcodes unchanged.
1095 return MI
.getOpcode();
// 32-bit ADDS forms.
1096 case AArch64::ADDSWrr
:
1097 return AArch64::ADDWrr
;
1098 case AArch64::ADDSWri
:
1099 return MIDefinesZeroReg
? AArch64::ADDSWri
: AArch64::ADDWri
;
1100 case AArch64::ADDSWrs
:
1101 return MIDefinesZeroReg
? AArch64::ADDSWrs
: AArch64::ADDWrs
;
1102 case AArch64::ADDSWrx
:
1103 return AArch64::ADDWrx
;
// 64-bit ADDS forms.
1104 case AArch64::ADDSXrr
:
1105 return AArch64::ADDXrr
;
1106 case AArch64::ADDSXri
:
1107 return MIDefinesZeroReg
? AArch64::ADDSXri
: AArch64::ADDXri
;
1108 case AArch64::ADDSXrs
:
1109 return MIDefinesZeroReg
? AArch64::ADDSXrs
: AArch64::ADDXrs
;
1110 case AArch64::ADDSXrx
:
1111 return AArch64::ADDXrx
;
// 32-bit SUBS forms.
1112 case AArch64::SUBSWrr
:
1113 return AArch64::SUBWrr
;
1114 case AArch64::SUBSWri
:
1115 return MIDefinesZeroReg
? AArch64::SUBSWri
: AArch64::SUBWri
;
1116 case AArch64::SUBSWrs
:
1117 return MIDefinesZeroReg
? AArch64::SUBSWrs
: AArch64::SUBWrs
;
1118 case AArch64::SUBSWrx
:
1119 return AArch64::SUBWrx
;
// 64-bit SUBS forms.
1120 case AArch64::SUBSXrr
:
1121 return AArch64::SUBXrr
;
1122 case AArch64::SUBSXri
:
1123 return MIDefinesZeroReg
? AArch64::SUBSXri
: AArch64::SUBXri
;
1124 case AArch64::SUBSXrs
:
1125 return MIDefinesZeroReg
? AArch64::SUBSXrs
: AArch64::SUBXrs
;
1126 case AArch64::SUBSXrx
:
1127 return AArch64::SUBXrx
;
// Bit flags selecting which kind of NZCV access
// areCFlagsAccessedBetweenInstrs looks for: AK_Write for modifications,
// AK_Read for reads, and AK_All (the bitwise OR of the two) for either.
1131 enum AccessKind
{ AK_Write
= 0x01, AK_Read
= 0x10, AK_All
= 0x11 };
1133 /// True when condition flags are accessed (either by writing or reading)
1134 /// on the instruction trace starting at From and ending at To.
1136 /// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed somewhere between them.
///
/// \param From           upper end of the range; asserted to precede \p To
///                       in the same block.
/// \param To             lower end of the range; the backward scan starts at
///                       the instruction just before it.
/// \param TRI            register info passed to the NZCV queries.
/// \param AccessToCheck  which accesses count: AK_Write, AK_Read or AK_All
///                       (the default).
/// NOTE(review): the return statements are not visible in this listing.
1138 static bool areCFlagsAccessedBetweenInstrs(
1139 MachineBasicBlock::iterator From
, MachineBasicBlock::iterator To
,
1140 const TargetRegisterInfo
*TRI
, const AccessKind AccessToCheck
= AK_All
) {
1141 // Early exit if To is at the beginning of the BB.
1142 if (To
== To
->getParent()->begin())
1145 // Check whether the instructions are in the same basic block.
1146 // If not, assume the condition flags might get modified somewhere.
1147 if (To
->getParent() != From
->getParent())
1150 // From must be above To.
// The assert searches backward from the instruction before To towards the
// start of the block and requires that From is found.
1151 assert(std::find_if(++To
.getReverse(), To
->getParent()->rend(),
1152 [From
](MachineInstr
&MI
) {
1153 return MI
.getIterator() == From
;
1154 }) != To
->getParent()->rend());
1156 // We iterate backward starting \p To until we hit \p From.
// To is decremented before the first test, and the loop stops when it equals
// From, so neither To's original instruction nor From is inspected.
1157 for (--To
; To
!= From
; --To
) {
1158 const MachineInstr
&Instr
= *To
;
// An instruction matches when it modifies NZCV and writes are being checked,
// or reads NZCV and reads are being checked.
1160 if (((AccessToCheck
& AK_Write
) &&
1161 Instr
.modifiesRegister(AArch64::NZCV
, TRI
)) ||
1162 ((AccessToCheck
& AK_Read
) && Instr
.readsRegister(AArch64::NZCV
, TRI
)))
1168 /// Try to optimize a compare instruction. A compare instruction is an
1169 /// instruction which produces AArch64::NZCV. It can be truly compare
1171 /// when there are no uses of its destination register.
1173 /// The following steps are tried in order:
1174 /// 1. Convert CmpInstr into an unconditional version.
1175 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1176 /// condition code or an instruction which can be converted into such an
1178 /// Only comparison with zero is supported.
1179 bool AArch64InstrInfo::optimizeCompareInstr(
1180 MachineInstr
&CmpInstr
, unsigned SrcReg
, unsigned SrcReg2
, int CmpMask
,
1181 int CmpValue
, const MachineRegisterInfo
*MRI
) const {
1182 assert(CmpInstr
.getParent());
1185 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1186 int DeadNZCVIdx
= CmpInstr
.findRegisterDefOperandIdx(AArch64::NZCV
, true);
1187 if (DeadNZCVIdx
!= -1) {
1188 if (CmpInstr
.definesRegister(AArch64::WZR
) ||
1189 CmpInstr
.definesRegister(AArch64::XZR
)) {
1190 CmpInstr
.eraseFromParent();
1193 unsigned Opc
= CmpInstr
.getOpcode();
1194 unsigned NewOpc
= convertToNonFlagSettingOpc(CmpInstr
);
1197 const MCInstrDesc
&MCID
= get(NewOpc
);
1198 CmpInstr
.setDesc(MCID
);
1199 CmpInstr
.RemoveOperand(DeadNZCVIdx
);
1200 bool succeeded
= UpdateOperandRegClass(CmpInstr
);
1202 assert(succeeded
&& "Some operands reg class are incompatible!");
1206 // Continue only if we have a "ri" where immediate is zero.
1207 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1209 assert((CmpValue
== 0 || CmpValue
== 1) && "CmpValue must be 0 or 1!");
1210 if (CmpValue
!= 0 || SrcReg2
!= 0)
1213 // CmpInstr is a Compare instruction if destination register is not used.
1214 if (!MRI
->use_nodbg_empty(CmpInstr
.getOperand(0).getReg()))
1217 return substituteCmpToZero(CmpInstr
, SrcReg
, MRI
);
1220 /// Get opcode of S version of Instr.
1221 /// If Instr is S version its opcode is returned.
1222 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1223 /// or we are not interested in it.
1224 static unsigned sForm(MachineInstr
&Instr
) {
1225 switch (Instr
.getOpcode()) {
1227 return AArch64::INSTRUCTION_LIST_END
;
1229 case AArch64::ADDSWrr
:
1230 case AArch64::ADDSWri
:
1231 case AArch64::ADDSXrr
:
1232 case AArch64::ADDSXri
:
1233 case AArch64::SUBSWrr
:
1234 case AArch64::SUBSWri
:
1235 case AArch64::SUBSXrr
:
1236 case AArch64::SUBSXri
:
1237 return Instr
.getOpcode();
1239 case AArch64::ADDWrr
:
1240 return AArch64::ADDSWrr
;
1241 case AArch64::ADDWri
:
1242 return AArch64::ADDSWri
;
1243 case AArch64::ADDXrr
:
1244 return AArch64::ADDSXrr
;
1245 case AArch64::ADDXri
:
1246 return AArch64::ADDSXri
;
1247 case AArch64::ADCWr
:
1248 return AArch64::ADCSWr
;
1249 case AArch64::ADCXr
:
1250 return AArch64::ADCSXr
;
1251 case AArch64::SUBWrr
:
1252 return AArch64::SUBSWrr
;
1253 case AArch64::SUBWri
:
1254 return AArch64::SUBSWri
;
1255 case AArch64::SUBXrr
:
1256 return AArch64::SUBSXrr
;
1257 case AArch64::SUBXri
:
1258 return AArch64::SUBSXri
;
1259 case AArch64::SBCWr
:
1260 return AArch64::SBCSWr
;
1261 case AArch64::SBCXr
:
1262 return AArch64::SBCSXr
;
1263 case AArch64::ANDWri
:
1264 return AArch64::ANDSWri
;
1265 case AArch64::ANDXri
:
1266 return AArch64::ANDSXri
;
1270 /// Check if AArch64::NZCV should be alive in successors of MBB.
1271 static bool areCFlagsAliveInSuccessors(MachineBasicBlock
*MBB
) {
1272 for (auto *BB
: MBB
->successors())
1273 if (BB
->isLiveIn(AArch64::NZCV
))
namespace {

/// One boolean per condition flag; a set member means that flag is used.
/// A default-constructed object marks no flag as used.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Merge \p UsedFlags into this set (flag-wise OR).
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    N = N || UsedFlags.N;
    Z = Z || UsedFlags.Z;
    C = C || UsedFlags.C;
    V = V || UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace
1299 /// Find a condition code used by the instruction.
1300 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1301 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1302 static AArch64CC::CondCode
findCondCodeUsedByInstr(const MachineInstr
&Instr
) {
1303 switch (Instr
.getOpcode()) {
1305 return AArch64CC::Invalid
;
1307 case AArch64::Bcc
: {
1308 int Idx
= Instr
.findRegisterUseOperandIdx(AArch64::NZCV
);
1310 return static_cast<AArch64CC::CondCode
>(Instr
.getOperand(Idx
- 2).getImm());
1313 case AArch64::CSINVWr
:
1314 case AArch64::CSINVXr
:
1315 case AArch64::CSINCWr
:
1316 case AArch64::CSINCXr
:
1317 case AArch64::CSELWr
:
1318 case AArch64::CSELXr
:
1319 case AArch64::CSNEGWr
:
1320 case AArch64::CSNEGXr
:
1321 case AArch64::FCSELSrrr
:
1322 case AArch64::FCSELDrrr
: {
1323 int Idx
= Instr
.findRegisterUseOperandIdx(AArch64::NZCV
);
1325 return static_cast<AArch64CC::CondCode
>(Instr
.getOperand(Idx
- 1).getImm());
1330 static UsedNZCV
getUsedNZCV(AArch64CC::CondCode CC
) {
1331 assert(CC
!= AArch64CC::Invalid
);
1337 case AArch64CC::EQ
: // Z set
1338 case AArch64CC::NE
: // Z clear
1342 case AArch64CC::HI
: // Z clear and C set
1343 case AArch64CC::LS
: // Z set or C clear
1346 case AArch64CC::HS
: // C set
1347 case AArch64CC::LO
: // C clear
1351 case AArch64CC::MI
: // N set
1352 case AArch64CC::PL
: // N clear
1356 case AArch64CC::VS
: // V set
1357 case AArch64CC::VC
: // V clear
1361 case AArch64CC::GT
: // Z clear, N and V the same
1362 case AArch64CC::LE
: // Z set, N and V differ
1365 case AArch64CC::GE
: // N and V the same
1366 case AArch64CC::LT
: // N and V differ
1374 static bool isADDSRegImm(unsigned Opcode
) {
1375 return Opcode
== AArch64::ADDSWri
|| Opcode
== AArch64::ADDSXri
;
1378 static bool isSUBSRegImm(unsigned Opcode
) {
1379 return Opcode
== AArch64::SUBSWri
|| Opcode
== AArch64::SUBSXri
;
1382 /// Check if CmpInstr can be substituted by MI.
1384 /// CmpInstr can be substituted:
1385 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1386 /// - and, MI and CmpInstr are from the same MachineBB
1387 /// - and, condition flags are not alive in successors of the CmpInstr parent
1388 /// - and, if MI opcode is the S form there must be no defs of flags between
1390 /// or if MI opcode is not the S form there must be neither defs of flags
1391 /// nor uses of flags between MI and CmpInstr.
1392 /// - and C/V flags are not used after CmpInstr
1393 static bool canInstrSubstituteCmpInstr(MachineInstr
*MI
, MachineInstr
*CmpInstr
,
1394 const TargetRegisterInfo
*TRI
) {
1396 assert(sForm(*MI
) != AArch64::INSTRUCTION_LIST_END
);
1399 const unsigned CmpOpcode
= CmpInstr
->getOpcode();
1400 if (!isADDSRegImm(CmpOpcode
) && !isSUBSRegImm(CmpOpcode
))
1403 if (MI
->getParent() != CmpInstr
->getParent())
1406 if (areCFlagsAliveInSuccessors(CmpInstr
->getParent()))
1409 AccessKind AccessToCheck
= AK_Write
;
1410 if (sForm(*MI
) != MI
->getOpcode())
1411 AccessToCheck
= AK_All
;
1412 if (areCFlagsAccessedBetweenInstrs(MI
, CmpInstr
, TRI
, AccessToCheck
))
1415 UsedNZCV NZCVUsedAfterCmp
;
1416 for (auto I
= std::next(CmpInstr
->getIterator()),
1417 E
= CmpInstr
->getParent()->instr_end();
1419 const MachineInstr
&Instr
= *I
;
1420 if (Instr
.readsRegister(AArch64::NZCV
, TRI
)) {
1421 AArch64CC::CondCode CC
= findCondCodeUsedByInstr(Instr
);
1422 if (CC
== AArch64CC::Invalid
) // Unsupported conditional instruction
1424 NZCVUsedAfterCmp
|= getUsedNZCV(CC
);
1427 if (Instr
.modifiesRegister(AArch64::NZCV
, TRI
))
1431 return !NZCVUsedAfterCmp
.C
&& !NZCVUsedAfterCmp
.V
;
1434 /// Substitute an instruction comparing to zero with another instruction
1435 /// which produces needed condition flags.
1437 /// Return true on success.
1438 bool AArch64InstrInfo::substituteCmpToZero(
1439 MachineInstr
&CmpInstr
, unsigned SrcReg
,
1440 const MachineRegisterInfo
*MRI
) const {
1442 // Get the unique definition of SrcReg.
1443 MachineInstr
*MI
= MRI
->getUniqueVRegDef(SrcReg
);
1447 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
1449 unsigned NewOpc
= sForm(*MI
);
1450 if (NewOpc
== AArch64::INSTRUCTION_LIST_END
)
1453 if (!canInstrSubstituteCmpInstr(MI
, &CmpInstr
, TRI
))
1456 // Update the instruction to set NZCV.
1457 MI
->setDesc(get(NewOpc
));
1458 CmpInstr
.eraseFromParent();
1459 bool succeeded
= UpdateOperandRegClass(*MI
);
1461 assert(succeeded
&& "Some operands reg class are incompatible!");
1462 MI
->addRegisterDefined(AArch64::NZCV
, TRI
);
/// Expand the LOAD_STACK_GUARD and CATCHRET pseudo instructions.
///
/// Any other opcode is filtered out by the guard at the top of the function
/// (the guarded statement is not visible in this excerpt).
///
/// - CATCHRET: walks backwards from \p MI over instructions flagged
///   FrameDestroy and builds an ADRP and an ADDXri, both defining X0, at that
///   position. The target block is read from operand 0.
/// - LOAD_STACK_GUARD: the destination register is operand 0 and the guard
///   variable is the GlobalValue attached to the first memory operand. The
///   emitted sequence depends on how the subtarget classifies the reference
///   and on the code model: LOADgot + LDRXui for GOT references,
///   MOVZXi + 3x MOVKXi + LDRXui for the large code model, ADR for the tiny
///   code model.
/// NOTE(review): the ADRP + LDRXui pair that follows the tiny-model ADR
/// presumably belongs to a final else branch whose line is missing from this
/// excerpt - confirm against the full source.
1466 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr
&MI
) const {
1467 if (MI
.getOpcode() != TargetOpcode::LOAD_STACK_GUARD
&&
1468 MI
.getOpcode() != AArch64::CATCHRET
)
1471 MachineBasicBlock
&MBB
= *MI
.getParent();
1472 DebugLoc DL
= MI
.getDebugLoc();
// --- CATCHRET expansion ---
1474 if (MI
.getOpcode() == AArch64::CATCHRET
) {
1475 // Skip to the first instruction before the epilog.
1476 const TargetInstrInfo
*TII
=
1477 MBB
.getParent()->getSubtarget().getInstrInfo();
1478 MachineBasicBlock
*TargetMBB
= MI
.getOperand(0).getMBB();
1479 auto MBBI
= MachineBasicBlock::iterator(MI
);
1480 MachineBasicBlock::iterator FirstEpilogSEH
= std::prev(MBBI
);
// Step back while the instruction carries the FrameDestroy flag, stopping at
// the start of the block.
1481 while (FirstEpilogSEH
->getFlag(MachineInstr::FrameDestroy
) &&
1482 FirstEpilogSEH
!= MBB
.begin())
1483 FirstEpilogSEH
= std::prev(FirstEpilogSEH
);
// Unless the walk stopped at the block start, move forward by one again so
// the new instructions are inserted after the instruction it stopped on.
1484 if (FirstEpilogSEH
!= MBB
.begin())
1485 FirstEpilogSEH
= std::next(FirstEpilogSEH
);
1486 BuildMI(MBB
, FirstEpilogSEH
, DL
, TII
->get(AArch64::ADRP
))
1487 .addReg(AArch64::X0
, RegState::Define
)
1489 BuildMI(MBB
, FirstEpilogSEH
, DL
, TII
->get(AArch64::ADDXri
))
1490 .addReg(AArch64::X0
, RegState::Define
)
1491 .addReg(AArch64::X0
)
// --- LOAD_STACK_GUARD expansion ---
1497 unsigned Reg
= MI
.getOperand(0).getReg();
1498 const GlobalValue
*GV
=
1499 cast
<GlobalValue
>((*MI
.memoperands_begin())->getValue());
1500 const TargetMachine
&TM
= MBB
.getParent()->getTarget();
1501 unsigned char OpFlags
= Subtarget
.ClassifyGlobalReference(GV
, TM
);
1502 const unsigned char MO_NC
= AArch64II::MO_NC
;
// Reference goes through the GOT: load the address, then load through it.
1504 if ((OpFlags
& AArch64II::MO_GOT
) != 0) {
1505 BuildMI(MBB
, MI
, DL
, get(AArch64::LOADgot
), Reg
)
1506 .addGlobalAddress(GV
, 0, OpFlags
);
1507 BuildMI(MBB
, MI
, DL
, get(AArch64::LDRXui
), Reg
)
1508 .addReg(Reg
, RegState::Kill
)
1510 .addMemOperand(*MI
.memoperands_begin());
// Large code model: assemble the address from the MO_G0..MO_G3 pieces with
// one MOVZXi and three MOVKXi, then load through it.
1511 } else if (TM
.getCodeModel() == CodeModel::Large
) {
1512 BuildMI(MBB
, MI
, DL
, get(AArch64::MOVZXi
), Reg
)
1513 .addGlobalAddress(GV
, 0, AArch64II::MO_G0
| MO_NC
)
1515 BuildMI(MBB
, MI
, DL
, get(AArch64::MOVKXi
), Reg
)
1516 .addReg(Reg
, RegState::Kill
)
1517 .addGlobalAddress(GV
, 0, AArch64II::MO_G1
| MO_NC
)
1519 BuildMI(MBB
, MI
, DL
, get(AArch64::MOVKXi
), Reg
)
1520 .addReg(Reg
, RegState::Kill
)
1521 .addGlobalAddress(GV
, 0, AArch64II::MO_G2
| MO_NC
)
1523 BuildMI(MBB
, MI
, DL
, get(AArch64::MOVKXi
), Reg
)
1524 .addReg(Reg
, RegState::Kill
)
1525 .addGlobalAddress(GV
, 0, AArch64II::MO_G3
)
1527 BuildMI(MBB
, MI
, DL
, get(AArch64::LDRXui
), Reg
)
1528 .addReg(Reg
, RegState::Kill
)
1530 .addMemOperand(*MI
.memoperands_begin());
// Tiny code model: a single ADR.
1531 } else if (TM
.getCodeModel() == CodeModel::Tiny
) {
1532 BuildMI(MBB
, MI
, DL
, get(AArch64::ADR
), Reg
)
1533 .addGlobalAddress(GV
, 0, OpFlags
);
// ADRP of the page followed by an LDRXui using the page offset (MO_PAGEOFF
// combined with MO_NC).
1535 BuildMI(MBB
, MI
, DL
, get(AArch64::ADRP
), Reg
)
1536 .addGlobalAddress(GV
, 0, OpFlags
| AArch64II::MO_PAGE
);
1537 unsigned char LoFlags
= OpFlags
| AArch64II::MO_PAGEOFF
| MO_NC
;
1538 BuildMI(MBB
, MI
, DL
, get(AArch64::LDRXui
), Reg
)
1539 .addReg(Reg
, RegState::Kill
)
1540 .addGlobalAddress(GV
, 0, LoFlags
)
1541 .addMemOperand(*MI
.memoperands_begin());
1549 // Return true if this instruction simply sets its single destination register
1550 // to zero. This is equivalent to a register rename of the zero-register.
1551 bool AArch64InstrInfo::isGPRZero(const MachineInstr
&MI
) {
1552 switch (MI
.getOpcode()) {
1555 case AArch64::MOVZWi
:
1556 case AArch64::MOVZXi
: // movz Rd, #0 (LSL #0)
1557 if (MI
.getOperand(1).isImm() && MI
.getOperand(1).getImm() == 0) {
1558 assert(MI
.getDesc().getNumOperands() == 3 &&
1559 MI
.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1563 case AArch64::ANDWri
: // and Rd, Rzr, #imm
1564 return MI
.getOperand(1).getReg() == AArch64::WZR
;
1565 case AArch64::ANDXri
:
1566 return MI
.getOperand(1).getReg() == AArch64::XZR
;
1567 case TargetOpcode::COPY
:
1568 return MI
.getOperand(1).getReg() == AArch64::WZR
;
1573 // Return true if this instruction simply renames a general register without
1575 bool AArch64InstrInfo::isGPRCopy(const MachineInstr
&MI
) {
1576 switch (MI
.getOpcode()) {
1579 case TargetOpcode::COPY
: {
1580 // GPR32 copies will by lowered to ORRXrs
1581 unsigned DstReg
= MI
.getOperand(0).getReg();
1582 return (AArch64::GPR32RegClass
.contains(DstReg
) ||
1583 AArch64::GPR64RegClass
.contains(DstReg
));
1585 case AArch64::ORRXrs
: // orr Xd, Xzr, Xm (LSL #0)
1586 if (MI
.getOperand(1).getReg() == AArch64::XZR
) {
1587 assert(MI
.getDesc().getNumOperands() == 4 &&
1588 MI
.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1592 case AArch64::ADDXri
: // add Xd, Xn, #0 (LSL #0)
1593 if (MI
.getOperand(2).getImm() == 0) {
1594 assert(MI
.getDesc().getNumOperands() == 4 &&
1595 MI
.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1603 // Return true if this instruction simply renames a general register without
1605 bool AArch64InstrInfo::isFPRCopy(const MachineInstr
&MI
) {
1606 switch (MI
.getOpcode()) {
1609 case TargetOpcode::COPY
: {
1610 // FPR64 copies will by lowered to ORR.16b
1611 unsigned DstReg
= MI
.getOperand(0).getReg();
1612 return (AArch64::FPR64RegClass
.contains(DstReg
) ||
1613 AArch64::FPR128RegClass
.contains(DstReg
));
1615 case AArch64::ORRv16i8
:
1616 if (MI
.getOperand(1).getReg() == MI
.getOperand(2).getReg()) {
1617 assert(MI
.getDesc().getNumOperands() == 3 && MI
.getOperand(0).isReg() &&
1618 "invalid ORRv16i8 operands");
1626 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr
&MI
,
1627 int &FrameIndex
) const {
1628 switch (MI
.getOpcode()) {
1631 case AArch64::LDRWui
:
1632 case AArch64::LDRXui
:
1633 case AArch64::LDRBui
:
1634 case AArch64::LDRHui
:
1635 case AArch64::LDRSui
:
1636 case AArch64::LDRDui
:
1637 case AArch64::LDRQui
:
1638 if (MI
.getOperand(0).getSubReg() == 0 && MI
.getOperand(1).isFI() &&
1639 MI
.getOperand(2).isImm() && MI
.getOperand(2).getImm() == 0) {
1640 FrameIndex
= MI
.getOperand(1).getIndex();
1641 return MI
.getOperand(0).getReg();
1649 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr
&MI
,
1650 int &FrameIndex
) const {
1651 switch (MI
.getOpcode()) {
1654 case AArch64::STRWui
:
1655 case AArch64::STRXui
:
1656 case AArch64::STRBui
:
1657 case AArch64::STRHui
:
1658 case AArch64::STRSui
:
1659 case AArch64::STRDui
:
1660 case AArch64::STRQui
:
1661 if (MI
.getOperand(0).getSubReg() == 0 && MI
.getOperand(1).isFI() &&
1662 MI
.getOperand(2).isImm() && MI
.getOperand(2).getImm() == 0) {
1663 FrameIndex
= MI
.getOperand(1).getIndex();
1664 return MI
.getOperand(0).getReg();
1671 /// Check all MachineMemOperands for a hint to suppress pairing.
1672 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr
&MI
) {
1673 return llvm::any_of(MI
.memoperands(), [](MachineMemOperand
*MMO
) {
1674 return MMO
->getFlags() & MOSuppressPair
;
1678 /// Set a flag on the first MachineMemOperand to suppress pairing.
1679 void AArch64InstrInfo::suppressLdStPair(MachineInstr
&MI
) {
1680 if (MI
.memoperands_empty())
1682 (*MI
.memoperands_begin())->setFlags(MOSuppressPair
);
1685 /// Check all MachineMemOperands for a hint that the load/store is strided.
1686 bool AArch64InstrInfo::isStridedAccess(const MachineInstr
&MI
) {
1687 return llvm::any_of(MI
.memoperands(), [](MachineMemOperand
*MMO
) {
1688 return MMO
->getFlags() & MOStridedAccess
;
1692 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc
) {
1696 case AArch64::STURSi
:
1697 case AArch64::STURDi
:
1698 case AArch64::STURQi
:
1699 case AArch64::STURBBi
:
1700 case AArch64::STURHHi
:
1701 case AArch64::STURWi
:
1702 case AArch64::STURXi
:
1703 case AArch64::LDURSi
:
1704 case AArch64::LDURDi
:
1705 case AArch64::LDURQi
:
1706 case AArch64::LDURWi
:
1707 case AArch64::LDURXi
:
1708 case AArch64::LDURSWi
:
1709 case AArch64::LDURHHi
:
1710 case AArch64::LDURBBi
:
1711 case AArch64::LDURSBWi
:
1712 case AArch64::LDURSHWi
:
1717 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr
&MI
) {
1718 switch (MI
.getOpcode()) {
1721 // Scaled instructions.
1722 case AArch64::STRSui
:
1723 case AArch64::STRDui
:
1724 case AArch64::STRQui
:
1725 case AArch64::STRXui
:
1726 case AArch64::STRWui
:
1727 case AArch64::LDRSui
:
1728 case AArch64::LDRDui
:
1729 case AArch64::LDRQui
:
1730 case AArch64::LDRXui
:
1731 case AArch64::LDRWui
:
1732 case AArch64::LDRSWui
:
1733 // Unscaled instructions.
1734 case AArch64::STURSi
:
1735 case AArch64::STURDi
:
1736 case AArch64::STURQi
:
1737 case AArch64::STURWi
:
1738 case AArch64::STURXi
:
1739 case AArch64::LDURSi
:
1740 case AArch64::LDURDi
:
1741 case AArch64::LDURQi
:
1742 case AArch64::LDURWi
:
1743 case AArch64::LDURXi
:
1744 case AArch64::LDURSWi
:
/// Map a non-flag-setting ADD/AND/BIC/SUB opcode to its flag-setting (S)
/// counterpart, e.g. ADDWri -> ADDSWri. The 32-bit (W) opcodes are listed
/// first, followed by the 64-bit (X) ones. An opcode without an entry reaches
/// llvm_unreachable.
/// NOTE(review): the remaining parameter(s) and one statement per case are
/// not visible in this excerpt - confirm against the full source before
/// relying on this summary.
1749 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc
,
1753 llvm_unreachable("Opcode has no flag setting equivalent!");
// 32-bit opcodes.
1755 case AArch64::ADDWri
:
1757 return AArch64::ADDSWri
;
1758 case AArch64::ADDWrr
:
1760 return AArch64::ADDSWrr
;
1761 case AArch64::ADDWrs
:
1763 return AArch64::ADDSWrs
;
1764 case AArch64::ADDWrx
:
1766 return AArch64::ADDSWrx
;
1767 case AArch64::ANDWri
:
1769 return AArch64::ANDSWri
;
1770 case AArch64::ANDWrr
:
1772 return AArch64::ANDSWrr
;
1773 case AArch64::ANDWrs
:
1775 return AArch64::ANDSWrs
;
1776 case AArch64::BICWrr
:
1778 return AArch64::BICSWrr
;
1779 case AArch64::BICWrs
:
1781 return AArch64::BICSWrs
;
1782 case AArch64::SUBWri
:
1784 return AArch64::SUBSWri
;
1785 case AArch64::SUBWrr
:
1787 return AArch64::SUBSWrr
;
1788 case AArch64::SUBWrs
:
1790 return AArch64::SUBSWrs
;
1791 case AArch64::SUBWrx
:
1793 return AArch64::SUBSWrx
;
// 64-bit opcodes.
1795 case AArch64::ADDXri
:
1797 return AArch64::ADDSXri
;
1798 case AArch64::ADDXrr
:
1800 return AArch64::ADDSXrr
;
1801 case AArch64::ADDXrs
:
1803 return AArch64::ADDSXrs
;
1804 case AArch64::ADDXrx
:
1806 return AArch64::ADDSXrx
;
1807 case AArch64::ANDXri
:
1809 return AArch64::ANDSXri
;
1810 case AArch64::ANDXrr
:
1812 return AArch64::ANDSXrr
;
1813 case AArch64::ANDXrs
:
1815 return AArch64::ANDSXrs
;
1816 case AArch64::BICXrr
:
1818 return AArch64::BICSXrr
;
1819 case AArch64::BICXrs
:
1821 return AArch64::BICSXrs
;
1822 case AArch64::SUBXri
:
1824 return AArch64::SUBSXri
;
1825 case AArch64::SUBXrr
:
1827 return AArch64::SUBSXrr
;
1828 case AArch64::SUBXrs
:
1830 return AArch64::SUBSXrs
;
1831 case AArch64::SUBXrx
:
1833 return AArch64::SUBSXrx
;
1837 // Is this a candidate for ld/st merging or pairing? For example, we don't
1838 // touch volatiles or load/stores that have a hint to avoid pair formation.
1839 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr
&MI
) const {
1840 // If this is a volatile load/store, don't mess with it.
1841 if (MI
.hasOrderedMemoryRef())
1844 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1845 assert((MI
.getOperand(1).isReg() || MI
.getOperand(1).isFI()) &&
1846 "Expected a reg or frame index operand.");
1847 if (!MI
.getOperand(2).isImm())
1850 // Can't merge/pair if the instruction modifies the base register.
1851 // e.g., ldr x0, [x0]
1852 // This case will never occur with an FI base.
1853 if (MI
.getOperand(1).isReg()) {
1854 unsigned BaseReg
= MI
.getOperand(1).getReg();
1855 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
1856 if (MI
.modifiesRegister(BaseReg
, TRI
))
1860 // Check if this load/store has a hint to avoid pair formation.
1861 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1862 if (isLdStPairSuppressed(MI
))
1865 // On some CPUs quad load/store pairs are slower than two single load/stores.
1866 if (Subtarget
.isPaired128Slow()) {
1867 switch (MI
.getOpcode()) {
1870 case AArch64::LDURQi
:
1871 case AArch64::STURQi
:
1872 case AArch64::LDRQui
:
1873 case AArch64::STRQui
:
1881 bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr
&LdSt
,
1882 MachineOperand
*&BaseOp
,
1884 const TargetRegisterInfo
*TRI
) const {
1886 return getMemOperandWithOffsetWidth(LdSt
, BaseOp
, Offset
, Width
, TRI
);
1889 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1890 MachineInstr
&LdSt
, MachineOperand
*&BaseOp
, int64_t &Offset
,
1891 unsigned &Width
, const TargetRegisterInfo
*TRI
) const {
1892 assert(LdSt
.mayLoadOrStore() && "Expected a memory operation.");
1893 // Handle only loads/stores with base register followed by immediate offset.
1894 if (LdSt
.getNumExplicitOperands() == 3) {
1895 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1896 if ((!LdSt
.getOperand(1).isReg() && !LdSt
.getOperand(1).isFI()) ||
1897 !LdSt
.getOperand(2).isImm())
1899 } else if (LdSt
.getNumExplicitOperands() == 4) {
1900 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1901 if (!LdSt
.getOperand(1).isReg() ||
1902 (!LdSt
.getOperand(2).isReg() && !LdSt
.getOperand(2).isFI()) ||
1903 !LdSt
.getOperand(3).isImm())
1908 // Get the scaling factor for the instruction and set the width for the
1911 int64_t Dummy1
, Dummy2
;
1913 // If this returns false, then it's an instruction we don't want to handle.
1914 if (!getMemOpInfo(LdSt
.getOpcode(), Scale
, Width
, Dummy1
, Dummy2
))
1917 // Compute the offset. Offset is calculated as the immediate operand
1918 // multiplied by the scaling factor. Unscaled instructions have scaling factor
1920 if (LdSt
.getNumExplicitOperands() == 3) {
1921 BaseOp
= &LdSt
.getOperand(1);
1922 Offset
= LdSt
.getOperand(2).getImm() * Scale
;
1924 assert(LdSt
.getNumExplicitOperands() == 4 && "invalid number of operands");
1925 BaseOp
= &LdSt
.getOperand(2);
1926 Offset
= LdSt
.getOperand(3).getImm() * Scale
;
1929 assert((BaseOp
->isReg() || BaseOp
->isFI()) &&
1930 "getMemOperandWithOffset only supports base "
1931 "operands of type register or frame index.");
1937 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr
&LdSt
) const {
1938 assert(LdSt
.mayLoadOrStore() && "Expected a memory operation.");
1939 MachineOperand
&OfsOp
= LdSt
.getOperand(LdSt
.getNumExplicitOperands() - 1);
1940 assert(OfsOp
.isImm() && "Offset operand wasn't immediate.");
/// Look up addressing information for the load/store opcode \p Opcode and
/// report it through \p Scale, \p Width, \p MinOffset and \p MaxOffset.
///
/// Opcodes that share the same parameters are grouped under consecutive case
/// labels. For an opcode that is "not a memory operation or something we want
/// to handle", MinOffset and MaxOffset are zeroed.
/// NOTE(review): the per-group assignments and the return statements are not
/// visible in this excerpt. A caller in this file treats a false return as
/// "an instruction we don't want to handle" - confirm the exact values
/// against the full source.
1944 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode
, unsigned &Scale
,
1945 unsigned &Width
, int64_t &MinOffset
,
1946 int64_t &MaxOffset
) const {
1948 // Not a memory operation or something we want to handle.
1951 MinOffset
= MaxOffset
= 0;
// Post-indexed W-register store/load.
1953 case AArch64::STRWpost
:
1954 case AArch64::LDRWpost
:
// Unscaled (LDUR*/STUR*) groups, one per register size.
1960 case AArch64::LDURQi
:
1961 case AArch64::STURQi
:
1967 case AArch64::LDURXi
:
1968 case AArch64::LDURDi
:
1969 case AArch64::STURXi
:
1970 case AArch64::STURDi
:
1976 case AArch64::LDURWi
:
1977 case AArch64::LDURSi
:
1978 case AArch64::LDURSWi
:
1979 case AArch64::STURWi
:
1980 case AArch64::STURSi
:
1986 case AArch64::LDURHi
:
1987 case AArch64::LDURHHi
:
1988 case AArch64::LDURSHXi
:
1989 case AArch64::LDURSHWi
:
1990 case AArch64::STURHi
:
1991 case AArch64::STURHHi
:
1997 case AArch64::LDURBi
:
1998 case AArch64::LDURBBi
:
1999 case AArch64::LDURSBXi
:
2000 case AArch64::LDURSBWi
:
2001 case AArch64::STURBi
:
2002 case AArch64::STURBBi
:
// From here on, pair (LDP/LDNP/STP/STNP) groups alternate with the matching
// unsigned-offset single (LDR*ui/STR*ui) groups.
2008 case AArch64::LDPQi
:
2009 case AArch64::LDNPQi
:
2010 case AArch64::STPQi
:
2011 case AArch64::STNPQi
:
2017 case AArch64::LDRQui
:
2018 case AArch64::STRQui
:
2023 case AArch64::LDPXi
:
2024 case AArch64::LDPDi
:
2025 case AArch64::LDNPXi
:
2026 case AArch64::LDNPDi
:
2027 case AArch64::STPXi
:
2028 case AArch64::STPDi
:
2029 case AArch64::STNPXi
:
2030 case AArch64::STNPDi
:
2036 case AArch64::LDRXui
:
2037 case AArch64::LDRDui
:
2038 case AArch64::STRXui
:
2039 case AArch64::STRDui
:
2044 case AArch64::LDPWi
:
2045 case AArch64::LDPSi
:
2046 case AArch64::LDNPWi
:
2047 case AArch64::LDNPSi
:
2048 case AArch64::STPWi
:
2049 case AArch64::STPSi
:
2050 case AArch64::STNPWi
:
2051 case AArch64::STNPSi
:
2057 case AArch64::LDRWui
:
2058 case AArch64::LDRSui
:
2059 case AArch64::LDRSWui
:
2060 case AArch64::STRWui
:
2061 case AArch64::STRSui
:
2066 case AArch64::LDRHui
:
2067 case AArch64::LDRHHui
:
2068 case AArch64::STRHui
:
2069 case AArch64::STRHHui
:
2074 case AArch64::LDRBui
:
2075 case AArch64::LDRBBui
:
2076 case AArch64::STRBui
:
2077 case AArch64::STRBBui
:
2087 static unsigned getOffsetStride(unsigned Opc
) {
2091 case AArch64::LDURQi
:
2092 case AArch64::STURQi
:
2094 case AArch64::LDURXi
:
2095 case AArch64::LDURDi
:
2096 case AArch64::STURXi
:
2097 case AArch64::STURDi
:
2099 case AArch64::LDURWi
:
2100 case AArch64::LDURSi
:
2101 case AArch64::LDURSWi
:
2102 case AArch64::STURWi
:
2103 case AArch64::STURSi
:
2108 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2110 static bool scaleOffset(unsigned Opc
, int64_t &Offset
) {
2111 unsigned OffsetStride
= getOffsetStride(Opc
);
2112 if (OffsetStride
== 0)
2114 // If the byte-offset isn't a multiple of the stride, we can't scale this
2116 if (Offset
% OffsetStride
!= 0)
2119 // Convert the byte-offset used by unscaled into an "element" offset used
2120 // by the scaled pair load/store instructions.
2121 Offset
/= OffsetStride
;
2125 // Unscale the scaled offsets. Returns false if the scaled offset can't be
2127 static bool unscaleOffset(unsigned Opc
, int64_t &Offset
) {
2128 unsigned OffsetStride
= getOffsetStride(Opc
);
2129 if (OffsetStride
== 0)
2132 // Convert the "element" offset used by scaled pair load/store instructions
2133 // into the byte-offset used by unscaled.
2134 Offset
*= OffsetStride
;
2138 static bool canPairLdStOpc(unsigned FirstOpc
, unsigned SecondOpc
) {
2139 if (FirstOpc
== SecondOpc
)
2141 // We can also pair sign-ext and zero-ext instructions.
2145 case AArch64::LDRWui
:
2146 case AArch64::LDURWi
:
2147 return SecondOpc
== AArch64::LDRSWui
|| SecondOpc
== AArch64::LDURSWi
;
2148 case AArch64::LDRSWui
:
2149 case AArch64::LDURSWi
:
2150 return SecondOpc
== AArch64::LDRWui
|| SecondOpc
== AArch64::LDURWi
;
2152 // These instructions can't be paired based on their opcodes.
/// Decide whether two frame-index based accesses should be clustered.
///
/// When both \p FI1 and \p FI2 are fixed stack objects, each instruction
/// offset is converted to bytes (unscaleOffset), added to the object's own
/// offset, and converted back to an element index (scaleOffset). The pair is
/// accepted only if the second index directly follows the first.
/// NOTE(review): the early-exit statements and the handling of non-fixed
/// objects are not visible in this excerpt - confirm against the full source.
2156 static bool shouldClusterFI(const MachineFrameInfo
&MFI
, int FI1
,
2157 int64_t Offset1
, unsigned Opcode1
, int FI2
,
2158 int64_t Offset2
, unsigned Opcode2
) {
2159 // Accesses through fixed stack object frame indices may access a different
2160 // fixed stack slot. Check that the object offsets + offsets match.
2161 if (MFI
.isFixedObjectIndex(FI1
) && MFI
.isFixedObjectIndex(FI2
)) {
2162 int64_t ObjectOffset1
= MFI
.getObjectOffset(FI1
);
2163 int64_t ObjectOffset2
= MFI
.getObjectOffset(FI2
);
2164 assert(ObjectOffset1
<= ObjectOffset2
&& "Object offsets are not ordered.");
2165 // Get the byte-offset from the object offset.
2166 if (!unscaleOffset(Opcode1
, Offset1
) || !unscaleOffset(Opcode2
, Offset2
))
// Absolute byte position of each access: object offset plus access offset.
2168 ObjectOffset1
+= Offset1
;
2169 ObjectOffset2
+= Offset2
;
2170 // Get the "element" index in the object.
2171 if (!scaleOffset(Opcode1
, ObjectOffset1
) ||
2172 !scaleOffset(Opcode2
, ObjectOffset2
))
// Adjacent only if the second element index directly follows the first.
2174 return ObjectOffset1
+ 1 == ObjectOffset2
;
2180 /// Detect opportunities for ldp/stp formation.
2182 /// Only called for LdSt for which getMemOperandWithOffset returns true.
2183 bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand
&BaseOp1
,
2184 MachineOperand
&BaseOp2
,
2185 unsigned NumLoads
) const {
2186 MachineInstr
&FirstLdSt
= *BaseOp1
.getParent();
2187 MachineInstr
&SecondLdSt
= *BaseOp2
.getParent();
2188 if (BaseOp1
.getType() != BaseOp2
.getType())
2191 assert((BaseOp1
.isReg() || BaseOp1
.isFI()) &&
2192 "Only base registers and frame indices are supported.");
2194 // Check for both base regs and base FI.
2195 if (BaseOp1
.isReg() && BaseOp1
.getReg() != BaseOp2
.getReg())
2198 // Only cluster up to a single pair.
2202 if (!isPairableLdStInst(FirstLdSt
) || !isPairableLdStInst(SecondLdSt
))
2205 // Can we pair these instructions based on their opcodes?
2206 unsigned FirstOpc
= FirstLdSt
.getOpcode();
2207 unsigned SecondOpc
= SecondLdSt
.getOpcode();
2208 if (!canPairLdStOpc(FirstOpc
, SecondOpc
))
2211 // Can't merge volatiles or load/stores that have a hint to avoid pair
2212 // formation, for example.
2213 if (!isCandidateToMergeOrPair(FirstLdSt
) ||
2214 !isCandidateToMergeOrPair(SecondLdSt
))
2217 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2218 int64_t Offset1
= FirstLdSt
.getOperand(2).getImm();
2219 if (isUnscaledLdSt(FirstOpc
) && !scaleOffset(FirstOpc
, Offset1
))
2222 int64_t Offset2
= SecondLdSt
.getOperand(2).getImm();
2223 if (isUnscaledLdSt(SecondOpc
) && !scaleOffset(SecondOpc
, Offset2
))
2226 // Pairwise instructions have a 7-bit signed offset field.
2227 if (Offset1
> 63 || Offset1
< -64)
2230 // The caller should already have ordered First/SecondLdSt by offset.
2231 // Note: except for non-equal frame index bases
2232 if (BaseOp1
.isFI()) {
2233 assert((!BaseOp1
.isIdenticalTo(BaseOp2
) || Offset1
>= Offset2
) &&
2234 "Caller should have ordered offsets.");
2236 const MachineFrameInfo
&MFI
=
2237 FirstLdSt
.getParent()->getParent()->getFrameInfo();
2238 return shouldClusterFI(MFI
, BaseOp1
.getIndex(), Offset1
, FirstOpc
,
2239 BaseOp2
.getIndex(), Offset2
, SecondOpc
);
2242 assert((!BaseOp1
.isIdenticalTo(BaseOp2
) || Offset1
<= Offset2
) &&
2243 "Caller should have ordered offsets.");
2245 return Offset1
+ 1 == Offset2
;
2248 static const MachineInstrBuilder
&AddSubReg(const MachineInstrBuilder
&MIB
,
2249 unsigned Reg
, unsigned SubIdx
,
2251 const TargetRegisterInfo
*TRI
) {
2253 return MIB
.addReg(Reg
, State
);
2255 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
2256 return MIB
.addReg(TRI
->getSubReg(Reg
, SubIdx
), State
);
2257 return MIB
.addReg(Reg
, State
, SubIdx
);
/// Return true if copying a tuple of \p NumRegs consecutive registers from
/// encoding \p SrcReg to encoding \p DestReg in ascending order would
/// overwrite a source register before it has been read.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here. With unsigned
  // arithmetic a modulo by 32 gives exactly that.
  const unsigned Distance = (DestReg - SrcReg) % 32u;
  return Distance < NumRegs;
}
2267 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock
&MBB
,
2268 MachineBasicBlock::iterator I
,
2269 const DebugLoc
&DL
, unsigned DestReg
,
2270 unsigned SrcReg
, bool KillSrc
,
2272 ArrayRef
<unsigned> Indices
) const {
2273 assert(Subtarget
.hasNEON() && "Unexpected register copy without NEON");
2274 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
2275 uint16_t DestEncoding
= TRI
->getEncodingValue(DestReg
);
2276 uint16_t SrcEncoding
= TRI
->getEncodingValue(SrcReg
);
2277 unsigned NumRegs
= Indices
.size();
2279 int SubReg
= 0, End
= NumRegs
, Incr
= 1;
2280 if (forwardCopyWillClobberTuple(DestEncoding
, SrcEncoding
, NumRegs
)) {
2281 SubReg
= NumRegs
- 1;
2286 for (; SubReg
!= End
; SubReg
+= Incr
) {
2287 const MachineInstrBuilder MIB
= BuildMI(MBB
, I
, DL
, get(Opcode
));
2288 AddSubReg(MIB
, DestReg
, Indices
[SubReg
], RegState::Define
, TRI
);
2289 AddSubReg(MIB
, SrcReg
, Indices
[SubReg
], 0, TRI
);
2290 AddSubReg(MIB
, SrcReg
, Indices
[SubReg
], getKillRegState(KillSrc
), TRI
);
2294 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock
&MBB
,
2295 MachineBasicBlock::iterator I
,
2296 DebugLoc DL
, unsigned DestReg
,
2297 unsigned SrcReg
, bool KillSrc
,
2298 unsigned Opcode
, unsigned ZeroReg
,
2299 llvm::ArrayRef
<unsigned> Indices
) const {
2300 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
2301 unsigned NumRegs
= Indices
.size();
2304 uint16_t DestEncoding
= TRI
->getEncodingValue(DestReg
);
2305 uint16_t SrcEncoding
= TRI
->getEncodingValue(SrcReg
);
2306 assert(DestEncoding
% NumRegs
== 0 && SrcEncoding
% NumRegs
== 0 &&
2307 "GPR reg sequences should not be able to overlap");
2310 for (unsigned SubReg
= 0; SubReg
!= NumRegs
; ++SubReg
) {
2311 const MachineInstrBuilder MIB
= BuildMI(MBB
, I
, DL
, get(Opcode
));
2312 AddSubReg(MIB
, DestReg
, Indices
[SubReg
], RegState::Define
, TRI
);
2313 MIB
.addReg(ZeroReg
);
2314 AddSubReg(MIB
, SrcReg
, Indices
[SubReg
], getKillRegState(KillSrc
), TRI
);
2319 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock
&MBB
,
2320 MachineBasicBlock::iterator I
,
2321 const DebugLoc
&DL
, unsigned DestReg
,
2322 unsigned SrcReg
, bool KillSrc
) const {
2323 if (AArch64::GPR32spRegClass
.contains(DestReg
) &&
2324 (AArch64::GPR32spRegClass
.contains(SrcReg
) || SrcReg
== AArch64::WZR
)) {
2325 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
2327 if (DestReg
== AArch64::WSP
|| SrcReg
== AArch64::WSP
) {
2328 // If either operand is WSP, expand to ADD #0.
2329 if (Subtarget
.hasZeroCycleRegMove()) {
2330 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2331 unsigned DestRegX
= TRI
->getMatchingSuperReg(DestReg
, AArch64::sub_32
,
2332 &AArch64::GPR64spRegClass
);
2333 unsigned SrcRegX
= TRI
->getMatchingSuperReg(SrcReg
, AArch64::sub_32
,
2334 &AArch64::GPR64spRegClass
);
2335 // This instruction is reading and writing X registers. This may upset
2336 // the register scavenger and machine verifier, so we need to indicate
2337 // that we are reading an undefined value from SrcRegX, but a proper
2338 // value from SrcReg.
2339 BuildMI(MBB
, I
, DL
, get(AArch64::ADDXri
), DestRegX
)
2340 .addReg(SrcRegX
, RegState::Undef
)
2342 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0))
2343 .addReg(SrcReg
, RegState::Implicit
| getKillRegState(KillSrc
));
2345 BuildMI(MBB
, I
, DL
, get(AArch64::ADDWri
), DestReg
)
2346 .addReg(SrcReg
, getKillRegState(KillSrc
))
2348 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0));
2350 } else if (SrcReg
== AArch64::WZR
&& Subtarget
.hasZeroCycleZeroingGP()) {
2351 BuildMI(MBB
, I
, DL
, get(AArch64::MOVZWi
), DestReg
)
2353 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0));
2355 if (Subtarget
.hasZeroCycleRegMove()) {
2356 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2357 unsigned DestRegX
= TRI
->getMatchingSuperReg(DestReg
, AArch64::sub_32
,
2358 &AArch64::GPR64spRegClass
);
2359 unsigned SrcRegX
= TRI
->getMatchingSuperReg(SrcReg
, AArch64::sub_32
,
2360 &AArch64::GPR64spRegClass
);
2361 // This instruction is reading and writing X registers. This may upset
2362 // the register scavenger and machine verifier, so we need to indicate
2363 // that we are reading an undefined value from SrcRegX, but a proper
2364 // value from SrcReg.
2365 BuildMI(MBB
, I
, DL
, get(AArch64::ORRXrr
), DestRegX
)
2366 .addReg(AArch64::XZR
)
2367 .addReg(SrcRegX
, RegState::Undef
)
2368 .addReg(SrcReg
, RegState::Implicit
| getKillRegState(KillSrc
));
2370 // Otherwise, expand to ORR WZR.
2371 BuildMI(MBB
, I
, DL
, get(AArch64::ORRWrr
), DestReg
)
2372 .addReg(AArch64::WZR
)
2373 .addReg(SrcReg
, getKillRegState(KillSrc
));
2379 if (AArch64::GPR64spRegClass
.contains(DestReg
) &&
2380 (AArch64::GPR64spRegClass
.contains(SrcReg
) || SrcReg
== AArch64::XZR
)) {
2381 if (DestReg
== AArch64::SP
|| SrcReg
== AArch64::SP
) {
2382 // If either operand is SP, expand to ADD #0.
2383 BuildMI(MBB
, I
, DL
, get(AArch64::ADDXri
), DestReg
)
2384 .addReg(SrcReg
, getKillRegState(KillSrc
))
2386 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0));
2387 } else if (SrcReg
== AArch64::XZR
&& Subtarget
.hasZeroCycleZeroingGP()) {
2388 BuildMI(MBB
, I
, DL
, get(AArch64::MOVZXi
), DestReg
)
2390 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0));
2392 // Otherwise, expand to ORR XZR.
2393 BuildMI(MBB
, I
, DL
, get(AArch64::ORRXrr
), DestReg
)
2394 .addReg(AArch64::XZR
)
2395 .addReg(SrcReg
, getKillRegState(KillSrc
));
2400 // Copy a DDDD register quad by copying the individual sub-registers.
2401 if (AArch64::DDDDRegClass
.contains(DestReg
) &&
2402 AArch64::DDDDRegClass
.contains(SrcReg
)) {
2403 static const unsigned Indices
[] = {AArch64::dsub0
, AArch64::dsub1
,
2404 AArch64::dsub2
, AArch64::dsub3
};
2405 copyPhysRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRv8i8
,
2410 // Copy a DDD register triple by copying the individual sub-registers.
2411 if (AArch64::DDDRegClass
.contains(DestReg
) &&
2412 AArch64::DDDRegClass
.contains(SrcReg
)) {
2413 static const unsigned Indices
[] = {AArch64::dsub0
, AArch64::dsub1
,
2415 copyPhysRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRv8i8
,
2420 // Copy a DD register pair by copying the individual sub-registers.
2421 if (AArch64::DDRegClass
.contains(DestReg
) &&
2422 AArch64::DDRegClass
.contains(SrcReg
)) {
2423 static const unsigned Indices
[] = {AArch64::dsub0
, AArch64::dsub1
};
2424 copyPhysRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRv8i8
,
2429 // Copy a QQQQ register quad by copying the individual sub-registers.
2430 if (AArch64::QQQQRegClass
.contains(DestReg
) &&
2431 AArch64::QQQQRegClass
.contains(SrcReg
)) {
2432 static const unsigned Indices
[] = {AArch64::qsub0
, AArch64::qsub1
,
2433 AArch64::qsub2
, AArch64::qsub3
};
2434 copyPhysRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRv16i8
,
2439 // Copy a QQQ register triple by copying the individual sub-registers.
2440 if (AArch64::QQQRegClass
.contains(DestReg
) &&
2441 AArch64::QQQRegClass
.contains(SrcReg
)) {
2442 static const unsigned Indices
[] = {AArch64::qsub0
, AArch64::qsub1
,
2444 copyPhysRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRv16i8
,
2449 // Copy a QQ register pair by copying the individual sub-registers.
2450 if (AArch64::QQRegClass
.contains(DestReg
) &&
2451 AArch64::QQRegClass
.contains(SrcReg
)) {
2452 static const unsigned Indices
[] = {AArch64::qsub0
, AArch64::qsub1
};
2453 copyPhysRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRv16i8
,
2458 if (AArch64::XSeqPairsClassRegClass
.contains(DestReg
) &&
2459 AArch64::XSeqPairsClassRegClass
.contains(SrcReg
)) {
2460 static const unsigned Indices
[] = {AArch64::sube64
, AArch64::subo64
};
2461 copyGPRRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRXrs
,
2462 AArch64::XZR
, Indices
);
2466 if (AArch64::WSeqPairsClassRegClass
.contains(DestReg
) &&
2467 AArch64::WSeqPairsClassRegClass
.contains(SrcReg
)) {
2468 static const unsigned Indices
[] = {AArch64::sube32
, AArch64::subo32
};
2469 copyGPRRegTuple(MBB
, I
, DL
, DestReg
, SrcReg
, KillSrc
, AArch64::ORRWrs
,
2470 AArch64::WZR
, Indices
);
2474 if (AArch64::FPR128RegClass
.contains(DestReg
) &&
2475 AArch64::FPR128RegClass
.contains(SrcReg
)) {
2476 if (Subtarget
.hasNEON()) {
2477 BuildMI(MBB
, I
, DL
, get(AArch64::ORRv16i8
), DestReg
)
2479 .addReg(SrcReg
, getKillRegState(KillSrc
));
2481 BuildMI(MBB
, I
, DL
, get(AArch64::STRQpre
))
2482 .addReg(AArch64::SP
, RegState::Define
)
2483 .addReg(SrcReg
, getKillRegState(KillSrc
))
2484 .addReg(AArch64::SP
)
2486 BuildMI(MBB
, I
, DL
, get(AArch64::LDRQpre
))
2487 .addReg(AArch64::SP
, RegState::Define
)
2488 .addReg(DestReg
, RegState::Define
)
2489 .addReg(AArch64::SP
)
2495 if (AArch64::FPR64RegClass
.contains(DestReg
) &&
2496 AArch64::FPR64RegClass
.contains(SrcReg
)) {
2497 if (Subtarget
.hasNEON()) {
2498 DestReg
= RI
.getMatchingSuperReg(DestReg
, AArch64::dsub
,
2499 &AArch64::FPR128RegClass
);
2500 SrcReg
= RI
.getMatchingSuperReg(SrcReg
, AArch64::dsub
,
2501 &AArch64::FPR128RegClass
);
2502 BuildMI(MBB
, I
, DL
, get(AArch64::ORRv16i8
), DestReg
)
2504 .addReg(SrcReg
, getKillRegState(KillSrc
));
2506 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVDr
), DestReg
)
2507 .addReg(SrcReg
, getKillRegState(KillSrc
));
2512 if (AArch64::FPR32RegClass
.contains(DestReg
) &&
2513 AArch64::FPR32RegClass
.contains(SrcReg
)) {
2514 if (Subtarget
.hasNEON()) {
2515 DestReg
= RI
.getMatchingSuperReg(DestReg
, AArch64::ssub
,
2516 &AArch64::FPR128RegClass
);
2517 SrcReg
= RI
.getMatchingSuperReg(SrcReg
, AArch64::ssub
,
2518 &AArch64::FPR128RegClass
);
2519 BuildMI(MBB
, I
, DL
, get(AArch64::ORRv16i8
), DestReg
)
2521 .addReg(SrcReg
, getKillRegState(KillSrc
));
2523 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVSr
), DestReg
)
2524 .addReg(SrcReg
, getKillRegState(KillSrc
));
2529 if (AArch64::FPR16RegClass
.contains(DestReg
) &&
2530 AArch64::FPR16RegClass
.contains(SrcReg
)) {
2531 if (Subtarget
.hasNEON()) {
2532 DestReg
= RI
.getMatchingSuperReg(DestReg
, AArch64::hsub
,
2533 &AArch64::FPR128RegClass
);
2534 SrcReg
= RI
.getMatchingSuperReg(SrcReg
, AArch64::hsub
,
2535 &AArch64::FPR128RegClass
);
2536 BuildMI(MBB
, I
, DL
, get(AArch64::ORRv16i8
), DestReg
)
2538 .addReg(SrcReg
, getKillRegState(KillSrc
));
2540 DestReg
= RI
.getMatchingSuperReg(DestReg
, AArch64::hsub
,
2541 &AArch64::FPR32RegClass
);
2542 SrcReg
= RI
.getMatchingSuperReg(SrcReg
, AArch64::hsub
,
2543 &AArch64::FPR32RegClass
);
2544 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVSr
), DestReg
)
2545 .addReg(SrcReg
, getKillRegState(KillSrc
));
2550 if (AArch64::FPR8RegClass
.contains(DestReg
) &&
2551 AArch64::FPR8RegClass
.contains(SrcReg
)) {
2552 if (Subtarget
.hasNEON()) {
2553 DestReg
= RI
.getMatchingSuperReg(DestReg
, AArch64::bsub
,
2554 &AArch64::FPR128RegClass
);
2555 SrcReg
= RI
.getMatchingSuperReg(SrcReg
, AArch64::bsub
,
2556 &AArch64::FPR128RegClass
);
2557 BuildMI(MBB
, I
, DL
, get(AArch64::ORRv16i8
), DestReg
)
2559 .addReg(SrcReg
, getKillRegState(KillSrc
));
2561 DestReg
= RI
.getMatchingSuperReg(DestReg
, AArch64::bsub
,
2562 &AArch64::FPR32RegClass
);
2563 SrcReg
= RI
.getMatchingSuperReg(SrcReg
, AArch64::bsub
,
2564 &AArch64::FPR32RegClass
);
2565 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVSr
), DestReg
)
2566 .addReg(SrcReg
, getKillRegState(KillSrc
));
2571 // Copies between GPR64 and FPR64.
2572 if (AArch64::FPR64RegClass
.contains(DestReg
) &&
2573 AArch64::GPR64RegClass
.contains(SrcReg
)) {
2574 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVXDr
), DestReg
)
2575 .addReg(SrcReg
, getKillRegState(KillSrc
));
2578 if (AArch64::GPR64RegClass
.contains(DestReg
) &&
2579 AArch64::FPR64RegClass
.contains(SrcReg
)) {
2580 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVDXr
), DestReg
)
2581 .addReg(SrcReg
, getKillRegState(KillSrc
));
2584 // Copies between GPR32 and FPR32.
2585 if (AArch64::FPR32RegClass
.contains(DestReg
) &&
2586 AArch64::GPR32RegClass
.contains(SrcReg
)) {
2587 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVWSr
), DestReg
)
2588 .addReg(SrcReg
, getKillRegState(KillSrc
));
2591 if (AArch64::GPR32RegClass
.contains(DestReg
) &&
2592 AArch64::FPR32RegClass
.contains(SrcReg
)) {
2593 BuildMI(MBB
, I
, DL
, get(AArch64::FMOVSWr
), DestReg
)
2594 .addReg(SrcReg
, getKillRegState(KillSrc
));
2598 if (DestReg
== AArch64::NZCV
) {
2599 assert(AArch64::GPR64RegClass
.contains(SrcReg
) && "Invalid NZCV copy");
2600 BuildMI(MBB
, I
, DL
, get(AArch64::MSR
))
2601 .addImm(AArch64SysReg::NZCV
)
2602 .addReg(SrcReg
, getKillRegState(KillSrc
))
2603 .addReg(AArch64::NZCV
, RegState::Implicit
| RegState::Define
);
2607 if (SrcReg
== AArch64::NZCV
) {
2608 assert(AArch64::GPR64RegClass
.contains(DestReg
) && "Invalid NZCV copy");
2609 BuildMI(MBB
, I
, DL
, get(AArch64::MRS
), DestReg
)
2610 .addImm(AArch64SysReg::NZCV
)
2611 .addReg(AArch64::NZCV
, RegState::Implicit
| getKillRegState(KillSrc
));
2615 llvm_unreachable("unimplemented reg-to-reg copy");
2618 static void storeRegPairToStackSlot(const TargetRegisterInfo
&TRI
,
2619 MachineBasicBlock
&MBB
,
2620 MachineBasicBlock::iterator InsertBefore
,
2621 const MCInstrDesc
&MCID
,
2622 unsigned SrcReg
, bool IsKill
,
2623 unsigned SubIdx0
, unsigned SubIdx1
, int FI
,
2624 MachineMemOperand
*MMO
) {
2625 unsigned SrcReg0
= SrcReg
;
2626 unsigned SrcReg1
= SrcReg
;
2627 if (TargetRegisterInfo::isPhysicalRegister(SrcReg
)) {
2628 SrcReg0
= TRI
.getSubReg(SrcReg
, SubIdx0
);
2630 SrcReg1
= TRI
.getSubReg(SrcReg
, SubIdx1
);
2633 BuildMI(MBB
, InsertBefore
, DebugLoc(), MCID
)
2634 .addReg(SrcReg0
, getKillRegState(IsKill
), SubIdx0
)
2635 .addReg(SrcReg1
, getKillRegState(IsKill
), SubIdx1
)
2638 .addMemOperand(MMO
);
2641 void AArch64InstrInfo::storeRegToStackSlot(
2642 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
, unsigned SrcReg
,
2643 bool isKill
, int FI
, const TargetRegisterClass
*RC
,
2644 const TargetRegisterInfo
*TRI
) const {
2645 MachineFunction
&MF
= *MBB
.getParent();
2646 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2647 unsigned Align
= MFI
.getObjectAlignment(FI
);
2649 MachinePointerInfo PtrInfo
= MachinePointerInfo::getFixedStack(MF
, FI
);
2650 MachineMemOperand
*MMO
= MF
.getMachineMemOperand(
2651 PtrInfo
, MachineMemOperand::MOStore
, MFI
.getObjectSize(FI
), Align
);
2654 switch (TRI
->getSpillSize(*RC
)) {
2656 if (AArch64::FPR8RegClass
.hasSubClassEq(RC
))
2657 Opc
= AArch64::STRBui
;
2660 if (AArch64::FPR16RegClass
.hasSubClassEq(RC
))
2661 Opc
= AArch64::STRHui
;
2664 if (AArch64::GPR32allRegClass
.hasSubClassEq(RC
)) {
2665 Opc
= AArch64::STRWui
;
2666 if (TargetRegisterInfo::isVirtualRegister(SrcReg
))
2667 MF
.getRegInfo().constrainRegClass(SrcReg
, &AArch64::GPR32RegClass
);
2669 assert(SrcReg
!= AArch64::WSP
);
2670 } else if (AArch64::FPR32RegClass
.hasSubClassEq(RC
))
2671 Opc
= AArch64::STRSui
;
2674 if (AArch64::GPR64allRegClass
.hasSubClassEq(RC
)) {
2675 Opc
= AArch64::STRXui
;
2676 if (TargetRegisterInfo::isVirtualRegister(SrcReg
))
2677 MF
.getRegInfo().constrainRegClass(SrcReg
, &AArch64::GPR64RegClass
);
2679 assert(SrcReg
!= AArch64::SP
);
2680 } else if (AArch64::FPR64RegClass
.hasSubClassEq(RC
)) {
2681 Opc
= AArch64::STRDui
;
2682 } else if (AArch64::WSeqPairsClassRegClass
.hasSubClassEq(RC
)) {
2683 storeRegPairToStackSlot(getRegisterInfo(), MBB
, MBBI
,
2684 get(AArch64::STPWi
), SrcReg
, isKill
,
2685 AArch64::sube32
, AArch64::subo32
, FI
, MMO
);
2690 if (AArch64::FPR128RegClass
.hasSubClassEq(RC
))
2691 Opc
= AArch64::STRQui
;
2692 else if (AArch64::DDRegClass
.hasSubClassEq(RC
)) {
2693 assert(Subtarget
.hasNEON() && "Unexpected register store without NEON");
2694 Opc
= AArch64::ST1Twov1d
;
2696 } else if (AArch64::XSeqPairsClassRegClass
.hasSubClassEq(RC
)) {
2697 storeRegPairToStackSlot(getRegisterInfo(), MBB
, MBBI
,
2698 get(AArch64::STPXi
), SrcReg
, isKill
,
2699 AArch64::sube64
, AArch64::subo64
, FI
, MMO
);
2704 if (AArch64::DDDRegClass
.hasSubClassEq(RC
)) {
2705 assert(Subtarget
.hasNEON() && "Unexpected register store without NEON");
2706 Opc
= AArch64::ST1Threev1d
;
2711 if (AArch64::DDDDRegClass
.hasSubClassEq(RC
)) {
2712 assert(Subtarget
.hasNEON() && "Unexpected register store without NEON");
2713 Opc
= AArch64::ST1Fourv1d
;
2715 } else if (AArch64::QQRegClass
.hasSubClassEq(RC
)) {
2716 assert(Subtarget
.hasNEON() && "Unexpected register store without NEON");
2717 Opc
= AArch64::ST1Twov2d
;
2722 if (AArch64::QQQRegClass
.hasSubClassEq(RC
)) {
2723 assert(Subtarget
.hasNEON() && "Unexpected register store without NEON");
2724 Opc
= AArch64::ST1Threev2d
;
2729 if (AArch64::QQQQRegClass
.hasSubClassEq(RC
)) {
2730 assert(Subtarget
.hasNEON() && "Unexpected register store without NEON");
2731 Opc
= AArch64::ST1Fourv2d
;
2736 assert(Opc
&& "Unknown register class");
2738 const MachineInstrBuilder MI
= BuildMI(MBB
, MBBI
, DebugLoc(), get(Opc
))
2739 .addReg(SrcReg
, getKillRegState(isKill
))
2744 MI
.addMemOperand(MMO
);
2747 static void loadRegPairFromStackSlot(const TargetRegisterInfo
&TRI
,
2748 MachineBasicBlock
&MBB
,
2749 MachineBasicBlock::iterator InsertBefore
,
2750 const MCInstrDesc
&MCID
,
2751 unsigned DestReg
, unsigned SubIdx0
,
2752 unsigned SubIdx1
, int FI
,
2753 MachineMemOperand
*MMO
) {
2754 unsigned DestReg0
= DestReg
;
2755 unsigned DestReg1
= DestReg
;
2756 bool IsUndef
= true;
2757 if (TargetRegisterInfo::isPhysicalRegister(DestReg
)) {
2758 DestReg0
= TRI
.getSubReg(DestReg
, SubIdx0
);
2760 DestReg1
= TRI
.getSubReg(DestReg
, SubIdx1
);
2764 BuildMI(MBB
, InsertBefore
, DebugLoc(), MCID
)
2765 .addReg(DestReg0
, RegState::Define
| getUndefRegState(IsUndef
), SubIdx0
)
2766 .addReg(DestReg1
, RegState::Define
| getUndefRegState(IsUndef
), SubIdx1
)
2769 .addMemOperand(MMO
);
2772 void AArch64InstrInfo::loadRegFromStackSlot(
2773 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
, unsigned DestReg
,
2774 int FI
, const TargetRegisterClass
*RC
,
2775 const TargetRegisterInfo
*TRI
) const {
2776 MachineFunction
&MF
= *MBB
.getParent();
2777 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2778 unsigned Align
= MFI
.getObjectAlignment(FI
);
2779 MachinePointerInfo PtrInfo
= MachinePointerInfo::getFixedStack(MF
, FI
);
2780 MachineMemOperand
*MMO
= MF
.getMachineMemOperand(
2781 PtrInfo
, MachineMemOperand::MOLoad
, MFI
.getObjectSize(FI
), Align
);
2785 switch (TRI
->getSpillSize(*RC
)) {
2787 if (AArch64::FPR8RegClass
.hasSubClassEq(RC
))
2788 Opc
= AArch64::LDRBui
;
2791 if (AArch64::FPR16RegClass
.hasSubClassEq(RC
))
2792 Opc
= AArch64::LDRHui
;
2795 if (AArch64::GPR32allRegClass
.hasSubClassEq(RC
)) {
2796 Opc
= AArch64::LDRWui
;
2797 if (TargetRegisterInfo::isVirtualRegister(DestReg
))
2798 MF
.getRegInfo().constrainRegClass(DestReg
, &AArch64::GPR32RegClass
);
2800 assert(DestReg
!= AArch64::WSP
);
2801 } else if (AArch64::FPR32RegClass
.hasSubClassEq(RC
))
2802 Opc
= AArch64::LDRSui
;
2805 if (AArch64::GPR64allRegClass
.hasSubClassEq(RC
)) {
2806 Opc
= AArch64::LDRXui
;
2807 if (TargetRegisterInfo::isVirtualRegister(DestReg
))
2808 MF
.getRegInfo().constrainRegClass(DestReg
, &AArch64::GPR64RegClass
);
2810 assert(DestReg
!= AArch64::SP
);
2811 } else if (AArch64::FPR64RegClass
.hasSubClassEq(RC
)) {
2812 Opc
= AArch64::LDRDui
;
2813 } else if (AArch64::WSeqPairsClassRegClass
.hasSubClassEq(RC
)) {
2814 loadRegPairFromStackSlot(getRegisterInfo(), MBB
, MBBI
,
2815 get(AArch64::LDPWi
), DestReg
, AArch64::sube32
,
2816 AArch64::subo32
, FI
, MMO
);
2821 if (AArch64::FPR128RegClass
.hasSubClassEq(RC
))
2822 Opc
= AArch64::LDRQui
;
2823 else if (AArch64::DDRegClass
.hasSubClassEq(RC
)) {
2824 assert(Subtarget
.hasNEON() && "Unexpected register load without NEON");
2825 Opc
= AArch64::LD1Twov1d
;
2827 } else if (AArch64::XSeqPairsClassRegClass
.hasSubClassEq(RC
)) {
2828 loadRegPairFromStackSlot(getRegisterInfo(), MBB
, MBBI
,
2829 get(AArch64::LDPXi
), DestReg
, AArch64::sube64
,
2830 AArch64::subo64
, FI
, MMO
);
2835 if (AArch64::DDDRegClass
.hasSubClassEq(RC
)) {
2836 assert(Subtarget
.hasNEON() && "Unexpected register load without NEON");
2837 Opc
= AArch64::LD1Threev1d
;
2842 if (AArch64::DDDDRegClass
.hasSubClassEq(RC
)) {
2843 assert(Subtarget
.hasNEON() && "Unexpected register load without NEON");
2844 Opc
= AArch64::LD1Fourv1d
;
2846 } else if (AArch64::QQRegClass
.hasSubClassEq(RC
)) {
2847 assert(Subtarget
.hasNEON() && "Unexpected register load without NEON");
2848 Opc
= AArch64::LD1Twov2d
;
2853 if (AArch64::QQQRegClass
.hasSubClassEq(RC
)) {
2854 assert(Subtarget
.hasNEON() && "Unexpected register load without NEON");
2855 Opc
= AArch64::LD1Threev2d
;
2860 if (AArch64::QQQQRegClass
.hasSubClassEq(RC
)) {
2861 assert(Subtarget
.hasNEON() && "Unexpected register load without NEON");
2862 Opc
= AArch64::LD1Fourv2d
;
2867 assert(Opc
&& "Unknown register class");
2869 const MachineInstrBuilder MI
= BuildMI(MBB
, MBBI
, DebugLoc(), get(Opc
))
2870 .addReg(DestReg
, getDefRegState(true))
2874 MI
.addMemOperand(MMO
);
2877 void llvm::emitFrameOffset(MachineBasicBlock
&MBB
,
2878 MachineBasicBlock::iterator MBBI
, const DebugLoc
&DL
,
2879 unsigned DestReg
, unsigned SrcReg
, int Offset
,
2880 const TargetInstrInfo
*TII
,
2881 MachineInstr::MIFlag Flag
, bool SetNZCV
,
2883 if (DestReg
== SrcReg
&& Offset
== 0)
2886 assert((DestReg
!= AArch64::SP
|| Offset
% 16 == 0) &&
2887 "SP increment/decrement not 16-byte aligned");
2889 bool isSub
= Offset
< 0;
2893 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2894 // scratch register. If DestReg is a virtual register, use it as the
2895 // scratch register; otherwise, create a new virtual register (to be
2896 // replaced by the scavenger at the end of PEI). That case can be optimized
2897 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2898 // register can be loaded with offset%8 and the add/sub can use an extending
2899 // instruction with LSL#3.
2900 // Currently the function handles any offsets but generates a poor sequence
2902 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2906 Opc
= isSub
? AArch64::SUBSXri
: AArch64::ADDSXri
;
2908 Opc
= isSub
? AArch64::SUBXri
: AArch64::ADDXri
;
2909 const unsigned MaxEncoding
= 0xfff;
2910 const unsigned ShiftSize
= 12;
2911 const unsigned MaxEncodableValue
= MaxEncoding
<< ShiftSize
;
2912 while (((unsigned)Offset
) >= (1 << ShiftSize
)) {
2914 if (((unsigned)Offset
) > MaxEncodableValue
) {
2915 ThisVal
= MaxEncodableValue
;
2917 ThisVal
= Offset
& MaxEncodableValue
;
2919 assert((ThisVal
>> ShiftSize
) <= MaxEncoding
&&
2920 "Encoding cannot handle value that big");
2921 BuildMI(MBB
, MBBI
, DL
, TII
->get(Opc
), DestReg
)
2923 .addImm(ThisVal
>> ShiftSize
)
2924 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, ShiftSize
))
2927 if (NeedsWinCFI
&& SrcReg
== AArch64::SP
&& DestReg
== AArch64::SP
)
2928 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_StackAlloc
))
2937 BuildMI(MBB
, MBBI
, DL
, TII
->get(Opc
), DestReg
)
2940 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0))
2944 if ((DestReg
== AArch64::FP
&& SrcReg
== AArch64::SP
) ||
2945 (SrcReg
== AArch64::FP
&& DestReg
== AArch64::SP
)) {
2947 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_SetFP
)).
2950 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_AddFP
)).
2951 addImm(Offset
).setMIFlag(Flag
);
2952 } else if (DestReg
== AArch64::SP
) {
2953 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_StackAlloc
)).
2954 addImm(Offset
).setMIFlag(Flag
);
2959 MachineInstr
*AArch64InstrInfo::foldMemoryOperandImpl(
2960 MachineFunction
&MF
, MachineInstr
&MI
, ArrayRef
<unsigned> Ops
,
2961 MachineBasicBlock::iterator InsertPt
, int FrameIndex
,
2962 LiveIntervals
*LIS
) const {
2963 // This is a bit of a hack. Consider this instruction:
2965 // %0 = COPY %sp; GPR64all:%0
2967 // We explicitly chose GPR64all for the virtual register so such a copy might
2968 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2969 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
2970 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2972 // To prevent that, we are going to constrain the %0 register class here.
2974 // <rdar://problem/11522048>
2976 if (MI
.isFullCopy()) {
2977 unsigned DstReg
= MI
.getOperand(0).getReg();
2978 unsigned SrcReg
= MI
.getOperand(1).getReg();
2979 if (SrcReg
== AArch64::SP
&&
2980 TargetRegisterInfo::isVirtualRegister(DstReg
)) {
2981 MF
.getRegInfo().constrainRegClass(DstReg
, &AArch64::GPR64RegClass
);
2984 if (DstReg
== AArch64::SP
&&
2985 TargetRegisterInfo::isVirtualRegister(SrcReg
)) {
2986 MF
.getRegInfo().constrainRegClass(SrcReg
, &AArch64::GPR64RegClass
);
2991 // Handle the case where a copy is being spilled or filled but the source
2992 // and destination register class don't match. For example:
2994 // %0 = COPY %xzr; GPR64common:%0
2996 // In this case we can still safely fold away the COPY and generate the
2997 // following spill code:
2999 // STRXui %xzr, %stack.0
3001 // This also eliminates spilled cross register class COPYs (e.g. between x and
3002 // d regs) of the same size. For example:
3004 // %0 = COPY %1; GPR64:%0, FPR64:%1
3006 // will be filled as
3008 // LDRDui %0, fi<#0>
3012 // LDRXui %Temp, fi<#0>
3015 if (MI
.isCopy() && Ops
.size() == 1 &&
3016 // Make sure we're only folding the explicit COPY defs/uses.
3017 (Ops
[0] == 0 || Ops
[0] == 1)) {
3018 bool IsSpill
= Ops
[0] == 0;
3019 bool IsFill
= !IsSpill
;
3020 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
3021 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
3022 MachineBasicBlock
&MBB
= *MI
.getParent();
3023 const MachineOperand
&DstMO
= MI
.getOperand(0);
3024 const MachineOperand
&SrcMO
= MI
.getOperand(1);
3025 unsigned DstReg
= DstMO
.getReg();
3026 unsigned SrcReg
= SrcMO
.getReg();
3027 // This is slightly expensive to compute for physical regs since
3028 // getMinimalPhysRegClass is slow.
3029 auto getRegClass
= [&](unsigned Reg
) {
3030 return TargetRegisterInfo::isVirtualRegister(Reg
)
3031 ? MRI
.getRegClass(Reg
)
3032 : TRI
.getMinimalPhysRegClass(Reg
);
3035 if (DstMO
.getSubReg() == 0 && SrcMO
.getSubReg() == 0) {
3036 assert(TRI
.getRegSizeInBits(*getRegClass(DstReg
)) ==
3037 TRI
.getRegSizeInBits(*getRegClass(SrcReg
)) &&
3038 "Mismatched register size in non subreg COPY");
3040 storeRegToStackSlot(MBB
, InsertPt
, SrcReg
, SrcMO
.isKill(), FrameIndex
,
3041 getRegClass(SrcReg
), &TRI
);
3043 loadRegFromStackSlot(MBB
, InsertPt
, DstReg
, FrameIndex
,
3044 getRegClass(DstReg
), &TRI
);
3045 return &*--InsertPt
;
3048 // Handle cases like spilling def of:
3050 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3052 // where the physical register source can be widened and stored to the full
3053 // virtual reg destination stack slot, in this case producing:
3055 // STRXui %xzr, %stack.0
3057 if (IsSpill
&& DstMO
.isUndef() &&
3058 TargetRegisterInfo::isPhysicalRegister(SrcReg
)) {
3059 assert(SrcMO
.getSubReg() == 0 &&
3060 "Unexpected subreg on physical register");
3061 const TargetRegisterClass
*SpillRC
;
3062 unsigned SpillSubreg
;
3063 switch (DstMO
.getSubReg()) {
3067 case AArch64::sub_32
:
3069 if (AArch64::GPR32RegClass
.contains(SrcReg
)) {
3070 SpillRC
= &AArch64::GPR64RegClass
;
3071 SpillSubreg
= AArch64::sub_32
;
3072 } else if (AArch64::FPR32RegClass
.contains(SrcReg
)) {
3073 SpillRC
= &AArch64::FPR64RegClass
;
3074 SpillSubreg
= AArch64::ssub
;
3079 if (AArch64::FPR64RegClass
.contains(SrcReg
)) {
3080 SpillRC
= &AArch64::FPR128RegClass
;
3081 SpillSubreg
= AArch64::dsub
;
3088 if (unsigned WidenedSrcReg
=
3089 TRI
.getMatchingSuperReg(SrcReg
, SpillSubreg
, SpillRC
)) {
3090 storeRegToStackSlot(MBB
, InsertPt
, WidenedSrcReg
, SrcMO
.isKill(),
3091 FrameIndex
, SpillRC
, &TRI
);
3092 return &*--InsertPt
;
3096 // Handle cases like filling use of:
3098 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3100 // where we can load the full virtual reg source stack slot, into the subreg
3101 // destination, in this case producing:
3103 // LDRWui %0:sub_32<def,read-undef>, %stack.0
3105 if (IsFill
&& SrcMO
.getSubReg() == 0 && DstMO
.isUndef()) {
3106 const TargetRegisterClass
*FillRC
;
3107 switch (DstMO
.getSubReg()) {
3111 case AArch64::sub_32
:
3112 FillRC
= &AArch64::GPR32RegClass
;
3115 FillRC
= &AArch64::FPR32RegClass
;
3118 FillRC
= &AArch64::FPR64RegClass
;
3123 assert(TRI
.getRegSizeInBits(*getRegClass(SrcReg
)) ==
3124 TRI
.getRegSizeInBits(*FillRC
) &&
3125 "Mismatched regclass size on folded subreg COPY");
3126 loadRegFromStackSlot(MBB
, InsertPt
, DstReg
, FrameIndex
, FillRC
, &TRI
);
3127 MachineInstr
&LoadMI
= *--InsertPt
;
3128 MachineOperand
&LoadDst
= LoadMI
.getOperand(0);
3129 assert(LoadDst
.getSubReg() == 0 && "unexpected subreg on fill load");
3130 LoadDst
.setSubReg(DstMO
.getSubReg());
3131 LoadDst
.setIsUndef();
3141 int llvm::isAArch64FrameOffsetLegal(const MachineInstr
&MI
, int &Offset
,
3142 bool *OutUseUnscaledOp
,
3143 unsigned *OutUnscaledOp
,
3144 int *EmittableOffset
) {
3146 bool IsSigned
= false;
3147 // The ImmIdx should be changed case by case if it is not 2.
3148 unsigned ImmIdx
= 2;
3149 unsigned UnscaledOp
= 0;
3150 // Set output values in case of early exit.
3151 if (EmittableOffset
)
3152 *EmittableOffset
= 0;
3153 if (OutUseUnscaledOp
)
3154 *OutUseUnscaledOp
= false;
3157 switch (MI
.getOpcode()) {
3159 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3160 // Vector spills/fills can't take an immediate offset.
3161 case AArch64::LD1Twov2d
:
3162 case AArch64::LD1Threev2d
:
3163 case AArch64::LD1Fourv2d
:
3164 case AArch64::LD1Twov1d
:
3165 case AArch64::LD1Threev1d
:
3166 case AArch64::LD1Fourv1d
:
3167 case AArch64::ST1Twov2d
:
3168 case AArch64::ST1Threev2d
:
3169 case AArch64::ST1Fourv2d
:
3170 case AArch64::ST1Twov1d
:
3171 case AArch64::ST1Threev1d
:
3172 case AArch64::ST1Fourv1d
:
3173 return AArch64FrameOffsetCannotUpdate
;
3174 case AArch64::PRFMui
:
3176 UnscaledOp
= AArch64::PRFUMi
;
3178 case AArch64::LDRXui
:
3180 UnscaledOp
= AArch64::LDURXi
;
3182 case AArch64::LDRWui
:
3184 UnscaledOp
= AArch64::LDURWi
;
3186 case AArch64::LDRBui
:
3188 UnscaledOp
= AArch64::LDURBi
;
3190 case AArch64::LDRHui
:
3192 UnscaledOp
= AArch64::LDURHi
;
3194 case AArch64::LDRSui
:
3196 UnscaledOp
= AArch64::LDURSi
;
3198 case AArch64::LDRDui
:
3200 UnscaledOp
= AArch64::LDURDi
;
3202 case AArch64::LDRQui
:
3204 UnscaledOp
= AArch64::LDURQi
;
3206 case AArch64::LDRBBui
:
3208 UnscaledOp
= AArch64::LDURBBi
;
3210 case AArch64::LDRHHui
:
3212 UnscaledOp
= AArch64::LDURHHi
;
3214 case AArch64::LDRSBXui
:
3216 UnscaledOp
= AArch64::LDURSBXi
;
3218 case AArch64::LDRSBWui
:
3220 UnscaledOp
= AArch64::LDURSBWi
;
3222 case AArch64::LDRSHXui
:
3224 UnscaledOp
= AArch64::LDURSHXi
;
3226 case AArch64::LDRSHWui
:
3228 UnscaledOp
= AArch64::LDURSHWi
;
3230 case AArch64::LDRSWui
:
3232 UnscaledOp
= AArch64::LDURSWi
;
3235 case AArch64::STRXui
:
3237 UnscaledOp
= AArch64::STURXi
;
3239 case AArch64::STRWui
:
3241 UnscaledOp
= AArch64::STURWi
;
3243 case AArch64::STRBui
:
3245 UnscaledOp
= AArch64::STURBi
;
3247 case AArch64::STRHui
:
3249 UnscaledOp
= AArch64::STURHi
;
3251 case AArch64::STRSui
:
3253 UnscaledOp
= AArch64::STURSi
;
3255 case AArch64::STRDui
:
3257 UnscaledOp
= AArch64::STURDi
;
3259 case AArch64::STRQui
:
3261 UnscaledOp
= AArch64::STURQi
;
3263 case AArch64::STRBBui
:
3265 UnscaledOp
= AArch64::STURBBi
;
3267 case AArch64::STRHHui
:
3269 UnscaledOp
= AArch64::STURHHi
;
3272 case AArch64::LDPXi
:
3273 case AArch64::LDPDi
:
3274 case AArch64::STPXi
:
3275 case AArch64::STPDi
:
3276 case AArch64::LDNPXi
:
3277 case AArch64::LDNPDi
:
3278 case AArch64::STNPXi
:
3279 case AArch64::STNPDi
:
3284 case AArch64::LDPQi
:
3285 case AArch64::STPQi
:
3286 case AArch64::LDNPQi
:
3287 case AArch64::STNPQi
:
3292 case AArch64::LDPWi
:
3293 case AArch64::LDPSi
:
3294 case AArch64::STPWi
:
3295 case AArch64::STPSi
:
3296 case AArch64::LDNPWi
:
3297 case AArch64::LDNPSi
:
3298 case AArch64::STNPWi
:
3299 case AArch64::STNPSi
:
3305 case AArch64::LDURXi
:
3306 case AArch64::LDURWi
:
3307 case AArch64::LDURBi
:
3308 case AArch64::LDURHi
:
3309 case AArch64::LDURSi
:
3310 case AArch64::LDURDi
:
3311 case AArch64::LDURQi
:
3312 case AArch64::LDURHHi
:
3313 case AArch64::LDURBBi
:
3314 case AArch64::LDURSBXi
:
3315 case AArch64::LDURSBWi
:
3316 case AArch64::LDURSHXi
:
3317 case AArch64::LDURSHWi
:
3318 case AArch64::LDURSWi
:
3319 case AArch64::STURXi
:
3320 case AArch64::STURWi
:
3321 case AArch64::STURBi
:
3322 case AArch64::STURHi
:
3323 case AArch64::STURSi
:
3324 case AArch64::STURDi
:
3325 case AArch64::STURQi
:
3326 case AArch64::STURBBi
:
3327 case AArch64::STURHHi
:
3332 Offset
+= MI
.getOperand(ImmIdx
).getImm() * Scale
;
3334 bool useUnscaledOp
= false;
3335 // If the offset doesn't match the scale, we rewrite the instruction to
3336 // use the unscaled instruction instead. Likewise, if we have a negative
3337 // offset (and have an unscaled op to use).
3338 if ((Offset
& (Scale
- 1)) != 0 || (Offset
< 0 && UnscaledOp
!= 0))
3339 useUnscaledOp
= true;
3341 // Use an unscaled addressing mode if the instruction has a negative offset
3342 // (or if the instruction is already using an unscaled addressing mode).
3345 // ldp/stp instructions.
3348 } else if (UnscaledOp
== 0 || useUnscaledOp
) {
3358 // Attempt to fold address computation.
3359 int MaxOff
= (1 << (MaskBits
- IsSigned
)) - 1;
3360 int MinOff
= (IsSigned
? (-MaxOff
- 1) : 0);
3361 if (Offset
>= MinOff
&& Offset
<= MaxOff
) {
3362 if (EmittableOffset
)
3363 *EmittableOffset
= Offset
;
3366 int NewOff
= Offset
< 0 ? MinOff
: MaxOff
;
3367 if (EmittableOffset
)
3368 *EmittableOffset
= NewOff
;
3369 Offset
= (Offset
- NewOff
) * Scale
;
3371 if (OutUseUnscaledOp
)
3372 *OutUseUnscaledOp
= useUnscaledOp
;
3374 *OutUnscaledOp
= UnscaledOp
;
3375 return AArch64FrameOffsetCanUpdate
|
3376 (Offset
== 0 ? AArch64FrameOffsetIsLegal
: 0);
3379 bool llvm::rewriteAArch64FrameIndex(MachineInstr
&MI
, unsigned FrameRegIdx
,
3380 unsigned FrameReg
, int &Offset
,
3381 const AArch64InstrInfo
*TII
) {
3382 unsigned Opcode
= MI
.getOpcode();
3383 unsigned ImmIdx
= FrameRegIdx
+ 1;
3385 if (Opcode
== AArch64::ADDSXri
|| Opcode
== AArch64::ADDXri
) {
3386 Offset
+= MI
.getOperand(ImmIdx
).getImm();
3387 emitFrameOffset(*MI
.getParent(), MI
, MI
.getDebugLoc(),
3388 MI
.getOperand(0).getReg(), FrameReg
, Offset
, TII
,
3389 MachineInstr::NoFlags
, (Opcode
== AArch64::ADDSXri
));
3390 MI
.eraseFromParent();
3396 unsigned UnscaledOp
;
3398 int Status
= isAArch64FrameOffsetLegal(MI
, Offset
, &UseUnscaledOp
,
3399 &UnscaledOp
, &NewOffset
);
3400 if (Status
& AArch64FrameOffsetCanUpdate
) {
3401 if (Status
& AArch64FrameOffsetIsLegal
)
3402 // Replace the FrameIndex with FrameReg.
3403 MI
.getOperand(FrameRegIdx
).ChangeToRegister(FrameReg
, false);
3405 MI
.setDesc(TII
->get(UnscaledOp
));
3407 MI
.getOperand(ImmIdx
).ChangeToImmediate(NewOffset
);
3414 void AArch64InstrInfo::getNoop(MCInst
&NopInst
) const {
3415 NopInst
.setOpcode(AArch64::HINT
);
3416 NopInst
.addOperand(MCOperand::createImm(0));
3419 // AArch64 supports MachineCombiner.
3420 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3422 // True when Opc sets flag
3423 static bool isCombineInstrSettingFlag(unsigned Opc
) {
3425 case AArch64::ADDSWrr
:
3426 case AArch64::ADDSWri
:
3427 case AArch64::ADDSXrr
:
3428 case AArch64::ADDSXri
:
3429 case AArch64::SUBSWrr
:
3430 case AArch64::SUBSXrr
:
3431 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3432 case AArch64::SUBSWri
:
3433 case AArch64::SUBSXri
:
3441 // 32b Opcodes that can be combined with a MUL
3442 static bool isCombineInstrCandidate32(unsigned Opc
) {
3444 case AArch64::ADDWrr
:
3445 case AArch64::ADDWri
:
3446 case AArch64::SUBWrr
:
3447 case AArch64::ADDSWrr
:
3448 case AArch64::ADDSWri
:
3449 case AArch64::SUBSWrr
:
3450 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3451 case AArch64::SUBWri
:
3452 case AArch64::SUBSWri
:
3460 // 64b Opcodes that can be combined with a MUL
3461 static bool isCombineInstrCandidate64(unsigned Opc
) {
3463 case AArch64::ADDXrr
:
3464 case AArch64::ADDXri
:
3465 case AArch64::SUBXrr
:
3466 case AArch64::ADDSXrr
:
3467 case AArch64::ADDSXri
:
3468 case AArch64::SUBSXrr
:
3469 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3470 case AArch64::SUBXri
:
3471 case AArch64::SUBSXri
:
3479 // FP Opcodes that can be combined with a FMUL
3480 static bool isCombineInstrCandidateFP(const MachineInstr
&Inst
) {
3481 switch (Inst
.getOpcode()) {
3484 case AArch64::FADDSrr
:
3485 case AArch64::FADDDrr
:
3486 case AArch64::FADDv2f32
:
3487 case AArch64::FADDv2f64
:
3488 case AArch64::FADDv4f32
:
3489 case AArch64::FSUBSrr
:
3490 case AArch64::FSUBDrr
:
3491 case AArch64::FSUBv2f32
:
3492 case AArch64::FSUBv2f64
:
3493 case AArch64::FSUBv4f32
:
3494 TargetOptions Options
= Inst
.getParent()->getParent()->getTarget().Options
;
3495 return (Options
.UnsafeFPMath
||
3496 Options
.AllowFPOpFusion
== FPOpFusion::Fast
);
3501 // Opcodes that can be combined with a MUL
3502 static bool isCombineInstrCandidate(unsigned Opc
) {
3503 return (isCombineInstrCandidate32(Opc
) || isCombineInstrCandidate64(Opc
));
3507 // Utility routine that checks if \param MO is defined by an
3508 // \param CombineOpc instruction in the basic block \param MBB
3509 static bool canCombine(MachineBasicBlock
&MBB
, MachineOperand
&MO
,
3510 unsigned CombineOpc
, unsigned ZeroReg
= 0,
3511 bool CheckZeroReg
= false) {
3512 MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
3513 MachineInstr
*MI
= nullptr;
3515 if (MO
.isReg() && TargetRegisterInfo::isVirtualRegister(MO
.getReg()))
3516 MI
= MRI
.getUniqueVRegDef(MO
.getReg());
3517 // And it needs to be in the trace (otherwise, it won't have a depth).
3518 if (!MI
|| MI
->getParent() != &MBB
|| (unsigned)MI
->getOpcode() != CombineOpc
)
3520 // Must only used by the user we combine with.
3521 if (!MRI
.hasOneNonDBGUse(MI
->getOperand(0).getReg()))
3525 assert(MI
->getNumOperands() >= 4 && MI
->getOperand(0).isReg() &&
3526 MI
->getOperand(1).isReg() && MI
->getOperand(2).isReg() &&
3527 MI
->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3528 // The third input reg must be zero.
3529 if (MI
->getOperand(3).getReg() != ZeroReg
)
3537 // Is \param MO defined by an integer multiply and can be combined?
3538 static bool canCombineWithMUL(MachineBasicBlock
&MBB
, MachineOperand
&MO
,
3539 unsigned MulOpc
, unsigned ZeroReg
) {
3540 return canCombine(MBB
, MO
, MulOpc
, ZeroReg
, true);
3544 // Is \param MO defined by a floating-point multiply and can be combined?
3545 static bool canCombineWithFMUL(MachineBasicBlock
&MBB
, MachineOperand
&MO
,
3547 return canCombine(MBB
, MO
, MulOpc
);
3550 // TODO: There are many more machine instruction opcodes to match:
3551 // 1. Other data types (integer, vectors)
3552 // 2. Other math / logic operations (xor, or)
3553 // 3. Other forms of the same operation (intrinsics and other variants)
3554 bool AArch64InstrInfo::isAssociativeAndCommutative(
3555 const MachineInstr
&Inst
) const {
3556 switch (Inst
.getOpcode()) {
3557 case AArch64::FADDDrr
:
3558 case AArch64::FADDSrr
:
3559 case AArch64::FADDv2f32
:
3560 case AArch64::FADDv2f64
:
3561 case AArch64::FADDv4f32
:
3562 case AArch64::FMULDrr
:
3563 case AArch64::FMULSrr
:
3564 case AArch64::FMULX32
:
3565 case AArch64::FMULX64
:
3566 case AArch64::FMULXv2f32
:
3567 case AArch64::FMULXv2f64
:
3568 case AArch64::FMULXv4f32
:
3569 case AArch64::FMULv2f32
:
3570 case AArch64::FMULv2f64
:
3571 case AArch64::FMULv4f32
:
3572 return Inst
.getParent()->getParent()->getTarget().Options
.UnsafeFPMath
;
3578 /// Find instructions that can be turned into madd.
3579 static bool getMaddPatterns(MachineInstr
&Root
,
3580 SmallVectorImpl
<MachineCombinerPattern
> &Patterns
) {
3581 unsigned Opc
= Root
.getOpcode();
3582 MachineBasicBlock
&MBB
= *Root
.getParent();
3585 if (!isCombineInstrCandidate(Opc
))
3587 if (isCombineInstrSettingFlag(Opc
)) {
3588 int Cmp_NZCV
= Root
.findRegisterDefOperandIdx(AArch64::NZCV
, true);
3589 // When NZCV is live bail out.
3592 unsigned NewOpc
= convertToNonFlagSettingOpc(Root
);
3593 // When opcode can't change bail out.
3594 // CHECKME: do we miss any cases for opcode conversion?
3603 case AArch64::ADDWrr
:
3604 assert(Root
.getOperand(1).isReg() && Root
.getOperand(2).isReg() &&
3605 "ADDWrr does not have register operands");
3606 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDWrrr
,
3608 Patterns
.push_back(MachineCombinerPattern::MULADDW_OP1
);
3611 if (canCombineWithMUL(MBB
, Root
.getOperand(2), AArch64::MADDWrrr
,
3613 Patterns
.push_back(MachineCombinerPattern::MULADDW_OP2
);
3617 case AArch64::ADDXrr
:
3618 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDXrrr
,
3620 Patterns
.push_back(MachineCombinerPattern::MULADDX_OP1
);
3623 if (canCombineWithMUL(MBB
, Root
.getOperand(2), AArch64::MADDXrrr
,
3625 Patterns
.push_back(MachineCombinerPattern::MULADDX_OP2
);
3629 case AArch64::SUBWrr
:
3630 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDWrrr
,
3632 Patterns
.push_back(MachineCombinerPattern::MULSUBW_OP1
);
3635 if (canCombineWithMUL(MBB
, Root
.getOperand(2), AArch64::MADDWrrr
,
3637 Patterns
.push_back(MachineCombinerPattern::MULSUBW_OP2
);
3641 case AArch64::SUBXrr
:
3642 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDXrrr
,
3644 Patterns
.push_back(MachineCombinerPattern::MULSUBX_OP1
);
3647 if (canCombineWithMUL(MBB
, Root
.getOperand(2), AArch64::MADDXrrr
,
3649 Patterns
.push_back(MachineCombinerPattern::MULSUBX_OP2
);
3653 case AArch64::ADDWri
:
3654 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDWrrr
,
3656 Patterns
.push_back(MachineCombinerPattern::MULADDWI_OP1
);
3660 case AArch64::ADDXri
:
3661 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDXrrr
,
3663 Patterns
.push_back(MachineCombinerPattern::MULADDXI_OP1
);
3667 case AArch64::SUBWri
:
3668 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDWrrr
,
3670 Patterns
.push_back(MachineCombinerPattern::MULSUBWI_OP1
);
3674 case AArch64::SUBXri
:
3675 if (canCombineWithMUL(MBB
, Root
.getOperand(1), AArch64::MADDXrrr
,
3677 Patterns
.push_back(MachineCombinerPattern::MULSUBXI_OP1
);
3684 /// Floating-Point Support
3686 /// Find instructions that can be turned into madd.
3687 static bool getFMAPatterns(MachineInstr
&Root
,
3688 SmallVectorImpl
<MachineCombinerPattern
> &Patterns
) {
3690 if (!isCombineInstrCandidateFP(Root
))
3693 MachineBasicBlock
&MBB
= *Root
.getParent();
3696 switch (Root
.getOpcode()) {
3698 assert(false && "Unsupported FP instruction in combiner\n");
3700 case AArch64::FADDSrr
:
3701 assert(Root
.getOperand(1).isReg() && Root
.getOperand(2).isReg() &&
3702 "FADDWrr does not have register operands");
3703 if (canCombineWithFMUL(MBB
, Root
.getOperand(1), AArch64::FMULSrr
)) {
3704 Patterns
.push_back(MachineCombinerPattern::FMULADDS_OP1
);
3706 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3707 AArch64::FMULv1i32_indexed
)) {
3708 Patterns
.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1
);
3711 if (canCombineWithFMUL(MBB
, Root
.getOperand(2), AArch64::FMULSrr
)) {
3712 Patterns
.push_back(MachineCombinerPattern::FMULADDS_OP2
);
3714 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3715 AArch64::FMULv1i32_indexed
)) {
3716 Patterns
.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2
);
3720 case AArch64::FADDDrr
:
3721 if (canCombineWithFMUL(MBB
, Root
.getOperand(1), AArch64::FMULDrr
)) {
3722 Patterns
.push_back(MachineCombinerPattern::FMULADDD_OP1
);
3724 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3725 AArch64::FMULv1i64_indexed
)) {
3726 Patterns
.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1
);
3729 if (canCombineWithFMUL(MBB
, Root
.getOperand(2), AArch64::FMULDrr
)) {
3730 Patterns
.push_back(MachineCombinerPattern::FMULADDD_OP2
);
3732 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3733 AArch64::FMULv1i64_indexed
)) {
3734 Patterns
.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2
);
3738 case AArch64::FADDv2f32
:
3739 if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3740 AArch64::FMULv2i32_indexed
)) {
3741 Patterns
.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1
);
3743 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3744 AArch64::FMULv2f32
)) {
3745 Patterns
.push_back(MachineCombinerPattern::FMLAv2f32_OP1
);
3748 if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3749 AArch64::FMULv2i32_indexed
)) {
3750 Patterns
.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2
);
3752 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3753 AArch64::FMULv2f32
)) {
3754 Patterns
.push_back(MachineCombinerPattern::FMLAv2f32_OP2
);
3758 case AArch64::FADDv2f64
:
3759 if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3760 AArch64::FMULv2i64_indexed
)) {
3761 Patterns
.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1
);
3763 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3764 AArch64::FMULv2f64
)) {
3765 Patterns
.push_back(MachineCombinerPattern::FMLAv2f64_OP1
);
3768 if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3769 AArch64::FMULv2i64_indexed
)) {
3770 Patterns
.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2
);
3772 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3773 AArch64::FMULv2f64
)) {
3774 Patterns
.push_back(MachineCombinerPattern::FMLAv2f64_OP2
);
3778 case AArch64::FADDv4f32
:
3779 if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3780 AArch64::FMULv4i32_indexed
)) {
3781 Patterns
.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1
);
3783 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3784 AArch64::FMULv4f32
)) {
3785 Patterns
.push_back(MachineCombinerPattern::FMLAv4f32_OP1
);
3788 if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3789 AArch64::FMULv4i32_indexed
)) {
3790 Patterns
.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2
);
3792 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3793 AArch64::FMULv4f32
)) {
3794 Patterns
.push_back(MachineCombinerPattern::FMLAv4f32_OP2
);
3799 case AArch64::FSUBSrr
:
3800 if (canCombineWithFMUL(MBB
, Root
.getOperand(1), AArch64::FMULSrr
)) {
3801 Patterns
.push_back(MachineCombinerPattern::FMULSUBS_OP1
);
3804 if (canCombineWithFMUL(MBB
, Root
.getOperand(2), AArch64::FMULSrr
)) {
3805 Patterns
.push_back(MachineCombinerPattern::FMULSUBS_OP2
);
3807 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3808 AArch64::FMULv1i32_indexed
)) {
3809 Patterns
.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2
);
3812 if (canCombineWithFMUL(MBB
, Root
.getOperand(1), AArch64::FNMULSrr
)) {
3813 Patterns
.push_back(MachineCombinerPattern::FNMULSUBS_OP1
);
3817 case AArch64::FSUBDrr
:
3818 if (canCombineWithFMUL(MBB
, Root
.getOperand(1), AArch64::FMULDrr
)) {
3819 Patterns
.push_back(MachineCombinerPattern::FMULSUBD_OP1
);
3822 if (canCombineWithFMUL(MBB
, Root
.getOperand(2), AArch64::FMULDrr
)) {
3823 Patterns
.push_back(MachineCombinerPattern::FMULSUBD_OP2
);
3825 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3826 AArch64::FMULv1i64_indexed
)) {
3827 Patterns
.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2
);
3830 if (canCombineWithFMUL(MBB
, Root
.getOperand(1), AArch64::FNMULDrr
)) {
3831 Patterns
.push_back(MachineCombinerPattern::FNMULSUBD_OP1
);
3835 case AArch64::FSUBv2f32
:
3836 if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3837 AArch64::FMULv2i32_indexed
)) {
3838 Patterns
.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2
);
3840 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3841 AArch64::FMULv2f32
)) {
3842 Patterns
.push_back(MachineCombinerPattern::FMLSv2f32_OP2
);
3845 if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3846 AArch64::FMULv2i32_indexed
)) {
3847 Patterns
.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1
);
3849 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3850 AArch64::FMULv2f32
)) {
3851 Patterns
.push_back(MachineCombinerPattern::FMLSv2f32_OP1
);
3855 case AArch64::FSUBv2f64
:
3856 if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3857 AArch64::FMULv2i64_indexed
)) {
3858 Patterns
.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2
);
3860 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3861 AArch64::FMULv2f64
)) {
3862 Patterns
.push_back(MachineCombinerPattern::FMLSv2f64_OP2
);
3865 if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3866 AArch64::FMULv2i64_indexed
)) {
3867 Patterns
.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1
);
3869 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3870 AArch64::FMULv2f64
)) {
3871 Patterns
.push_back(MachineCombinerPattern::FMLSv2f64_OP1
);
3875 case AArch64::FSUBv4f32
:
3876 if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3877 AArch64::FMULv4i32_indexed
)) {
3878 Patterns
.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2
);
3880 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(2),
3881 AArch64::FMULv4f32
)) {
3882 Patterns
.push_back(MachineCombinerPattern::FMLSv4f32_OP2
);
3885 if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3886 AArch64::FMULv4i32_indexed
)) {
3887 Patterns
.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1
);
3889 } else if (canCombineWithFMUL(MBB
, Root
.getOperand(1),
3890 AArch64::FMULv4f32
)) {
3891 Patterns
.push_back(MachineCombinerPattern::FMLSv4f32_OP1
);
3899 /// Return true when a code sequence can improve throughput. It
3900 /// should be called only for instructions in loops.
3901 /// \param Pattern - combiner pattern
3902 bool AArch64InstrInfo::isThroughputPattern(
3903 MachineCombinerPattern Pattern
) const {
3907 case MachineCombinerPattern::FMULADDS_OP1
:
3908 case MachineCombinerPattern::FMULADDS_OP2
:
3909 case MachineCombinerPattern::FMULSUBS_OP1
:
3910 case MachineCombinerPattern::FMULSUBS_OP2
:
3911 case MachineCombinerPattern::FMULADDD_OP1
:
3912 case MachineCombinerPattern::FMULADDD_OP2
:
3913 case MachineCombinerPattern::FMULSUBD_OP1
:
3914 case MachineCombinerPattern::FMULSUBD_OP2
:
3915 case MachineCombinerPattern::FNMULSUBS_OP1
:
3916 case MachineCombinerPattern::FNMULSUBD_OP1
:
3917 case MachineCombinerPattern::FMLAv1i32_indexed_OP1
:
3918 case MachineCombinerPattern::FMLAv1i32_indexed_OP2
:
3919 case MachineCombinerPattern::FMLAv1i64_indexed_OP1
:
3920 case MachineCombinerPattern::FMLAv1i64_indexed_OP2
:
3921 case MachineCombinerPattern::FMLAv2f32_OP2
:
3922 case MachineCombinerPattern::FMLAv2f32_OP1
:
3923 case MachineCombinerPattern::FMLAv2f64_OP1
:
3924 case MachineCombinerPattern::FMLAv2f64_OP2
:
3925 case MachineCombinerPattern::FMLAv2i32_indexed_OP1
:
3926 case MachineCombinerPattern::FMLAv2i32_indexed_OP2
:
3927 case MachineCombinerPattern::FMLAv2i64_indexed_OP1
:
3928 case MachineCombinerPattern::FMLAv2i64_indexed_OP2
:
3929 case MachineCombinerPattern::FMLAv4f32_OP1
:
3930 case MachineCombinerPattern::FMLAv4f32_OP2
:
3931 case MachineCombinerPattern::FMLAv4i32_indexed_OP1
:
3932 case MachineCombinerPattern::FMLAv4i32_indexed_OP2
:
3933 case MachineCombinerPattern::FMLSv1i32_indexed_OP2
:
3934 case MachineCombinerPattern::FMLSv1i64_indexed_OP2
:
3935 case MachineCombinerPattern::FMLSv2i32_indexed_OP2
:
3936 case MachineCombinerPattern::FMLSv2i64_indexed_OP2
:
3937 case MachineCombinerPattern::FMLSv2f32_OP2
:
3938 case MachineCombinerPattern::FMLSv2f64_OP2
:
3939 case MachineCombinerPattern::FMLSv4i32_indexed_OP2
:
3940 case MachineCombinerPattern::FMLSv4f32_OP2
:
3942 } // end switch (Pattern)
3945 /// Return true when there is potentially a faster code sequence for an
3946 /// instruction chain ending in \p Root. All potential patterns are listed in
3947 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3948 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3950 bool AArch64InstrInfo::getMachineCombinerPatterns(
3952 SmallVectorImpl
<MachineCombinerPattern
> &Patterns
) const {
3954 if (getMaddPatterns(Root
, Patterns
))
3956 // Floating point patterns
3957 if (getFMAPatterns(Root
, Patterns
))
3960 return TargetInstrInfo::getMachineCombinerPatterns(Root
, Patterns
);
/// Operand layout genFusedMultiply() uses for the fused instruction.
enum class FMAInstKind {
  /// Multiply sources first, addend last.
  Default,
  /// Addend first, then the multiply sources and the multiply's immediate
  /// (operand 3).
  Indexed,
  /// Addend first, then the multiply sources.
  Accumulator
};
3964 /// genFusedMultiply - Generate fused multiply instructions.
3965 /// This function supports both integer and floating point instructions.
3966 /// A typical example:
3969 /// ==> F|MADD R,A,B,C
3970 /// \param MF Containing MachineFunction
3971 /// \param MRI Register information
3972 /// \param TII Target information
3973 /// \param Root is the F|ADD instruction
3974 /// \param [out] InsInstrs is a vector of machine instructions and will
3975 /// contain the generated madd instruction
3976 /// \param IdxMulOpd is index of operand in Root that is the result of
3977 /// the F|MUL. In the example above IdxMulOpd is 1.
3978 /// \param MaddOpc the opcode fo the f|madd instruction
3979 /// \param RC Register class of operands
3980 /// \param kind of fma instruction (addressing mode) to be generated
3981 /// \param ReplacedAddend is the result register from the instruction
3982 /// replacing the non-combined operand, if any.
3983 static MachineInstr
*
3984 genFusedMultiply(MachineFunction
&MF
, MachineRegisterInfo
&MRI
,
3985 const TargetInstrInfo
*TII
, MachineInstr
&Root
,
3986 SmallVectorImpl
<MachineInstr
*> &InsInstrs
, unsigned IdxMulOpd
,
3987 unsigned MaddOpc
, const TargetRegisterClass
*RC
,
3988 FMAInstKind kind
= FMAInstKind::Default
,
3989 const unsigned *ReplacedAddend
= nullptr) {
3990 assert(IdxMulOpd
== 1 || IdxMulOpd
== 2);
3992 unsigned IdxOtherOpd
= IdxMulOpd
== 1 ? 2 : 1;
3993 MachineInstr
*MUL
= MRI
.getUniqueVRegDef(Root
.getOperand(IdxMulOpd
).getReg());
3994 unsigned ResultReg
= Root
.getOperand(0).getReg();
3995 unsigned SrcReg0
= MUL
->getOperand(1).getReg();
3996 bool Src0IsKill
= MUL
->getOperand(1).isKill();
3997 unsigned SrcReg1
= MUL
->getOperand(2).getReg();
3998 bool Src1IsKill
= MUL
->getOperand(2).isKill();
4002 if (ReplacedAddend
) {
4003 // If we just generated a new addend, we must be it's only use.
4004 SrcReg2
= *ReplacedAddend
;
4007 SrcReg2
= Root
.getOperand(IdxOtherOpd
).getReg();
4008 Src2IsKill
= Root
.getOperand(IdxOtherOpd
).isKill();
4011 if (TargetRegisterInfo::isVirtualRegister(ResultReg
))
4012 MRI
.constrainRegClass(ResultReg
, RC
);
4013 if (TargetRegisterInfo::isVirtualRegister(SrcReg0
))
4014 MRI
.constrainRegClass(SrcReg0
, RC
);
4015 if (TargetRegisterInfo::isVirtualRegister(SrcReg1
))
4016 MRI
.constrainRegClass(SrcReg1
, RC
);
4017 if (TargetRegisterInfo::isVirtualRegister(SrcReg2
))
4018 MRI
.constrainRegClass(SrcReg2
, RC
);
4020 MachineInstrBuilder MIB
;
4021 if (kind
== FMAInstKind::Default
)
4022 MIB
= BuildMI(MF
, Root
.getDebugLoc(), TII
->get(MaddOpc
), ResultReg
)
4023 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
4024 .addReg(SrcReg1
, getKillRegState(Src1IsKill
))
4025 .addReg(SrcReg2
, getKillRegState(Src2IsKill
));
4026 else if (kind
== FMAInstKind::Indexed
)
4027 MIB
= BuildMI(MF
, Root
.getDebugLoc(), TII
->get(MaddOpc
), ResultReg
)
4028 .addReg(SrcReg2
, getKillRegState(Src2IsKill
))
4029 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
4030 .addReg(SrcReg1
, getKillRegState(Src1IsKill
))
4031 .addImm(MUL
->getOperand(3).getImm());
4032 else if (kind
== FMAInstKind::Accumulator
)
4033 MIB
= BuildMI(MF
, Root
.getDebugLoc(), TII
->get(MaddOpc
), ResultReg
)
4034 .addReg(SrcReg2
, getKillRegState(Src2IsKill
))
4035 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
4036 .addReg(SrcReg1
, getKillRegState(Src1IsKill
));
4038 assert(false && "Invalid FMA instruction kind \n");
4039 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
4040 InsInstrs
.push_back(MIB
);
4044 /// genMaddR - Generate madd instruction and combine mul and add using
4045 /// an extra virtual register
4046 /// Example - an ADD intermediate needs to be stored in a register:
4049 /// ==> ORR V, ZR, Imm
4050 /// ==> MADD R,A,B,V
4051 /// \param MF Containing MachineFunction
4052 /// \param MRI Register information
4053 /// \param TII Target information
4054 /// \param Root is the ADD instruction
4055 /// \param [out] InsInstrs is a vector of machine instructions and will
4056 /// contain the generated madd instruction
4057 /// \param IdxMulOpd is index of operand in Root that is the result of
4058 /// the MUL. In the example above IdxMulOpd is 1.
4059 /// \param MaddOpc the opcode fo the madd instruction
4060 /// \param VR is a virtual register that holds the value of an ADD operand
4061 /// (V in the example above).
4062 /// \param RC Register class of operands
4063 static MachineInstr
*genMaddR(MachineFunction
&MF
, MachineRegisterInfo
&MRI
,
4064 const TargetInstrInfo
*TII
, MachineInstr
&Root
,
4065 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
4066 unsigned IdxMulOpd
, unsigned MaddOpc
, unsigned VR
,
4067 const TargetRegisterClass
*RC
) {
4068 assert(IdxMulOpd
== 1 || IdxMulOpd
== 2);
4070 MachineInstr
*MUL
= MRI
.getUniqueVRegDef(Root
.getOperand(IdxMulOpd
).getReg());
4071 unsigned ResultReg
= Root
.getOperand(0).getReg();
4072 unsigned SrcReg0
= MUL
->getOperand(1).getReg();
4073 bool Src0IsKill
= MUL
->getOperand(1).isKill();
4074 unsigned SrcReg1
= MUL
->getOperand(2).getReg();
4075 bool Src1IsKill
= MUL
->getOperand(2).isKill();
4077 if (TargetRegisterInfo::isVirtualRegister(ResultReg
))
4078 MRI
.constrainRegClass(ResultReg
, RC
);
4079 if (TargetRegisterInfo::isVirtualRegister(SrcReg0
))
4080 MRI
.constrainRegClass(SrcReg0
, RC
);
4081 if (TargetRegisterInfo::isVirtualRegister(SrcReg1
))
4082 MRI
.constrainRegClass(SrcReg1
, RC
);
4083 if (TargetRegisterInfo::isVirtualRegister(VR
))
4084 MRI
.constrainRegClass(VR
, RC
);
4086 MachineInstrBuilder MIB
=
4087 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(MaddOpc
), ResultReg
)
4088 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
4089 .addReg(SrcReg1
, getKillRegState(Src1IsKill
))
4092 InsInstrs
.push_back(MIB
);
4096 /// When getMachineCombinerPatterns() finds potential patterns,
4097 /// this function generates the instructions that could replace the
4098 /// original code sequence
4099 void AArch64InstrInfo::genAlternativeCodeSequence(
4100 MachineInstr
&Root
, MachineCombinerPattern Pattern
,
4101 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
4102 SmallVectorImpl
<MachineInstr
*> &DelInstrs
,
4103 DenseMap
<unsigned, unsigned> &InstrIdxForVirtReg
) const {
4104 MachineBasicBlock
&MBB
= *Root
.getParent();
4105 MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
4106 MachineFunction
&MF
= *MBB
.getParent();
4107 const TargetInstrInfo
*TII
= MF
.getSubtarget().getInstrInfo();
4110 const TargetRegisterClass
*RC
;
4114 // Reassociate instructions.
4115 TargetInstrInfo::genAlternativeCodeSequence(Root
, Pattern
, InsInstrs
,
4116 DelInstrs
, InstrIdxForVirtReg
);
4118 case MachineCombinerPattern::MULADDW_OP1
:
4119 case MachineCombinerPattern::MULADDX_OP1
:
4123 // --- Create(MADD);
4124 if (Pattern
== MachineCombinerPattern::MULADDW_OP1
) {
4125 Opc
= AArch64::MADDWrrr
;
4126 RC
= &AArch64::GPR32RegClass
;
4128 Opc
= AArch64::MADDXrrr
;
4129 RC
= &AArch64::GPR64RegClass
;
4131 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
4133 case MachineCombinerPattern::MULADDW_OP2
:
4134 case MachineCombinerPattern::MULADDX_OP2
:
4138 // --- Create(MADD);
4139 if (Pattern
== MachineCombinerPattern::MULADDW_OP2
) {
4140 Opc
= AArch64::MADDWrrr
;
4141 RC
= &AArch64::GPR32RegClass
;
4143 Opc
= AArch64::MADDXrrr
;
4144 RC
= &AArch64::GPR64RegClass
;
4146 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
4148 case MachineCombinerPattern::MULADDWI_OP1
:
4149 case MachineCombinerPattern::MULADDXI_OP1
: {
4152 // ==> ORR V, ZR, Imm
4154 // --- Create(MADD);
4155 const TargetRegisterClass
*OrrRC
;
4156 unsigned BitSize
, OrrOpc
, ZeroReg
;
4157 if (Pattern
== MachineCombinerPattern::MULADDWI_OP1
) {
4158 OrrOpc
= AArch64::ORRWri
;
4159 OrrRC
= &AArch64::GPR32spRegClass
;
4161 ZeroReg
= AArch64::WZR
;
4162 Opc
= AArch64::MADDWrrr
;
4163 RC
= &AArch64::GPR32RegClass
;
4165 OrrOpc
= AArch64::ORRXri
;
4166 OrrRC
= &AArch64::GPR64spRegClass
;
4168 ZeroReg
= AArch64::XZR
;
4169 Opc
= AArch64::MADDXrrr
;
4170 RC
= &AArch64::GPR64RegClass
;
4172 unsigned NewVR
= MRI
.createVirtualRegister(OrrRC
);
4173 uint64_t Imm
= Root
.getOperand(2).getImm();
4175 if (Root
.getOperand(3).isImm()) {
4176 unsigned Val
= Root
.getOperand(3).getImm();
4179 uint64_t UImm
= SignExtend64(Imm
, BitSize
);
4181 if (AArch64_AM::processLogicalImmediate(UImm
, BitSize
, Encoding
)) {
4182 MachineInstrBuilder MIB1
=
4183 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(OrrOpc
), NewVR
)
4186 InsInstrs
.push_back(MIB1
);
4187 InstrIdxForVirtReg
.insert(std::make_pair(NewVR
, 0));
4188 MUL
= genMaddR(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, NewVR
, RC
);
4192 case MachineCombinerPattern::MULSUBW_OP1
:
4193 case MachineCombinerPattern::MULSUBX_OP1
: {
4197 // ==> MADD R,A,B,V // = -C + A*B
4198 // --- Create(MADD);
4199 const TargetRegisterClass
*SubRC
;
4200 unsigned SubOpc
, ZeroReg
;
4201 if (Pattern
== MachineCombinerPattern::MULSUBW_OP1
) {
4202 SubOpc
= AArch64::SUBWrr
;
4203 SubRC
= &AArch64::GPR32spRegClass
;
4204 ZeroReg
= AArch64::WZR
;
4205 Opc
= AArch64::MADDWrrr
;
4206 RC
= &AArch64::GPR32RegClass
;
4208 SubOpc
= AArch64::SUBXrr
;
4209 SubRC
= &AArch64::GPR64spRegClass
;
4210 ZeroReg
= AArch64::XZR
;
4211 Opc
= AArch64::MADDXrrr
;
4212 RC
= &AArch64::GPR64RegClass
;
4214 unsigned NewVR
= MRI
.createVirtualRegister(SubRC
);
4216 MachineInstrBuilder MIB1
=
4217 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(SubOpc
), NewVR
)
4219 .add(Root
.getOperand(2));
4220 InsInstrs
.push_back(MIB1
);
4221 InstrIdxForVirtReg
.insert(std::make_pair(NewVR
, 0));
4222 MUL
= genMaddR(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, NewVR
, RC
);
4225 case MachineCombinerPattern::MULSUBW_OP2
:
4226 case MachineCombinerPattern::MULSUBX_OP2
:
4229 // ==> MSUB R,A,B,C (computes C - A*B)
4230 // --- Create(MSUB);
4231 if (Pattern
== MachineCombinerPattern::MULSUBW_OP2
) {
4232 Opc
= AArch64::MSUBWrrr
;
4233 RC
= &AArch64::GPR32RegClass
;
4235 Opc
= AArch64::MSUBXrrr
;
4236 RC
= &AArch64::GPR64RegClass
;
4238 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
4240 case MachineCombinerPattern::MULSUBWI_OP1
:
4241 case MachineCombinerPattern::MULSUBXI_OP1
: {
4244 // ==> ORR V, ZR, -Imm
4245 // ==> MADD R,A,B,V // = -Imm + A*B
4246 // --- Create(MADD);
4247 const TargetRegisterClass
*OrrRC
;
4248 unsigned BitSize
, OrrOpc
, ZeroReg
;
4249 if (Pattern
== MachineCombinerPattern::MULSUBWI_OP1
) {
4250 OrrOpc
= AArch64::ORRWri
;
4251 OrrRC
= &AArch64::GPR32spRegClass
;
4253 ZeroReg
= AArch64::WZR
;
4254 Opc
= AArch64::MADDWrrr
;
4255 RC
= &AArch64::GPR32RegClass
;
4257 OrrOpc
= AArch64::ORRXri
;
4258 OrrRC
= &AArch64::GPR64spRegClass
;
4260 ZeroReg
= AArch64::XZR
;
4261 Opc
= AArch64::MADDXrrr
;
4262 RC
= &AArch64::GPR64RegClass
;
4264 unsigned NewVR
= MRI
.createVirtualRegister(OrrRC
);
4265 uint64_t Imm
= Root
.getOperand(2).getImm();
4266 if (Root
.getOperand(3).isImm()) {
4267 unsigned Val
= Root
.getOperand(3).getImm();
4270 uint64_t UImm
= SignExtend64(-Imm
, BitSize
);
4272 if (AArch64_AM::processLogicalImmediate(UImm
, BitSize
, Encoding
)) {
4273 MachineInstrBuilder MIB1
=
4274 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(OrrOpc
), NewVR
)
4277 InsInstrs
.push_back(MIB1
);
4278 InstrIdxForVirtReg
.insert(std::make_pair(NewVR
, 0));
4279 MUL
= genMaddR(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, NewVR
, RC
);
4283 // Floating Point Support
4284 case MachineCombinerPattern::FMULADDS_OP1
:
4285 case MachineCombinerPattern::FMULADDD_OP1
:
4289 // --- Create(MADD);
4290 if (Pattern
== MachineCombinerPattern::FMULADDS_OP1
) {
4291 Opc
= AArch64::FMADDSrrr
;
4292 RC
= &AArch64::FPR32RegClass
;
4294 Opc
= AArch64::FMADDDrrr
;
4295 RC
= &AArch64::FPR64RegClass
;
4297 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
4299 case MachineCombinerPattern::FMULADDS_OP2
:
4300 case MachineCombinerPattern::FMULADDD_OP2
:
4303 // ==> FMADD R,A,B,C
4304 // --- Create(FMADD);
4305 if (Pattern
== MachineCombinerPattern::FMULADDS_OP2
) {
4306 Opc
= AArch64::FMADDSrrr
;
4307 RC
= &AArch64::FPR32RegClass
;
4309 Opc
= AArch64::FMADDDrrr
;
4310 RC
= &AArch64::FPR64RegClass
;
4312 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
4315 case MachineCombinerPattern::FMLAv1i32_indexed_OP1
:
4316 Opc
= AArch64::FMLAv1i32_indexed
;
4317 RC
= &AArch64::FPR32RegClass
;
4318 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4319 FMAInstKind::Indexed
);
4321 case MachineCombinerPattern::FMLAv1i32_indexed_OP2
:
4322 Opc
= AArch64::FMLAv1i32_indexed
;
4323 RC
= &AArch64::FPR32RegClass
;
4324 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4325 FMAInstKind::Indexed
);
4328 case MachineCombinerPattern::FMLAv1i64_indexed_OP1
:
4329 Opc
= AArch64::FMLAv1i64_indexed
;
4330 RC
= &AArch64::FPR64RegClass
;
4331 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4332 FMAInstKind::Indexed
);
4334 case MachineCombinerPattern::FMLAv1i64_indexed_OP2
:
4335 Opc
= AArch64::FMLAv1i64_indexed
;
4336 RC
= &AArch64::FPR64RegClass
;
4337 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4338 FMAInstKind::Indexed
);
4341 case MachineCombinerPattern::FMLAv2i32_indexed_OP1
:
4342 case MachineCombinerPattern::FMLAv2f32_OP1
:
4343 RC
= &AArch64::FPR64RegClass
;
4344 if (Pattern
== MachineCombinerPattern::FMLAv2i32_indexed_OP1
) {
4345 Opc
= AArch64::FMLAv2i32_indexed
;
4346 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4347 FMAInstKind::Indexed
);
4349 Opc
= AArch64::FMLAv2f32
;
4350 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4351 FMAInstKind::Accumulator
);
4354 case MachineCombinerPattern::FMLAv2i32_indexed_OP2
:
4355 case MachineCombinerPattern::FMLAv2f32_OP2
:
4356 RC
= &AArch64::FPR64RegClass
;
4357 if (Pattern
== MachineCombinerPattern::FMLAv2i32_indexed_OP2
) {
4358 Opc
= AArch64::FMLAv2i32_indexed
;
4359 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4360 FMAInstKind::Indexed
);
4362 Opc
= AArch64::FMLAv2f32
;
4363 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4364 FMAInstKind::Accumulator
);
4368 case MachineCombinerPattern::FMLAv2i64_indexed_OP1
:
4369 case MachineCombinerPattern::FMLAv2f64_OP1
:
4370 RC
= &AArch64::FPR128RegClass
;
4371 if (Pattern
== MachineCombinerPattern::FMLAv2i64_indexed_OP1
) {
4372 Opc
= AArch64::FMLAv2i64_indexed
;
4373 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4374 FMAInstKind::Indexed
);
4376 Opc
= AArch64::FMLAv2f64
;
4377 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4378 FMAInstKind::Accumulator
);
4381 case MachineCombinerPattern::FMLAv2i64_indexed_OP2
:
4382 case MachineCombinerPattern::FMLAv2f64_OP2
:
4383 RC
= &AArch64::FPR128RegClass
;
4384 if (Pattern
== MachineCombinerPattern::FMLAv2i64_indexed_OP2
) {
4385 Opc
= AArch64::FMLAv2i64_indexed
;
4386 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4387 FMAInstKind::Indexed
);
4389 Opc
= AArch64::FMLAv2f64
;
4390 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4391 FMAInstKind::Accumulator
);
4395 case MachineCombinerPattern::FMLAv4i32_indexed_OP1
:
4396 case MachineCombinerPattern::FMLAv4f32_OP1
:
4397 RC
= &AArch64::FPR128RegClass
;
4398 if (Pattern
== MachineCombinerPattern::FMLAv4i32_indexed_OP1
) {
4399 Opc
= AArch64::FMLAv4i32_indexed
;
4400 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4401 FMAInstKind::Indexed
);
4403 Opc
= AArch64::FMLAv4f32
;
4404 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4405 FMAInstKind::Accumulator
);
4409 case MachineCombinerPattern::FMLAv4i32_indexed_OP2
:
4410 case MachineCombinerPattern::FMLAv4f32_OP2
:
4411 RC
= &AArch64::FPR128RegClass
;
4412 if (Pattern
== MachineCombinerPattern::FMLAv4i32_indexed_OP2
) {
4413 Opc
= AArch64::FMLAv4i32_indexed
;
4414 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4415 FMAInstKind::Indexed
);
4417 Opc
= AArch64::FMLAv4f32
;
4418 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4419 FMAInstKind::Accumulator
);
4423 case MachineCombinerPattern::FMULSUBS_OP1
:
4424 case MachineCombinerPattern::FMULSUBD_OP1
: {
4427 // ==> FNMSUB R,A,B,C // = -C + A*B
4428 // --- Create(FNMSUB);
4429 if (Pattern
== MachineCombinerPattern::FMULSUBS_OP1
) {
4430 Opc
= AArch64::FNMSUBSrrr
;
4431 RC
= &AArch64::FPR32RegClass
;
4433 Opc
= AArch64::FNMSUBDrrr
;
4434 RC
= &AArch64::FPR64RegClass
;
4436 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
4440 case MachineCombinerPattern::FNMULSUBS_OP1
:
4441 case MachineCombinerPattern::FNMULSUBD_OP1
: {
4444 // ==> FNMADD R,A,B,C // = -A*B - C
4445 // --- Create(FNMADD);
4446 if (Pattern
== MachineCombinerPattern::FNMULSUBS_OP1
) {
4447 Opc
= AArch64::FNMADDSrrr
;
4448 RC
= &AArch64::FPR32RegClass
;
4450 Opc
= AArch64::FNMADDDrrr
;
4451 RC
= &AArch64::FPR64RegClass
;
4453 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
4457 case MachineCombinerPattern::FMULSUBS_OP2
:
4458 case MachineCombinerPattern::FMULSUBD_OP2
: {
4461 // ==> FMSUB R,A,B,C (computes C - A*B)
4462 // --- Create(FMSUB);
4463 if (Pattern
== MachineCombinerPattern::FMULSUBS_OP2
) {
4464 Opc
= AArch64::FMSUBSrrr
;
4465 RC
= &AArch64::FPR32RegClass
;
4467 Opc
= AArch64::FMSUBDrrr
;
4468 RC
= &AArch64::FPR64RegClass
;
4470 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
4474 case MachineCombinerPattern::FMLSv1i32_indexed_OP2
:
4475 Opc
= AArch64::FMLSv1i32_indexed
;
4476 RC
= &AArch64::FPR32RegClass
;
4477 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4478 FMAInstKind::Indexed
);
4481 case MachineCombinerPattern::FMLSv1i64_indexed_OP2
:
4482 Opc
= AArch64::FMLSv1i64_indexed
;
4483 RC
= &AArch64::FPR64RegClass
;
4484 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4485 FMAInstKind::Indexed
);
4488 case MachineCombinerPattern::FMLSv2f32_OP2
:
4489 case MachineCombinerPattern::FMLSv2i32_indexed_OP2
:
4490 RC
= &AArch64::FPR64RegClass
;
4491 if (Pattern
== MachineCombinerPattern::FMLSv2i32_indexed_OP2
) {
4492 Opc
= AArch64::FMLSv2i32_indexed
;
4493 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4494 FMAInstKind::Indexed
);
4496 Opc
= AArch64::FMLSv2f32
;
4497 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4498 FMAInstKind::Accumulator
);
4502 case MachineCombinerPattern::FMLSv2f64_OP2
:
4503 case MachineCombinerPattern::FMLSv2i64_indexed_OP2
:
4504 RC
= &AArch64::FPR128RegClass
;
4505 if (Pattern
== MachineCombinerPattern::FMLSv2i64_indexed_OP2
) {
4506 Opc
= AArch64::FMLSv2i64_indexed
;
4507 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4508 FMAInstKind::Indexed
);
4510 Opc
= AArch64::FMLSv2f64
;
4511 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4512 FMAInstKind::Accumulator
);
4516 case MachineCombinerPattern::FMLSv4f32_OP2
:
4517 case MachineCombinerPattern::FMLSv4i32_indexed_OP2
:
4518 RC
= &AArch64::FPR128RegClass
;
4519 if (Pattern
== MachineCombinerPattern::FMLSv4i32_indexed_OP2
) {
4520 Opc
= AArch64::FMLSv4i32_indexed
;
4521 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4522 FMAInstKind::Indexed
);
4524 Opc
= AArch64::FMLSv4f32
;
4525 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
,
4526 FMAInstKind::Accumulator
);
4529 case MachineCombinerPattern::FMLSv2f32_OP1
:
4530 case MachineCombinerPattern::FMLSv2i32_indexed_OP1
: {
4531 RC
= &AArch64::FPR64RegClass
;
4532 unsigned NewVR
= MRI
.createVirtualRegister(RC
);
4533 MachineInstrBuilder MIB1
=
4534 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(AArch64::FNEGv2f32
), NewVR
)
4535 .add(Root
.getOperand(2));
4536 InsInstrs
.push_back(MIB1
);
4537 InstrIdxForVirtReg
.insert(std::make_pair(NewVR
, 0));
4538 if (Pattern
== MachineCombinerPattern::FMLSv2i32_indexed_OP1
) {
4539 Opc
= AArch64::FMLAv2i32_indexed
;
4540 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4541 FMAInstKind::Indexed
, &NewVR
);
4543 Opc
= AArch64::FMLAv2f32
;
4544 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4545 FMAInstKind::Accumulator
, &NewVR
);
4549 case MachineCombinerPattern::FMLSv4f32_OP1
:
4550 case MachineCombinerPattern::FMLSv4i32_indexed_OP1
: {
4551 RC
= &AArch64::FPR128RegClass
;
4552 unsigned NewVR
= MRI
.createVirtualRegister(RC
);
4553 MachineInstrBuilder MIB1
=
4554 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(AArch64::FNEGv4f32
), NewVR
)
4555 .add(Root
.getOperand(2));
4556 InsInstrs
.push_back(MIB1
);
4557 InstrIdxForVirtReg
.insert(std::make_pair(NewVR
, 0));
4558 if (Pattern
== MachineCombinerPattern::FMLSv4i32_indexed_OP1
) {
4559 Opc
= AArch64::FMLAv4i32_indexed
;
4560 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4561 FMAInstKind::Indexed
, &NewVR
);
4563 Opc
= AArch64::FMLAv4f32
;
4564 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4565 FMAInstKind::Accumulator
, &NewVR
);
4569 case MachineCombinerPattern::FMLSv2f64_OP1
:
4570 case MachineCombinerPattern::FMLSv2i64_indexed_OP1
: {
4571 RC
= &AArch64::FPR128RegClass
;
4572 unsigned NewVR
= MRI
.createVirtualRegister(RC
);
4573 MachineInstrBuilder MIB1
=
4574 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(AArch64::FNEGv2f64
), NewVR
)
4575 .add(Root
.getOperand(2));
4576 InsInstrs
.push_back(MIB1
);
4577 InstrIdxForVirtReg
.insert(std::make_pair(NewVR
, 0));
4578 if (Pattern
== MachineCombinerPattern::FMLSv2i64_indexed_OP1
) {
4579 Opc
= AArch64::FMLAv2i64_indexed
;
4580 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4581 FMAInstKind::Indexed
, &NewVR
);
4583 Opc
= AArch64::FMLAv2f64
;
4584 MUL
= genFusedMultiply(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
,
4585 FMAInstKind::Accumulator
, &NewVR
);
4589 } // end switch (Pattern)
4590 // Record MUL and ADD/SUB for deletion
4591 DelInstrs
.push_back(MUL
);
4592 DelInstrs
.push_back(&Root
);
4595 /// Replace csinc-branch sequence by simple conditional branch
4599 /// csinc w9, wzr, wzr, <condition code>
4600 /// tbnz w9, #0, 0x44
/// is replaced by
4604 /// b.<inverted condition code>
/// and
4608 /// csinc w9, wzr, wzr, <condition code>
4609 /// tbz w9, #0, 0x44
/// is replaced by
4613 /// b.<condition code>
4616 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4617 /// compare's constant operand is a power of 2.
4621 /// and w8, w8, #0x400
/// feeding a compare-and-branch on w8 is replaced by
4626 /// tbnz w8, #10, L1
4629 /// \param MI Conditional Branch
4630 /// \return True when the simple conditional branch is generated
4632 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr
&MI
) const {
// Classification of MI's opcode, filled in by the switch below.
// IsNegativeBranch is set for the CBNZ/TBNZ cases and IsTestAndBranch for the
// TBNZ cases visible here.
// NOTE(review): some case labels and the assignments to TargetBBInMI are not
// visible in this listing -- confirm against the full file.
4633 bool IsNegativeBranch
= false;
4634 bool IsTestAndBranch
= false;
// Operand index of MI that holds the branch target block; it is read by the
// CSINC rewrite further down.
4635 unsigned TargetBBInMI
= 0;
4636 switch (MI
.getOpcode()) {
4638 llvm_unreachable("Unknown branch instruction?");
4645 case AArch64::CBNZW
:
4646 case AArch64::CBNZX
:
4648 IsNegativeBranch
= true;
4653 IsTestAndBranch
= true;
4655 case AArch64::TBNZW
:
4656 case AArch64::TBNZX
:
4658 IsNegativeBranch
= true;
4659 IsTestAndBranch
= true;
4662 // So we increment a zero register and test for bits other
4663 // than bit 0? Conservatively bail out in case the verifier
4664 // missed this case.
4665 if (IsTestAndBranch
&& MI
.getOperand(1).getImm())
4669 assert(MI
.getParent() && "Incomplete machine instruciton\n");
4670 MachineBasicBlock
*MBB
= MI
.getParent();
4671 MachineFunction
*MF
= MBB
->getParent();
4672 MachineRegisterInfo
*MRI
= &MF
->getRegInfo();
// Operand 0 of the branch is the register being tested; only virtual
// registers are followed back to their definition.
4673 unsigned VReg
= MI
.getOperand(0).getReg();
4674 if (!TargetRegisterInfo::isVirtualRegister(VReg
))
4677 MachineInstr
*DefMI
= MRI
->getVRegDef(VReg
);
4679 // Look through COPY instructions to find definition.
// Each copied-from register must have exactly one non-debug use and exactly
// one definition for the walk to continue.
4680 while (DefMI
->isCopy()) {
4681 unsigned CopyVReg
= DefMI
->getOperand(1).getReg();
4682 if (!MRI
->hasOneNonDBGUse(CopyVReg
))
4684 if (!MRI
->hasOneDef(CopyVReg
))
4686 DefMI
= MRI
->getVRegDef(CopyVReg
);
// Dispatch on the instruction that defines the tested register.
4689 switch (DefMI
->getOpcode()) {
4692 // Fold AND into a TBZ/TBNZ if constant operand is a power of 2.
4693 case AArch64::ANDWri
:
4694 case AArch64::ANDXri
: {
// Preconditions checked below: the branch is not already a test-and-branch,
// the AND is in the same block as the branch, and the AND result has a
// single non-debug use.
4695 if (IsTestAndBranch
)
4697 if (DefMI
->getParent() != MBB
)
4699 if (!MRI
->hasOneNonDBGUse(VReg
))
4702 bool Is32Bit
= (DefMI
->getOpcode() == AArch64::ANDWri
);
// Operand 2 of the AND is its logical immediate in encoded form; decode it
// to the actual bit mask for the register width.
4703 uint64_t Mask
= AArch64_AM::decodeLogicalImmediate(
4704 DefMI
->getOperand(2).getImm(), Is32Bit
? 32 : 64);
4705 if (!isPowerOf2_64(Mask
))
// Operand 1 of the AND is its source register, which the new branch tests.
4708 MachineOperand
&MO
= DefMI
->getOperand(1);
4709 unsigned NewReg
= MO
.getReg();
4710 if (!TargetRegisterInfo::isVirtualRegister(NewReg
))
4713 assert(!MRI
->def_empty(NewReg
) && "Register must be defined.");
4715 MachineBasicBlock
&RefToMBB
= *MBB
;
4716 MachineBasicBlock
*TBB
= MI
.getOperand(1).getMBB();
4717 DebugLoc DL
= MI
.getDebugLoc();
// Bit index to test = log2 of the single-bit mask.
4718 unsigned Imm
= Log2_64(Mask
);
// Bit indices below 32 select the W opcode, others the X opcode; the
// polarity follows the original branch.
4719 unsigned Opc
= (Imm
< 32)
4720 ? (IsNegativeBranch
? AArch64::TBNZW
: AArch64::TBZW
)
4721 : (IsNegativeBranch
? AArch64::TBNZX
: AArch64::TBZX
);
// NOTE(review): the operands appended to this BuildMI are not visible in
// this listing.
4722 MachineInstr
*NewMI
= BuildMI(RefToMBB
, MI
, DL
, get(Opc
))
4726 // Register lives on to the new branch now (presumably the TBZ/TBNZ built
// above rather than a CBZ -- confirm), so clear the kill flag on the AND.
4727 MO
.setIsKill(false);
4729 // For immediate smaller than 32, we need to use the 32-bit
4730 // variant (W) in all cases. Indeed the 64-bit variant does not
4731 // allow encoding them.
4732 // Therefore, if the input register is 64-bit, we need to take its
// 32-bit sub-register (sub_32).
4734 if (!Is32Bit
&& Imm
< 32)
4735 NewMI
->getOperand(0).setSubReg(AArch64::sub_32
);
4736 MI
.eraseFromParent();
4740 case AArch64::CSINCWr
:
4741 case AArch64::CSINCXr
: {
// Only a CSINC whose two source operands are both WZR or both XZR is
// handled.
4742 if (!(DefMI
->getOperand(1).getReg() == AArch64::WZR
&&
4743 DefMI
->getOperand(2).getReg() == AArch64::WZR
) &&
4744 !(DefMI
->getOperand(1).getReg() == AArch64::XZR
&&
4745 DefMI
->getOperand(2).getReg() == AArch64::XZR
))
// Reject the case where the CSINC itself has a def operand for NZCV.
4748 if (DefMI
->findRegisterDefOperandIdx(AArch64::NZCV
, true) != -1)
// Operand 3 of the CSINC is its condition code.
4751 AArch64CC::CondCode CC
= (AArch64CC::CondCode
)DefMI
->getOperand(3).getImm();
4752 // Convert only when the condition code is not modified between
4753 // the CSINC and the branch. The CC may be used by other
4754 // instructions in between.
4755 if (areCFlagsAccessedBetweenInstrs(DefMI
, MI
, &getRegisterInfo(), AK_Write
))
4757 MachineBasicBlock
&RefToMBB
= *MBB
;
4758 MachineBasicBlock
*TBB
= MI
.getOperand(TargetBBInMI
).getMBB();
4759 DebugLoc DL
= MI
.getDebugLoc();
// A "branch if non-zero" original needs the inverted condition.
4760 if (IsNegativeBranch
)
4761 CC
= AArch64CC::getInvertedCondCode(CC
);
// Emit the plain conditional branch before MI, then drop MI.
4762 BuildMI(RefToMBB
, MI
, DL
, get(AArch64::Bcc
)).addImm(CC
).addMBB(TBB
);
4763 MI
.eraseFromParent();
// Splits the operand target-flag word TF into two parts and returns them as
// a pair: first is TF restricted to the AArch64II::MO_FRAGMENT bits, second
// is TF with those bits cleared.
4769 std::pair
<unsigned, unsigned>
4770 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF
) const {
4771 const unsigned Mask
= AArch64II::MO_FRAGMENT
;
4772 return std::make_pair(TF
& Mask
, TF
& ~Mask
);
// Returns a table that pairs each AArch64II operand target flag listed below
// with a textual name ("aarch64-page", "aarch64-g0", ...).
// NOTE(review): presumably these names are what the MIR printer/parser uses
// for the flags -- confirm against the TargetInstrInfo documentation.
4775 ArrayRef
<std::pair
<unsigned, const char *>>
4776 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4777 using namespace AArch64II
;
// Function-local static, so the storage outlives the returned ArrayRef.
4779 static const std::pair
<unsigned, const char *> TargetFlags
[] = {
4780 {MO_PAGE
, "aarch64-page"}, {MO_PAGEOFF
, "aarch64-pageoff"},
4781 {MO_G3
, "aarch64-g3"}, {MO_G2
, "aarch64-g2"},
4782 {MO_G1
, "aarch64-g1"}, {MO_G0
, "aarch64-g0"},
4783 {MO_HI12
, "aarch64-hi12"}};
4784 return makeArrayRef(TargetFlags
);
// Returns a table that pairs each AArch64II operand target flag listed below
// (MO_COFFSTUB, MO_GOT, MO_NC, MO_S, MO_TLS, MO_DLLIMPORT) with a textual
// name.
// NOTE(review): per the function name these are the "bitmask" flags, i.e. the
// ones that can be combined with a direct flag -- confirm.
4787 ArrayRef
<std::pair
<unsigned, const char *>>
4788 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4789 using namespace AArch64II
;
// Function-local static, so the storage outlives the returned ArrayRef.
4791 static const std::pair
<unsigned, const char *> TargetFlags
[] = {
4792 {MO_COFFSTUB
, "aarch64-coffstub"},
4793 {MO_GOT
, "aarch64-got"}, {MO_NC
, "aarch64-nc"},
4794 {MO_S
, "aarch64-s"}, {MO_TLS
, "aarch64-tls"},
4795 {MO_DLLIMPORT
, "aarch64-dllimport"}};
4796 return makeArrayRef(TargetFlags
);
// Returns a table that pairs the two target-specific MachineMemOperand flags
// used here (MOSuppressPair, MOStridedAccess) with a textual name.
4799 ArrayRef
<std::pair
<MachineMemOperand::Flags
, const char *>>
4800 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
// Function-local static, so the storage outlives the returned ArrayRef.
4801 static const std::pair
<MachineMemOperand::Flags
, const char *> TargetFlags
[] =
4802 {{MOSuppressPair
, "aarch64-suppress-pair"},
4803 {MOStridedAccess
, "aarch64-strided-access"}};
4804 return makeArrayRef(TargetFlags
);
4807 /// Constants defining how certain sequences should be outlined.
4808 /// This encompasses how an outlined function should be called, and what kind of
4809 /// frame should be emitted for that outlined function.
4811 /// \p MachineOutlinerDefault implies that the function should be called with
4812 /// a save and restore of LR to the stack.
4816 /// I1 Save LR OUTLINED_FUNCTION:
4817 /// I2 --> BL OUTLINED_FUNCTION I1
4818 /// I3 Restore LR I2
4822 /// * Call construction overhead: 3 (save + BL + restore)
4823 /// * Frame construction overhead: 1 (ret)
4824 /// * Requires stack fixups? Yes
4826 /// \p MachineOutlinerTailCall implies that the function is being created from
4827 /// a sequence of instructions ending in a return.
4831 /// I1 OUTLINED_FUNCTION:
4832 /// I2 --> B OUTLINED_FUNCTION I1
4836 /// * Call construction overhead: 1 (B)
4837 /// * Frame construction overhead: 0 (Return included in sequence)
4838 /// * Requires stack fixups? No
4840 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4841 /// a BL instruction, but doesn't require LR to be saved and restored. This
4842 /// happens when LR is known to be dead.
4846 /// I1 OUTLINED_FUNCTION:
4847 /// I2 --> BL OUTLINED_FUNCTION I1
4852 /// * Call construction overhead: 1 (BL)
4853 /// * Frame construction overhead: 1 (RET)
4854 /// * Requires stack fixups? No
4856 /// \p MachineOutlinerThunk implies that the function is being created from
4857 /// a sequence of instructions ending in a call. The outlined function is
4858 /// called with a BL instruction, and the outlined function tail-calls the
4859 /// original call destination.
4863 /// I1 OUTLINED_FUNCTION:
4864 /// I2 --> BL OUTLINED_FUNCTION I1
4867 /// * Call construction overhead: 1 (BL)
4868 /// * Frame construction overhead: 0
4869 /// * Requires stack fixups? No
4871 /// \p MachineOutlinerRegSave implies that the function should be called with a
4872 /// save and restore of LR to an available register. This allows us to avoid
4873 /// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case: candidates called this way share the NoLRSave frame.
4878 /// I1 Save LR OUTLINED_FUNCTION:
4879 /// I2 --> BL OUTLINED_FUNCTION I1
4880 /// I3 Restore LR I2
4884 /// * Call construction overhead: 3 (save + BL + restore)
4885 /// * Frame construction overhead: 1 (ret)
4886 /// * Requires stack fixups? No
// Identifies how an outlined function is called and which frame it gets.
// The enumerators are used both as per-candidate call IDs (setCallInfo) and
// as the frame ID of the resulting outlined function.
4887 enum MachineOutlinerClass
{
4888 MachineOutlinerDefault
, ///< Emit a save, restore, call, and return.
4889 MachineOutlinerTailCall
, ///< Only emit a branch.
4890 MachineOutlinerNoLRSave
, ///< Emit a call and return.
4891 MachineOutlinerThunk
, ///< Emit a call and tail-call.
4892 MachineOutlinerRegSave
///< Same as default, but save to a register.
// Per-basic-block bit flags that isMBBSafeToOutlineFrom ORs into its Flags
// output and that getOutliningCandidateInfo later tests.
// NOTE(review): a HasCalls enumerator is referenced elsewhere in this file as
// MachineOutlinerMBBFlags::HasCalls, but its declaration is not visible in
// this listing.
4895 enum MachineOutlinerMBBFlags
{
// Set when the block has no spare register to hold LR and LR itself is not
// available.
4896 LRUnavailableSomewhere
= 0x2,
// Set when W16, W17 and NZCV are all available within the block.
4898 UnsafeRegsDead
= 0x8
// Searches GPR64 for a register that can hold LR across the outlined call
// for candidate C. Requires C's live register units to have been set (see
// the assert). Per the trailing comment, 0 means no suitable register.
// NOTE(review): the return type and the return statements are not visible in
// this listing.
4902 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate
&C
) const {
4903 assert(C
.LRUWasSet
&& "LRU wasn't set?");
4904 MachineFunction
*MF
= C
.getMF();
4905 const AArch64RegisterInfo
*ARI
= static_cast<const AArch64RegisterInfo
*>(
4906 MF
->getSubtarget().getRegisterInfo());
4908 // Check if there is an available register across the sequence that we can
// use to hold LR.
4910 for (unsigned Reg
: AArch64::GPR64RegClass
) {
// A register qualifies only if it is not reserved, is none of LR/X16/X17,
// and is available both in C.LRU and in C.UsedInSequence.
4911 if (!ARI
->isReservedReg(*MF
, Reg
) &&
4912 Reg
!= AArch64::LR
&& // LR is not reserved, but don't use it.
4913 Reg
!= AArch64::X16
&& // X16 is not guaranteed to be preserved.
4914 Reg
!= AArch64::X17
&& // Ditto for X17.
4915 C
.LRU
.available(Reg
) && C
.UsedInSequence
.available(Reg
))
4919 // No suitable register. Return 0.
// Decides whether and how the repeated instruction sequence described by
// RepeatedSequenceLocs can be outlined.
//
// RepeatedSequenceLocs is an in/out parameter: unsafe candidates are erased,
// the vector may be replaced by the subset that needs no stack fixups, and
// each remaining candidate gets its call kind and call size via setCallInfo.
//
// Returns a default-constructed OutlinedFunction when fewer than two
// candidates remain or the stack cannot be fixed up; otherwise an
// OutlinedFunction built from the remaining candidates, the sequence size,
// the frame construction cost in bytes, and the chosen frame ID.
4923 outliner::OutlinedFunction
4924 AArch64InstrInfo::getOutliningCandidateInfo(
4925 std::vector
<outliner::Candidate
> &RepeatedSequenceLocs
) const {
// All candidates are occurrences of the same sequence, so the first one is
// used as the representative for size and per-instruction checks.
4926 outliner::Candidate
&FirstCand
= RepeatedSequenceLocs
[0];
// Size in bytes of one occurrence, summed over its instructions.
// NOTE(review): the literal 0 init makes std::accumulate accumulate in int.
4927 unsigned SequenceSize
=
4928 std::accumulate(FirstCand
.front(), std::next(FirstCand
.back()), 0,
4929 [this](unsigned Sum
, const MachineInstr
&MI
) {
4930 return Sum
+ getInstSizeInBytes(MI
);
4933 // Properties about candidate MBBs that hold for all of them.
4934 unsigned FlagsSetInAll
= 0xF;
4936 // Intersect the per-candidate flags into FlagsSetInAll.
// NOTE(review): the original comment also mentioned computing liveness, but
// the visible lambda only ANDs the flags -- confirm against the full file.
4937 const TargetRegisterInfo
&TRI
= getRegisterInfo();
4938 std::for_each(RepeatedSequenceLocs
.begin(), RepeatedSequenceLocs
.end(),
4939 [&FlagsSetInAll
](outliner::Candidate
&C
) {
4940 FlagsSetInAll
&= C
.Flags
;
4943 // According to the AArch64 Procedure Call Standard, the following are
4944 // undefined on entry/exit from a function call:
4946 // * Registers x16, x17, (and thus w16, w17)
4947 // * Condition codes (and thus the NZCV register)
4949 // Because of this, we can't outline any sequence of instructions where one
4951 // of these registers is live into/across it. Thus, we need to delete
// those candidates.
4954 auto CantGuaranteeValueAcrossCall
= [&TRI
](outliner::Candidate
&C
) {
4955 // If the unsafe registers in this block are all dead, then we don't need
4956 // to compute liveness here.
4957 if (C
.Flags
& UnsafeRegsDead
)
// Otherwise the candidate is unsafe if any of W16, W17 or NZCV is not
// available in its live register units.
4960 LiveRegUnits LRU
= C
.LRU
;
4961 return (!LRU
.available(AArch64::W16
) || !LRU
.available(AArch64::W17
) ||
4962 !LRU
.available(AArch64::NZCV
));
4965 // Are there any candidates where those registers are live?
4966 if (!(FlagsSetInAll
& UnsafeRegsDead
)) {
4967 // Erase every candidate that violates the restrictions above. (It could be
4968 // true that we have viable candidates, so it's not worth bailing out in
4969 // the case that, say, 1 out of 20 candidates violate the restrictions.)
4970 RepeatedSequenceLocs
.erase(std::remove_if(RepeatedSequenceLocs
.begin(),
4971 RepeatedSequenceLocs
.end(),
4972 CantGuaranteeValueAcrossCall
),
4973 RepeatedSequenceLocs
.end());
4975 // If the sequence doesn't have enough candidates left, then we're done.
4976 if (RepeatedSequenceLocs
.size() < 2)
4977 return outliner::OutlinedFunction();
4980 // At this point, we have only "safe" candidates to outline. Figure out
4981 // frame + call instruction information.
4983 unsigned LastInstrOpcode
= RepeatedSequenceLocs
[0].back()->getOpcode();
4985 // Helper lambda which sets call information for every candidate.
4986 auto SetCandidateCallInfo
=
4987 [&RepeatedSequenceLocs
](unsigned CallID
, unsigned NumBytesForCall
) {
4988 for (outliner::Candidate
&C
: RepeatedSequenceLocs
)
4989 C
.setCallInfo(CallID
, NumBytesForCall
);
// Defaults: the default frame, with 4 bytes of frame construction cost.
4992 unsigned FrameID
= MachineOutlinerDefault
;
4993 unsigned NumBytesToCreateFrame
= 4;
// True if any candidate's function carries the "branch-target-enforcement"
// attribute; this disables the thunk variant for BLR below.
4995 bool HasBTI
= any_of(RepeatedSequenceLocs
, [](outliner::Candidate
&C
) {
4996 return C
.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4999 // Returns true if an instruction is safe to fix up, false otherwise.
5000 auto IsSafeToFixup
= [this, &TRI
](MachineInstr
&MI
) {
// Instructions that neither read nor modify SP need no fixup.
5004 if (!MI
.modifiesRegister(AArch64::SP
, &TRI
) &&
5005 !MI
.readsRegister(AArch64::SP
, &TRI
))
5008 // Any modification of SP will break our code to save/restore LR.
5009 // FIXME: We could handle some instructions which add a constant
5010 // offset to SP, with a bit more work.
5011 if (MI
.modifiesRegister(AArch64::SP
, &TRI
))
5014 // At this point, we have a stack instruction that we might need to
5015 // fix up. We'll handle it if it's a load or store.
5016 if (MI
.mayLoadOrStore()) {
5017 MachineOperand
*Base
; // Filled with the base operand of MI.
5018 int64_t Offset
; // Filled with the offset of MI.
5020 // Does it allow us to offset the base operand and is the base the
// stack pointer (SP)?
5022 if (!getMemOperandWithOffset(MI
, Base
, Offset
, &TRI
) || !Base
->isReg() ||
5023 Base
->getReg() != AArch64::SP
)
5026 // Find the minimum/maximum offset for this instruction and check
5027 // if fixing it up would be in range.
// NOTE(review): the declaration of MinOffset is not visible in this listing.
5029 MaxOffset
; // Unscaled offsets for the instruction.
5030 unsigned Scale
; // The scale to multiply the offsets by.
5031 unsigned DummyWidth
;
5032 getMemOpInfo(MI
.getOpcode(), Scale
, DummyWidth
, MinOffset
, MaxOffset
);
5034 Offset
+= 16; // Update the offset to what it would be if we outlined.
5035 if (Offset
< MinOffset
* Scale
|| Offset
> MaxOffset
* Scale
)
5038 // It's in range, so we can outline it.
5042 // FIXME: Add handling for instructions like "add x0, sp, #8".
5044 // We can't fix it up, so don't outline it.
5048 // True if it's possible to fix up each stack instruction in this sequence.
5049 // Important for frames/call variants that modify the stack.
5050 bool AllStackInstrsSafe
= std::all_of(
5051 FirstCand
.front(), std::next(FirstCand
.back()), IsSafeToFixup
);
5053 // If the last instruction in any candidate is a terminator, then we should
5054 // tail call all of the candidates.
5055 if (RepeatedSequenceLocs
[0].back()->isTerminator()) {
5056 FrameID
= MachineOutlinerTailCall
;
5057 NumBytesToCreateFrame
= 0;
5058 SetCandidateCallInfo(MachineOutlinerTailCall
, 4);
// A sequence ending in BL (or BLR when no candidate function uses branch
// target enforcement) is outlined as a thunk.
5061 else if (LastInstrOpcode
== AArch64::BL
||
5062 (LastInstrOpcode
== AArch64::BLR
&& !HasBTI
)) {
5063 // FIXME: Do we need to check if the code after this uses the value of LR?
5064 FrameID
= MachineOutlinerThunk
;
5065 NumBytesToCreateFrame
= 0;
5066 SetCandidateCallInfo(MachineOutlinerThunk
, 4);
5070 // We need to decide how to emit calls + frames. We can always emit the same
5071 // frame if we don't need to save to the stack. If we have to save to the
5072 // stack, then we need a different frame.
// NumBytesNoStackCalls accumulates the total call cost if only the candidates
// that avoid stack fixups are outlined.
5073 unsigned NumBytesNoStackCalls
= 0;
5074 std::vector
<outliner::Candidate
> CandidatesWithoutStackFixups
;
5076 for (outliner::Candidate
&C
: RepeatedSequenceLocs
) {
5079 // Is LR available? If so, we don't need a save.
5080 if (C
.LRU
.available(AArch64::LR
)) {
5081 NumBytesNoStackCalls
+= 4;
5082 C
.setCallInfo(MachineOutlinerNoLRSave
, 4);
5083 CandidatesWithoutStackFixups
.push_back(C
);
5086 // Is an unused register available? If so, we won't modify the stack, so
5087 // we can outline with the same frame type as those that don't save LR.
5088 else if (findRegisterToSaveLRTo(C
)) {
5089 NumBytesNoStackCalls
+= 12;
5090 C
.setCallInfo(MachineOutlinerRegSave
, 12);
5091 CandidatesWithoutStackFixups
.push_back(C
);
5094 // Is SP used in the sequence at all? If not, we don't have to modify
5095 // the stack, so we are guaranteed to get the same frame.
5096 else if (C
.UsedInSequence
.available(AArch64::SP
)) {
5097 NumBytesNoStackCalls
+= 12;
5098 C
.setCallInfo(MachineOutlinerDefault
, 12);
5099 CandidatesWithoutStackFixups
.push_back(C
);
5102 // If we outline this, we need to modify the stack. Pretend we don't
5103 // outline this by saving all of its bytes.
5105 NumBytesNoStackCalls
+= SequenceSize
;
5109 // If there are no places where we have to save LR, then note that we
5110 // don't have to update the stack. Otherwise, give every candidate the
5111 // default call type, as long as it's safe to do so.
// The comparison asks whether keeping only the fixup-free candidates costs no
// more than giving every candidate the 12-byte default call.
5112 if (!AllStackInstrsSafe
||
5113 NumBytesNoStackCalls
<= RepeatedSequenceLocs
.size() * 12) {
5114 RepeatedSequenceLocs
= CandidatesWithoutStackFixups
;
5115 FrameID
= MachineOutlinerNoLRSave
;
5117 SetCandidateCallInfo(MachineOutlinerDefault
, 12);
5120 // If we dropped all of the candidates, bail out here.
5121 if (RepeatedSequenceLocs
.size() < 2) {
5122 RepeatedSequenceLocs
.clear();
5123 return outliner::OutlinedFunction();
5127 // Does every candidate's MBB contain a call? If so, then we might have a call
// inside the outlined range.
5129 if (FlagsSetInAll
& MachineOutlinerMBBFlags::HasCalls
) {
5130 // Check if the range contains a call. These require a save + restore of the
// link register. The last instruction is deliberately excluded from this scan.
5132 bool ModStackToSaveLR
= false;
5133 if (std::any_of(FirstCand
.front(), FirstCand
.back(),
5134 [](const MachineInstr
&MI
) { return MI
.isCall(); }))
5135 ModStackToSaveLR
= true;
5137 // Handle the last instruction separately. If this is a tail call, then the
5138 // last instruction is a call. We don't want to save + restore in this case.
5139 // However, it could be possible that the last instruction is a call without
5140 // it being valid to tail call this sequence. We should consider this as
// a case that needs the save + restore as well.
5142 else if (FrameID
!= MachineOutlinerThunk
&&
5143 FrameID
!= MachineOutlinerTailCall
&& FirstCand
.back()->isCall())
5144 ModStackToSaveLR
= true;
5146 if (ModStackToSaveLR
) {
5147 // We can't fix up the stack. Bail out.
5148 if (!AllStackInstrsSafe
) {
5149 RepeatedSequenceLocs
.clear();
5150 return outliner::OutlinedFunction();
5153 // Save + restore LR.
5154 NumBytesToCreateFrame
+= 8;
5158 return outliner::OutlinedFunction(RepeatedSequenceLocs
, SequenceSize
,
5159 NumBytesToCreateFrame
, FrameID
);
// Reports whether the outliner may take sequences from MF.
// The visible checks reject linkonce_odr functions (unless
// OutlineFromLinkOnceODRs is set) and functions whose AArch64FunctionInfo is
// missing or reports that a red zone is present or unknown.
// NOTE(review): the return statements and the section-marking check described
// by the comments are not visible in this listing.
5162 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5163 MachineFunction
&MF
, bool OutlineFromLinkOnceODRs
) const {
5164 const Function
&F
= MF
.getFunction();
5166 // Can F be deduplicated by the linker? If it can, don't outline from it.
5167 if (!OutlineFromLinkOnceODRs
&& F
.hasLinkOnceODRLinkage())
5170 // Don't outline from functions with section markings; the program could
5171 // expect that all the code is in the named section.
5172 // FIXME: Allow outlining from multiple functions with the same section
// marking.
5177 // Outlining from functions with redzones is unsafe since the outliner may
5178 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
// outline from this function.
5180 AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
// getValueOr(true) treats an unset red-zone answer as "has a red zone".
5181 if (!AFI
|| AFI
->hasRedZone().getValueOr(true))
5184 // It's safe to outline from MF.
// Examines MBB for outlining safety and records what it finds in Flags
// (an output bitmask of MachineOutlinerMBBFlags values).
// The function requires liveness tracking on the parent function (see the
// assert).
// NOTE(review): the return statements are not visible in this listing; per
// the comments, a block is unsafe when W16, W17 or NZCV is unused inside the
// block but live out of it.
5188 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock
&MBB
,
5189 unsigned &Flags
) const {
5190 // Check if LR is available through all of the MBB. If it's not, then set
// a flag.
5192 assert(MBB
.getParent()->getRegInfo().tracksLiveness() &&
5193 "Suitable Machine Function for outlining must track liveness");
5194 LiveRegUnits
LRU(getRegisterInfo());
// Accumulate the register units touched by every instruction in the block,
// walking it in reverse.
5196 std::for_each(MBB
.rbegin(), MBB
.rend(),
5197 [&LRU
](MachineInstr
&MI
) { LRU
.accumulate(MI
); });
5199 // Check if each of the unsafe registers are available...
5200 bool W16AvailableInBlock
= LRU
.available(AArch64::W16
);
5201 bool W17AvailableInBlock
= LRU
.available(AArch64::W17
);
5202 bool NZCVAvailableInBlock
= LRU
.available(AArch64::NZCV
);
5204 // If all of these are dead (and not live out), we know we don't have to check
// them later. The live-out part is handled by the checks further below.
5206 if (W16AvailableInBlock
&& W17AvailableInBlock
&& NZCVAvailableInBlock
)
5207 Flags
|= MachineOutlinerMBBFlags::UnsafeRegsDead
;
5209 // Now, add the live outs to the set.
5210 LRU
.addLiveOuts(MBB
);
5212 // If any of these registers is available in the MBB, but also a live out of
5213 // the block, then we know outlining is unsafe.
5214 if (W16AvailableInBlock
&& !LRU
.available(AArch64::W16
))
5216 if (W17AvailableInBlock
&& !LRU
.available(AArch64::W17
))
5218 if (NZCVAvailableInBlock
&& !LRU
.available(AArch64::NZCV
))
5221 // Check if there's a call inside this MachineBasicBlock. If there is, then
// set the HasCalls flag.
5223 if (any_of(MBB
, [](MachineInstr
&MI
) { return MI
.isCall(); }))
5224 Flags
|= MachineOutlinerMBBFlags::HasCalls
;
5226 MachineFunction
*MF
= MBB
.getParent();
5228 // In the event that we outline, we may have to save LR. If there is an
5229 // available register in the MBB, then we'll always save LR there. Check if
// such a register exists.
5231 bool CanSaveLR
= false;
5232 const AArch64RegisterInfo
*ARI
= static_cast<const AArch64RegisterInfo
*>(
5233 MF
->getSubtarget().getRegisterInfo());
5235 // Check if there is an available register across the sequence that we can
// use: not reserved, none of LR/X16/X17, and available in LRU.
// NOTE(review): the loop body that updates CanSaveLR is not visible in this
// listing.
5237 for (unsigned Reg
: AArch64::GPR64RegClass
) {
5238 if (!ARI
->isReservedReg(*MF
, Reg
) && Reg
!= AArch64::LR
&&
5239 Reg
!= AArch64::X16
&& Reg
!= AArch64::X17
&& LRU
.available(Reg
)) {
5245 // Check if we lack a register we can save LR to, and if LR is not available
5246 // across the block. If both of those things are true, then we need to evaluate the
5247 // safety of outlining stack instructions later.
5248 if (!CanSaveLR
&& !LRU
.available(AArch64::LR
))
5249 Flags
|= MachineOutlinerMBBFlags::LRUnavailableSomewhere
;
5255 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator
&MIT
,
5256 unsigned Flags
) const {
5257 MachineInstr
&MI
= *MIT
;
5258 MachineBasicBlock
*MBB
= MI
.getParent();
5259 MachineFunction
*MF
= MBB
->getParent();
5260 AArch64FunctionInfo
*FuncInfo
= MF
->getInfo
<AArch64FunctionInfo
>();
5262 // Don't outline LOHs.
5263 if (FuncInfo
->getLOHRelated().count(&MI
))
5264 return outliner::InstrType::Illegal
;
5266 // Don't allow debug values to impact outlining type.
5267 if (MI
.isDebugInstr() || MI
.isIndirectDebugValue())
5268 return outliner::InstrType::Invisible
;
5270 // At this point, KILL instructions don't really tell us much so we can go
5271 // ahead and skip over them.
5273 return outliner::InstrType::Invisible
;
5275 // Is this a terminator for a basic block?
5276 if (MI
.isTerminator()) {
5278 // Is this the end of a function?
5279 if (MI
.getParent()->succ_empty())
5280 return outliner::InstrType::Legal
;
5282 // It's not, so don't outline it.
5283 return outliner::InstrType::Illegal
;
5286 // Make sure none of the operands are un-outlinable.
5287 for (const MachineOperand
&MOP
: MI
.operands()) {
5288 if (MOP
.isCPI() || MOP
.isJTI() || MOP
.isCFIIndex() || MOP
.isFI() ||
5289 MOP
.isTargetIndex())
5290 return outliner::InstrType::Illegal
;
5292 // If it uses LR or W30 explicitly, then don't touch it.
5293 if (MOP
.isReg() && !MOP
.isImplicit() &&
5294 (MOP
.getReg() == AArch64::LR
|| MOP
.getReg() == AArch64::W30
))
5295 return outliner::InstrType::Illegal
;
5298 // Special cases for instructions that can always be outlined, but will fail
5299 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
5300 // be outlined because they don't require a *specific* value to be in LR.
5301 if (MI
.getOpcode() == AArch64::ADRP
)
5302 return outliner::InstrType::Legal
;
5304 // If MI is a call we might be able to outline it. We don't want to outline
5305 // any calls that rely on the position of items on the stack. When we outline
5306 // something containing a call, we have to emit a save and restore of LR in
5307 // the outlined function. Currently, this always happens by saving LR to the
5308 // stack. Thus, if we outline, say, half the parameters for a function call
5309 // plus the call, then we'll break the callee's expectations for the layout
5312 // FIXME: Allow calls to functions which construct a stack frame, as long
5313 // as they don't access arguments on the stack.
5314 // FIXME: Figure out some way to analyze functions defined in other modules.
5315 // We should be able to compute the memory usage based on the IR calling
5316 // convention, even if we can't see the definition.
5318 // Get the function associated with the call. Look at each operand and find
5319 // the one that represents the callee and get its name.
5320 const Function
*Callee
= nullptr;
5321 for (const MachineOperand
&MOP
: MI
.operands()) {
5322 if (MOP
.isGlobal()) {
5323 Callee
= dyn_cast
<Function
>(MOP
.getGlobal());
5328 // Never outline calls to mcount. There isn't any rule that would require
5329 // this, but the Linux kernel's "ftrace" feature depends on it.
5330 if (Callee
&& Callee
->getName() == "\01_mcount")
5331 return outliner::InstrType::Illegal
;
5333 // If we don't know anything about the callee, assume it depends on the
5334 // stack layout of the caller. In that case, it's only legal to outline
5335 // as a tail-call. Whitelist the call instructions we know about so we
5336 // don't get unexpected results with call pseudo-instructions.
5337 auto UnknownCallOutlineType
= outliner::InstrType::Illegal
;
5338 if (MI
.getOpcode() == AArch64::BLR
|| MI
.getOpcode() == AArch64::BL
)
5339 UnknownCallOutlineType
= outliner::InstrType::LegalTerminator
;
5342 return UnknownCallOutlineType
;
5344 // We have a function we have information about. Check it if it's something
5345 // can safely outline.
5346 MachineFunction
*CalleeMF
= MF
->getMMI().getMachineFunction(*Callee
);
5348 // We don't know what's going on with the callee at all. Don't touch it.
5350 return UnknownCallOutlineType
;
5352 // Check if we know anything about the callee saves on the function. If we
5353 // don't, then don't touch it, since that implies that we haven't
5354 // computed anything about its stack frame yet.
5355 MachineFrameInfo
&MFI
= CalleeMF
->getFrameInfo();
5356 if (!MFI
.isCalleeSavedInfoValid() || MFI
.getStackSize() > 0 ||
5357 MFI
.getNumObjects() > 0)
5358 return UnknownCallOutlineType
;
5360 // At this point, we can say that CalleeMF ought to not pass anything on the
5361 // stack. Therefore, we can outline it.
5362 return outliner::InstrType::Legal
;
5365 // Don't outline positions.
5366 if (MI
.isPosition())
5367 return outliner::InstrType::Illegal
;
5369 // Don't touch the link register or W30.
5370 if (MI
.readsRegister(AArch64::W30
, &getRegisterInfo()) ||
5371 MI
.modifiesRegister(AArch64::W30
, &getRegisterInfo()))
5372 return outliner::InstrType::Illegal
;
5374 // Don't outline BTI instructions, because that will prevent the outlining
5375 // site from being indirectly callable.
5376 if (MI
.getOpcode() == AArch64::HINT
) {
5377 int64_t Imm
= MI
.getOperand(0).getImm();
5378 if (Imm
== 32 || Imm
== 34 || Imm
== 36 || Imm
== 38)
5379 return outliner::InstrType::Illegal
;
5382 return outliner::InstrType::Legal
;
5385 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock
&MBB
) const {
5386 for (MachineInstr
&MI
: MBB
) {
5387 MachineOperand
*Base
;
5391 // Is this a load or store with an immediate offset with SP as the base?
5392 if (!MI
.mayLoadOrStore() ||
5393 !getMemOperandWithOffsetWidth(MI
, Base
, Offset
, Width
, &RI
) ||
5394 (Base
->isReg() && Base
->getReg() != AArch64::SP
))
5397 // It is, so we have to fix it up.
5399 int64_t Dummy1
, Dummy2
;
5401 MachineOperand
&StackOffsetOperand
= getMemOpBaseRegImmOfsOffsetOperand(MI
);
5402 assert(StackOffsetOperand
.isImm() && "Stack offset wasn't immediate!");
5403 getMemOpInfo(MI
.getOpcode(), Scale
, Width
, Dummy1
, Dummy2
);
5404 assert(Scale
!= 0 && "Unexpected opcode!");
5406 // We've pushed the return address to the stack, so add 16 to the offset.
5407 // This is safe, since we already checked if it would overflow when we
5408 // checked if this instruction was legal to outline.
5409 int64_t NewImm
= (Offset
+ 16) / Scale
;
5410 StackOffsetOperand
.setImm(NewImm
);
5414 void AArch64InstrInfo::buildOutlinedFrame(
5415 MachineBasicBlock
&MBB
, MachineFunction
&MF
,
5416 const outliner::OutlinedFunction
&OF
) const {
5417 // For thunk outlining, rewrite the last instruction from a call to a
5419 if (OF
.FrameConstructionID
== MachineOutlinerThunk
) {
5420 MachineInstr
*Call
= &*--MBB
.instr_end();
5421 unsigned TailOpcode
;
5422 if (Call
->getOpcode() == AArch64::BL
) {
5423 TailOpcode
= AArch64::TCRETURNdi
;
5425 assert(Call
->getOpcode() == AArch64::BLR
);
5426 TailOpcode
= AArch64::TCRETURNriALL
;
5428 MachineInstr
*TC
= BuildMI(MF
, DebugLoc(), get(TailOpcode
))
5429 .add(Call
->getOperand(0))
5431 MBB
.insert(MBB
.end(), TC
);
5432 Call
->eraseFromParent();
5435 // Is there a call in the outlined range?
5436 auto IsNonTailCall
= [](MachineInstr
&MI
) {
5437 return MI
.isCall() && !MI
.isReturn();
5439 if (std::any_of(MBB
.instr_begin(), MBB
.instr_end(), IsNonTailCall
)) {
5440 // Fix up the instructions in the range, since we're going to modify the
5442 assert(OF
.FrameConstructionID
!= MachineOutlinerDefault
&&
5443 "Can only fix up stack references once");
5444 fixupPostOutline(MBB
);
5446 // LR has to be a live in so that we can save it.
5447 MBB
.addLiveIn(AArch64::LR
);
5449 MachineBasicBlock::iterator It
= MBB
.begin();
5450 MachineBasicBlock::iterator Et
= MBB
.end();
5452 if (OF
.FrameConstructionID
== MachineOutlinerTailCall
||
5453 OF
.FrameConstructionID
== MachineOutlinerThunk
)
5454 Et
= std::prev(MBB
.end());
5456 // Insert a save before the outlined region
5457 MachineInstr
*STRXpre
= BuildMI(MF
, DebugLoc(), get(AArch64::STRXpre
))
5458 .addReg(AArch64::SP
, RegState::Define
)
5459 .addReg(AArch64::LR
)
5460 .addReg(AArch64::SP
)
5462 It
= MBB
.insert(It
, STRXpre
);
5464 const TargetSubtargetInfo
&STI
= MF
.getSubtarget();
5465 const MCRegisterInfo
*MRI
= STI
.getRegisterInfo();
5466 unsigned DwarfReg
= MRI
->getDwarfRegNum(AArch64::LR
, true);
5468 // Add a CFI saying the stack was moved 16 B down.
5469 int64_t StackPosEntry
=
5470 MF
.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5471 BuildMI(MBB
, It
, DebugLoc(), get(AArch64::CFI_INSTRUCTION
))
5472 .addCFIIndex(StackPosEntry
)
5473 .setMIFlags(MachineInstr::FrameSetup
);
5475 // Add a CFI saying that the LR that we want to find is now 16 B higher than
5477 int64_t LRPosEntry
=
5478 MF
.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg
, 16));
5479 BuildMI(MBB
, It
, DebugLoc(), get(AArch64::CFI_INSTRUCTION
))
5480 .addCFIIndex(LRPosEntry
)
5481 .setMIFlags(MachineInstr::FrameSetup
);
5483 // Insert a restore before the terminator for the function.
5484 MachineInstr
*LDRXpost
= BuildMI(MF
, DebugLoc(), get(AArch64::LDRXpost
))
5485 .addReg(AArch64::SP
, RegState::Define
)
5486 .addReg(AArch64::LR
, RegState::Define
)
5487 .addReg(AArch64::SP
)
5489 Et
= MBB
.insert(Et
, LDRXpost
);
5492 // If this is a tail call outlined function, then there's already a return.
5493 if (OF
.FrameConstructionID
== MachineOutlinerTailCall
||
5494 OF
.FrameConstructionID
== MachineOutlinerThunk
)
5497 // It's not a tail call, so we have to insert the return ourselves.
5498 MachineInstr
*ret
= BuildMI(MF
, DebugLoc(), get(AArch64::RET
))
5499 .addReg(AArch64::LR
, RegState::Undef
);
5500 MBB
.insert(MBB
.end(), ret
);
5502 // Did we have to modify the stack by saving the link register?
5503 if (OF
.FrameConstructionID
!= MachineOutlinerDefault
)
5506 // We modified the stack.
5507 // Walk over the basic block and fix up all the stack accesses.
5508 fixupPostOutline(MBB
);
5511 MachineBasicBlock::iterator
AArch64InstrInfo::insertOutlinedCall(
5512 Module
&M
, MachineBasicBlock
&MBB
, MachineBasicBlock::iterator
&It
,
5513 MachineFunction
&MF
, const outliner::Candidate
&C
) const {
5515 // Are we tail calling?
5516 if (C
.CallConstructionID
== MachineOutlinerTailCall
) {
5517 // If yes, then we can just branch to the label.
5518 It
= MBB
.insert(It
, BuildMI(MF
, DebugLoc(), get(AArch64::TCRETURNdi
))
5519 .addGlobalAddress(M
.getNamedValue(MF
.getName()))
5524 // Are we saving the link register?
5525 if (C
.CallConstructionID
== MachineOutlinerNoLRSave
||
5526 C
.CallConstructionID
== MachineOutlinerThunk
) {
5527 // No, so just insert the call.
5528 It
= MBB
.insert(It
, BuildMI(MF
, DebugLoc(), get(AArch64::BL
))
5529 .addGlobalAddress(M
.getNamedValue(MF
.getName())));
5533 // We want to return the spot where we inserted the call.
5534 MachineBasicBlock::iterator CallPt
;
5536 // Instructions for saving and restoring LR around the call instruction we're
5539 MachineInstr
*Restore
;
5540 // Can we save to a register?
5541 if (C
.CallConstructionID
== MachineOutlinerRegSave
) {
5542 // FIXME: This logic should be sunk into a target-specific interface so that
5543 // we don't have to recompute the register.
5544 unsigned Reg
= findRegisterToSaveLRTo(C
);
5545 assert(Reg
!= 0 && "No callee-saved register available?");
5547 // Save and restore LR from that register.
5548 Save
= BuildMI(MF
, DebugLoc(), get(AArch64::ORRXrs
), Reg
)
5549 .addReg(AArch64::XZR
)
5550 .addReg(AArch64::LR
)
5552 Restore
= BuildMI(MF
, DebugLoc(), get(AArch64::ORRXrs
), AArch64::LR
)
5553 .addReg(AArch64::XZR
)
5557 // We have the default case. Save and restore from SP.
5558 Save
= BuildMI(MF
, DebugLoc(), get(AArch64::STRXpre
))
5559 .addReg(AArch64::SP
, RegState::Define
)
5560 .addReg(AArch64::LR
)
5561 .addReg(AArch64::SP
)
5563 Restore
= BuildMI(MF
, DebugLoc(), get(AArch64::LDRXpost
))
5564 .addReg(AArch64::SP
, RegState::Define
)
5565 .addReg(AArch64::LR
, RegState::Define
)
5566 .addReg(AArch64::SP
)
5570 It
= MBB
.insert(It
, Save
);
5574 It
= MBB
.insert(It
, BuildMI(MF
, DebugLoc(), get(AArch64::BL
))
5575 .addGlobalAddress(M
.getNamedValue(MF
.getName())));
5579 It
= MBB
.insert(It
, Restore
);
5583 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
5584 MachineFunction
&MF
) const {
5585 return MF
.getFunction().optForMinSize();
5588 #define GET_INSTRINFO_HELPERS
5589 #include "AArch64GenInstrInfo.inc"