//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  }

  return NumBytes;
}
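// For example, getInstSizeInBytes() reports a STACKMAP requesting an 8-byte
// shadow as 8 bytes (two NOPs), while an ordinary data-processing instruction
// is reported as 4 bytes.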
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}
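// parseCondBranch() encodes the branch kind in the length of Cond:
//   1 operand:  { CondCode }                   for Bcc
//   3 operands: { -1, Opcode, Reg }            for CBZ/CBNZ
//   4 operands: { -1, Opcode, Reg, BitNumber } for TBZ/TBNZ
// instantiateCondBranch() and insertSelect() below rely on this layout.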
static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
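// BrOffset is a byte offset; dividing by 4 converts it to an instruction
// count, so e.g. the 14-bit TB[N]Z field covers roughly +/-32 KiB.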
MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}
unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}
// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
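// For example, when one select input is defined by "add x, 1" the select can
// be emitted as CSINC (likewise "orn dst, zr, x" folds to CSINV and
// "sub dst, zr, x" to CSNEG); insertSelect() below performs the rewrite.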
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
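// For example, MOVi32imm #0xff passes this check because 0xff is a valid
// logical immediate and can be materialized as "orr wN, wzr, #0xff".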
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
      return true;
    else
      return MI.isAsCheapAsAMove();
  }

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
  // feature.
  case AArch64::FMOVH0:
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
  unsigned Reg, Imm, Shift;

  switch (MI.getOpcode()) {
  default:
    return false;

  // MOV Rd, SP
  case AArch64::ADDWri:
  case AArch64::ADDXri:
    if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
      return false;

    Reg = MI.getOperand(1).getReg();
    Imm = MI.getOperand(2).getImm();
    return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);

  // MOVI Vd, #0
  case AArch64::MOVIv8b_ns:
  case AArch64::MOVIv2d_ns:
  case AArch64::MOVIv16b_ns:
    Imm = MI.getOperand(1).getImm();
    return (Imm == 0);

  // MOVI Vd, #0
  case AArch64::MOVIv2i32:
  case AArch64::MOVIv4i16:
  case AArch64::MOVIv4i32:
  case AArch64::MOVIv8i16:
    Imm = MI.getOperand(1).getImm();
    Shift = MI.getOperand(2).getImm();
    return (Imm == 0 && Shift == 0);

  // MOV Rd, Imm
  case AArch64::MOVNWi:
  case AArch64::MOVNXi:
  case AArch64::MOVZWi:
  case AArch64::MOVZXi:
    return true;

  // ORR Rd, ZR, Imm
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    if (!MI.getOperand(1).isReg())
      return false;

    Reg = MI.getOperand(1).getReg();
    Imm = MI.getOperand(2).getImm();
    return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);

  // ORR Rd, ZR, Rm
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    if (!MI.getOperand(1).isReg())
      return false;

    Reg = MI.getOperand(1).getReg();
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
  }
}
bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
  unsigned Imm, Shift;
  AArch64_AM::ShiftExtendType Ext;

  switch (MI.getOpcode()) {
  default:
    return false;

  // Immediate forms.
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return true;

  // Shifted-register forms.
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    Ext = AArch64_AM::getShiftType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));

  // Extended-register forms.
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getArithShiftValue(Imm);
    Ext = AArch64_AM::getArithExtendType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));

  // Register-offset loads, stores and prefetches.
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:

  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:

  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX:

  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
    Imm = MI.getOperand(3).getImm();
    Ext = AArch64_AM::getMemExtendType(Imm);
    return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
  }
}
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
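// For example, "str x1, [x0]" (offset 0, width 8) and "str x2, [x0, #8]"
// (offset 8, width 8) share a base register and 0 + 8 <= 8, so the two
// accesses are reported as disjoint.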
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the others xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}
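// Note that for the immediate forms CmpValue is normalized to 0 or 1: e.g.
// "subs wzr, w0, #0" yields CmpValue == 0 while "subs wzr, w0, #5" yields
// CmpValue == 1, and optimizeCompareInstr() only rewrites the former.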
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);
    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}
/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
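// For example, a "subs w0, w1, #4" whose NZCV result is unused becomes
// "sub w0, w1, #4"; flag-setting forms that write wzr/xzr keep the S form
// because dropping it would turn the zero-register encoding into sp.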
enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
///       on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}
/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}
/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;

  return false;
}
namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set   or  C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set,   N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}
/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}
/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}
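// For example, substituteCmpToZero() turns
//   %1 = SUBWrr %2, %3
//   SUBSWri %1, 0, 0      ; compare %1 with zero
//   Bcc NE, ...
// into
//   %1 = SUBSWrr %2, %3   ; now sets NZCV itself
//   Bcc NE, ...
// provided no other NZCV def or use sits between the two instructions and the
// C/V flags are not consumed after the compare.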
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
/// Return true if this instruction has a non-zero immediate shift operand.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
/// Return true if this instruction has a non-zero immediate extend operand.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}
/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOStridedAccess;
  });
}
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRSWui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
    return true;
  }
}
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
                                                   bool &Is64Bit) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no flag setting equivalent!");
  // 32-bit cases:
  case AArch64::ADDWri:
    Is64Bit = false;
    return AArch64::ADDSWri;
  case AArch64::ADDWrr:
    Is64Bit = false;
    return AArch64::ADDSWrr;
  case AArch64::ADDWrs:
    Is64Bit = false;
    return AArch64::ADDSWrs;
  case AArch64::ADDWrx:
    Is64Bit = false;
    return AArch64::ADDSWrx;
  case AArch64::ANDWri:
    Is64Bit = false;
    return AArch64::ANDSWri;
  case AArch64::ANDWrr:
    Is64Bit = false;
    return AArch64::ANDSWrr;
  case AArch64::ANDWrs:
    Is64Bit = false;
    return AArch64::ANDSWrs;
  case AArch64::BICWrr:
    Is64Bit = false;
    return AArch64::BICSWrr;
  case AArch64::BICWrs:
    Is64Bit = false;
    return AArch64::BICSWrs;
  case AArch64::SUBWri:
    Is64Bit = false;
    return AArch64::SUBSWri;
  case AArch64::SUBWrr:
    Is64Bit = false;
    return AArch64::SUBSWrr;
  case AArch64::SUBWrs:
    Is64Bit = false;
    return AArch64::SUBSWrs;
  case AArch64::SUBWrx:
    Is64Bit = false;
    return AArch64::SUBSWrx;
  // 64-bit cases:
  case AArch64::ADDXri:
    Is64Bit = true;
    return AArch64::ADDSXri;
  case AArch64::ADDXrr:
    Is64Bit = true;
    return AArch64::ADDSXrr;
  case AArch64::ADDXrs:
    Is64Bit = true;
    return AArch64::ADDSXrs;
  case AArch64::ADDXrx:
    Is64Bit = true;
    return AArch64::ADDSXrx;
  case AArch64::ANDXri:
    Is64Bit = true;
    return AArch64::ANDSXri;
  case AArch64::ANDXrr:
    Is64Bit = true;
    return AArch64::ANDSXrr;
  case AArch64::ANDXrs:
    Is64Bit = true;
    return AArch64::ANDSXrs;
  case AArch64::BICXrr:
    Is64Bit = true;
    return AArch64::BICSXrr;
  case AArch64::BICXrs:
    Is64Bit = true;
    return AArch64::BICSXrs;
  case AArch64::SUBXri:
    Is64Bit = true;
    return AArch64::SUBSXri;
  case AArch64::SUBXrr:
    Is64Bit = true;
    return AArch64::SUBSXrr;
  case AArch64::SUBXrs:
    Is64Bit = true;
    return AArch64::SUBSXrs;
  case AArch64::SUBXrx:
    Is64Bit = true;
    return AArch64::SUBSXrx;
  }
}
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}
bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  unsigned Scale = 0;
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling
  // factor set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}

MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) const {
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    Width = 32;
    Scale = 4;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  }

  return true;
}
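// For illustration (using the values set above): LDRXui encodes a scaled
// unsigned 12-bit offset, so it reports Scale = 8, Width = 8 and an encodable
// range of 0..4095 elements, i.e. byte offsets 0, 8, ..., 32760.  Its
// unscaled twin LDURXi instead reports Scale = 1 with a signed byte range of
// -256..255.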
// Scale the unscaled offsets.  Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
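// Worked example: for STURXi the stride is 8, so a byte offset of 24 becomes
// the element offset 3 (24 / 8), while a byte offset of 20 is rejected
// because 20 % 8 != 0 and a paired instruction could not encode it.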
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                           unsigned BaseReg1,
                                           MachineInstr &SecondLdSt,
                                           unsigned BaseReg2,
                                           unsigned NumLoads) const {
  if (BaseReg1 != BaseReg2)
    return false;

  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;

  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + 1 == Offset2;
}
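// For illustration (hypothetical registers): with x0 as the common base,
//   ldr x1, [x0, #8]    ; element offset 1
//   ldr x2, [x0, #16]   ; element offset 2
// passes every check above, so the scheduler may keep the two loads adjacent
// and the load/store optimizer can later rewrite them as
//   ldp x1, x2, [x0, #8]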
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
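// Worked example: copying the triple Q1_Q2_Q3 into Q2_Q3_Q4 one sub-register
// at a time in ascending order would overwrite Q2 and Q3 before they are
// read.  Here (DestEncoding - SrcEncoding) & 0x1f == 1, which is smaller than
// NumRegs == 3, so copyPhysRegTuple below iterates backwards instead.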
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL, unsigned DestReg,
                                        unsigned SrcReg, bool KillSrc,
                                        unsigned Opcode,
                                        ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, unsigned DestReg,
                                   unsigned SrcReg, bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers.  This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers.  This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2, AArch64::dsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2, AArch64::qsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}
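// For illustration, typical expansions produced above (register choices are
// hypothetical):
//   w1 <- w2, neither is WSP            => orr w1, wzr, w2
//   w1 <- wsp (either operand is WSP)   => add w1, wsp, #0
//   q0 <- q1 with NEON available        => orr v0.16b, v1.16b, v1.16b
//   x0 <- NZCV                          => mrs x0, NZCV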
void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
          .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
                  getKillRegState(isKill))
          .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
                  getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
          .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
                  getDefRegState(true))
          .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
                  getDefRegState(true))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register.  If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI).  That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}
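// Worked example: emitFrameOffset(..., DestReg = SP, SrcReg = SP,
// Offset = 0x101010) cannot encode the value in a single ADD, so the loop
// above peels off the 12-bit-shifted part first:
//   add sp, sp, #0x101, lsl #12   ; adds 0x101000
//   add sp, sp, #0x10             ; remaining 16 bytes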
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %0 = COPY %sp; GPR64all:%0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register class don't match.  For example:
  //
  //   %0 = COPY %xzr; GPR64common:%0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %xzr, %stack.0
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x and
  // d regs) of the same size.  For example:
  //
  //   %0 = COPY %1; GPR64:%0, FPR64:%1
  //
  // will be filled as
  //
  //   LDRDui %0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %Temp, fi<#0>
  //   %0 = COPY %Temp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    unsigned DstReg = DstMO.getReg();
    unsigned SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return TargetRegisterInfo::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %xzr, %stack.0
    //
    if (IsSpill && DstMO.isUndef() &&
        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    //
    // where we can load the full virtual reg source stack slot, into the subreg
    // destination, in this case producing:
    //
    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    IsSigned = true;
    Scale = 4;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}
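// Worked example: for a STRXui whose combined byte offset comes to 32768,
// Scale is 8 and the scaled 12-bit field only reaches 4095 * 8 = 32760 bytes,
// so the code above reports an emittable (scaled) offset of 4095 and hands
// back Offset = 8 for the caller to materialize separately.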
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}
void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}
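// Note: HINT #0 is the architectural NOP encoding, so the MCInst built by
// getNoop above prints as a plain "nop".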
// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}
// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
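// For illustration (hypothetical virtual registers): when %3 is only used by
// the add,
//   %3 = MADDWrrr %1, %2, %wzr      ; plain multiply, i.e. mul w3, w1, w2
//   %4 = ADDWrr %0, %3
// matches MULADDW_OP2, and the combiner can replace the pair with
//   %4 = MADDWrrr %1, %2, %0        ; madd w4, w1, w2, w0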
/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDWrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
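// For illustration, the scalar FMULADDS_OP1 pattern turns
//   %2 = FMULSrr %0, %1
//   %3 = FADDSrr %2, %4
// into a single fused multiply-add
//   %3 = FMADDSrr %0, %1, %4
// which is only attempted when unsafe-fp-math or fp-contract=fast permits
// fusion (see isCombineInstrCandidateFP above).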
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
    MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
enum class FMAInstKind { Default, Indexed, Accumulator };
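// Note on operand order (editorial sketch, derived from genFusedMultiply
// below rather than from any external reference): for FMAInstKind::Default
// the multiplier operands are emitted first and the addend last
// (MADD/FMADD-style); for Indexed and Accumulator the addend is emitted
// first, and Indexed additionally copies the lane immediate from the
// original F|MUL.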
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind the kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const unsigned *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  unsigned SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
    SrcReg2 = *ReplacedAddend;
    Src2IsKill = true;
  } else {
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
  }

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
  InsInstrs.push_back(MIB);
  return MUL;
}
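// Illustrative example of the Default kind (a sketch only; the virtual
// register numbers and kill flags are made up, not taken from a real
// compilation):
//
//   %3:fpr32 = FMULSrrr %1:fpr32, %2:fpr32
//   %4:fpr32 = FADDSrrr killed %3:fpr32, %0:fpr32
// becomes
//   %4:fpr32 = FMADDSrrr %1:fpr32, %2:fpr32, %0:fpr32
//
// with the FMUL and FADD recorded for deletion by the caller.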
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
                              const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
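// Illustrative example (a sketch only; virtual register numbers and the
// immediate are made up, and the immediate must be encodable as an AArch64
// logical immediate):
//
//   %5:gpr32 = MADDWrrr %1, %2, $wzr      (the MUL)
//   %6:gpr32 = ADDWri %5, 255, 0          (Root: ADD with immediate)
// becomes
//   %7:gpr32sp = ORRWri $wzr, <encoding of 255>
//   %6:gpr32   = MADDWrrr %1, %2, %7
//
// where the ORR materializes the addend into the extra virtual register VR.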
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1: {
    // ==> FNMADD R,A,B,C // = -A*B - C
    // --- Create(FNMADD);
    if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
      Opc = AArch64::FNMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLSv2f32_OP1:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv4f32_OP1:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv2f64_OP1:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}
/// Replace csincr-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc  w9, wzr, wzr, <condition code>
///   tbnz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<inverted condition code>
///    \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<condition code>
///    \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is power of 2.
///
/// Examples:
///    \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
///    \endcode
/// to
///    \code
///   tbnz w8, #10, L1
///    \endcode
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the CBZ now.
    MO.setIsKill(false);

    // For immediate smaller than 32, we need to use the 32-bit
    // variant (W) in all cases. Indeed the 64-bit variant does not
    // allow encoding them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}
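// For example (descriptive sketch only): a flag word of MO_PAGEOFF | MO_NC
// splits into the "direct" part MO_PAGEOFF (selected by MO_FRAGMENT) and the
// remaining "bitmask" part MO_NC; the tables below provide the serialized
// names used by the MIR printer/parser for each half.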
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MOSuppressPair, "aarch64-suppress-pair"},
       {MOStridedAccess, "aarch64-strided-access"}};
  return makeArrayRef(TargetFlags);
}
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                             OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION     I1
/// RET                            I2
///                                RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// BL f                              I2
///                                   B f
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
enum MachineOutlinerClass {
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
  MachineOutlinerTailCall, /// Only emit a branch.
  MachineOutlinerNoLRSave, /// Emit a call and return.
  MachineOutlinerThunk,    /// Emit a call and tail-call.
};

enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4
};
outliner::TargetCostInfo
AArch64InstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  unsigned SequenceSize = std::accumulate(
      RepeatedSequenceLocs[0].front(),
      std::next(RepeatedSequenceLocs[0].back()),
      0, [this](unsigned Sum, const MachineInstr &MI) {
        return Sum + getInstSizeInBytes(MI);
      });
  unsigned CallID = MachineOutlinerDefault;
  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumBytesForCall = 12;
  unsigned NumBytesToCreateFrame = 4;
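  // Note (descriptive): these defaults correspond to MachineOutlinerDefault as
  // documented above -- a 12-byte call (save LR + BL + restore LR) and a
  // 4-byte frame (the RET). The cheaper tail-call, thunk, and no-LR-save
  // cases below overwrite them.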
  // Compute liveness information for each candidate.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
                [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });

  // According to the AArch64 Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Registers x16, x17, (and thus w16, w17)
  // * Condition codes (and thus the NZCV register)
  //
  // Because of this, we can't outline any sequence of instructions where one
  // of these registers is live into/across it. Thus, we need to delete those
  // candidates.
  auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
    LiveRegUnits LRU = C.LRU;
    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
            !LRU.available(AArch64::NZCV));
  };

  // Erase every candidate that violates the restrictions above. (It could be
  // true that we have viable candidates, so it's not worth bailing out in
  // the case that, say, 1 out of 20 candidates violate the restrictions.)
  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
                                            RepeatedSequenceLocs.end(),
                                            CantGuaranteeValueAcrossCall),
                             RepeatedSequenceLocs.end());

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    CallID = MachineOutlinerTailCall;
    FrameID = MachineOutlinerTailCall;
    NumBytesForCall = 4;
    NumBytesToCreateFrame = 0;
  }

  else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
    // FIXME: Do we need to check if the code after this uses the value of LR?
    CallID = MachineOutlinerThunk;
    FrameID = MachineOutlinerThunk;
    NumBytesForCall = 4;
    NumBytesToCreateFrame = 0;
  }

  // Make sure that LR isn't live on entry to this candidate. The only
  // instructions that use LR that could possibly appear in a repeated sequence
  // are calls. Therefore, we only have to check and see if LR is dead on entry
  // to (or exit from) some candidate.
  else if (std::all_of(RepeatedSequenceLocs.begin(),
                       RepeatedSequenceLocs.end(),
                       [](outliner::Candidate &C) {
                         return C.LRU.available(AArch64::LR);
                       })) {
    CallID = MachineOutlinerNoLRSave;
    FrameID = MachineOutlinerNoLRSave;
    NumBytesForCall = 4;
    NumBytesToCreateFrame = 4;
  }

  // Check if the range contains a call. These require a save + restore of the
  // link register.
  if (std::any_of(RepeatedSequenceLocs[0].front(),
                  RepeatedSequenceLocs[0].back(),
                  [](const MachineInstr &MI) { return MI.isCall(); }))
    NumBytesToCreateFrame += 8; // Save + restore the link register.

  // Handle the last instruction separately. If this is a tail call, then the
  // last instruction is a call. We don't want to save + restore in this case.
  // However, it could be possible that the last instruction is a call without
  // it being valid to tail call this sequence. We should consider this as well.
  else if (FrameID != MachineOutlinerThunk &&
           FrameID != MachineOutlinerTailCall &&
           RepeatedSequenceLocs[0].back()->isCall())
    NumBytesToCreateFrame += 8;

  return outliner::TargetCostInfo(SequenceSize, NumBytesForCall,
                                  NumBytesToCreateFrame, CallID, FrameID);
}
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  // Don't outline from functions with section markings; the program could
  // expect that all the code is in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (F.hasSection())
    return false;

  // Outlining from functions with redzones is unsafe since the outliner may
  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
  // outline from it.
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().getValueOr(true))
    return false;

  // It's safe to outline from MF.
  return true;
}
unsigned
AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
  unsigned Flags = 0x0;
  // Check if there's a call inside this MachineBasicBlock. If there is, then
  // set a flag.
  if (std::any_of(MBB.begin(), MBB.end(),
                  [](MachineInstr &MI) { return MI.isCall(); }))
    Flags |= MachineOutlinerMBBFlags::HasCalls;

  // Check if LR is available through all of the MBB. If it's not, then set
  // a flag.
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");
  LiveRegUnits LRU(getRegisterInfo());
  LRU.addLiveOuts(MBB);

  std::for_each(MBB.rbegin(), MBB.rend(),
                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });

  if (!LRU.available(AArch64::LR))
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return Flags;
}
outliner::InstrType
AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
                                   unsigned Flags) const {
  MachineInstr &MI = *MIT;
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();

  // Don't outline LOHs.
  if (FuncInfo->getLOHRelated().count(&MI))
    return outliner::InstrType::Illegal;

  // Don't allow debug values to impact outlining type.
  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    return outliner::InstrType::Invisible;

  // At this point, KILL instructions don't really tell us much so we can go
  // ahead and skip over them.
  if (MI.isKill())
    return outliner::InstrType::Invisible;

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {

    // Is this the end of a function?
    if (MI.getParent()->succ_empty())
      return outliner::InstrType::Legal;

    // It's not, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return outliner::InstrType::Illegal;

    // If it uses LR or W30 explicitly, then don't touch it.
    if (MOP.isReg() && !MOP.isImplicit() &&
        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
      return outliner::InstrType::Illegal;
  }

  // Special cases for instructions that can always be outlined, but will fail
  // the later tests. e.g. ADRPs, which are PC-relative and use LR, but can
  // always be outlined because they don't require a *specific* value to be in
  // LR.
  if (MI.getOpcode() == AArch64::ADRP)
    return outliner::InstrType::Legal;

  // If MI is a call we might be able to outline it. We don't want to outline
  // any calls that rely on the position of items on the stack. When we outline
  // something containing a call, we have to emit a save and restore of LR in
  // the outlined function. Currently, this always happens by saving LR to the
  // stack. Thus, if we outline, say, half the parameters for a function call
  // plus the call, then we'll break the callee's expectations for the layout
  // of the stack.
  //
  // FIXME: Allow calls to functions which construct a stack frame, as long
  // as they don't access arguments on the stack.
  // FIXME: Figure out some way to analyze functions defined in other modules.
  // We should be able to compute the memory usage based on the IR calling
  // convention, even if we can't see the definition.
  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Whitelist the call instructions we know about so we
    // don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something
    // we can safely outline.
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If we
    // don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought to not pass anything on the
    // stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return outliner::InstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return outliner::InstrType::Illegal;

  // Does this use the stack?
  if (MI.modifiesRegister(AArch64::SP, &RI) ||
      MI.readsRegister(AArch64::SP, &RI)) {
    // True if there is no chance that any outlined candidate from this range
    // could require stack fixups. That is, both
    // * LR is available in the range (No save/restore around call)
    // * The range doesn't include calls (No save/restore in outlined frame)
    //
    // FIXME: This is very restrictive; the flags check the whole block,
    // not just the bit we will try to outline.
    bool MightNeedStackFixUp =
        (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
                  MachineOutlinerMBBFlags::HasCalls));

    // If this instruction is in a range where it *never* needs to be fixed
    // up, then we can *always* outline it. This is true even if it's not
    // possible to fix that instruction up.
    //
    // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
    // use SP. Suppose that I1 sits within a range that definitely doesn't
    // need stack fixups, while I2 sits in a range that does.
    //
    // First, I1 can be outlined as long as we *never* fix up the stack in
    // any sequence containing it. I1 is already a safe instruction in the
    // original program, so as long as we don't modify it we're good to go.
    // So this leaves us with showing that outlining I2 won't break our
    // program.
    //
    // Suppose I1 and I2 belong to equivalent candidate sequences. When we
    // look at I2, we need to see if it can be fixed up. Suppose I2, (and
    // thus I1) cannot be fixed up. Then I2 will be assigned an unique
    // integer label; thus, I2 cannot belong to any candidate sequence (a
    // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
    // as well, so we're good. Thus, I1 is always safe to outline.
    //
    // This gives us two things: first off, it buys us some more instructions
    // for our search space by deeming stack instructions illegal only when
    // they can't be fixed up AND we might have to fix them up. Second off,
    // This allows us to catch tricky instructions like, say,
    // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
    // be paired with later SUBXris, which might *not* end up being outlined.
    // If we mess with the stack to save something, then an ADDXri messes with
    // it *after*, then we aren't going to restore the right something from
    // the stack if we don't outline the corresponding SUBXri first. ADDXris and
    // SUBXris are extremely common in prologue/epilogue code, so supporting
    // them in the outliner can be a pretty big win!
    if (!MightNeedStackFixUp)
      return outliner::InstrType::Legal;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant offset to
    // SP, with a bit more work.
    if (MI.modifiesRegister(AArch64::SP, &RI))
      return outliner::InstrType::Illegal;

    // At this point, we have a stack instruction that we might need to fix
    // up. We'll handle it if it's a load or store.
    if (MI.mayLoadOrStore()) {
      unsigned Base;  // Filled with the base register of MI.
      int64_t Offset; // Filled with the offset of MI.
      unsigned DummyWidth;

      // Does it allow us to offset the base register and is the base SP?
      if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
          Base != AArch64::SP)
        return outliner::InstrType::Illegal;

      // Find the minimum/maximum offset for this instruction and check if
      // fixing it up would be in range.
      int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
      unsigned Scale;               // The scale to multiply the offsets by.
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);

      // TODO: We should really test what happens if an instruction overflows.
      // This is tricky to test with IR tests, but when the outliner is moved
      // to a MIR test, it really ought to be checked.
      Offset += 16; // Update the offset to what it would be if we outlined.
      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
        return outliner::InstrType::Illegal;

      // It's in range, so we can outline it.
      return outliner::InstrType::Legal;
    }

    // FIXME: Add handling for instructions like "add x0, sp, #8".

    // We can't fix it up, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  return outliner::InstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    unsigned Base, Width;
    int64_t Offset;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
        Base != AArch64::SP)
      continue;

    // It is, so we have to fix it up.
    unsigned Scale;
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
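    // Worked example (illustrative only, not taken from a real compilation):
    // for "ldr x0, [sp, #8]" in the outlined body, Scale is 8 and Offset is 8,
    // so NewImm becomes (8 + 16) / 8 = 3 and the access is rewritten as
    // "ldr x0, [sp, #24]", skipping the 16 bytes used to spill LR.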
    int64_t NewImm = (Offset + 16) / Scale;
    StackOffsetOperand.setImm(NewImm);
  }
}
void AArch64InstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::TargetCostInfo &TCI) const {
  // For thunk outlining, rewrite the last instruction from a call to a
  // tail-call.
  if (TCI.FrameConstructionID == MachineOutlinerThunk) {
    MachineInstr *Call = &*--MBB.instr_end();
    unsigned TailOpcode;
    if (Call->getOpcode() == AArch64::BL) {
      TailOpcode = AArch64::TCRETURNdi;
    } else {
      assert(Call->getOpcode() == AArch64::BLR);
      TailOpcode = AArch64::TCRETURNri;
    }
    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
                           .add(Call->getOperand(0))
                           .addImm(0);
    MBB.insert(MBB.end(), TC);
    Call->eraseFromParent();
  }

  // Is there a call in the outlined range?
  auto IsNonTailCall = [](MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };
  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    assert(TCI.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    // LR has to be a live in so that we can save it.
    MBB.addLiveIn(AArch64::LR);

    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
        TCI.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // Insert a save before the outlined region
    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                                .addReg(AArch64::SP, RegState::Define)
                                .addReg(AArch64::LR)
                                .addReg(AArch64::SP)
                                .addImm(-16);
    It = MBB.insert(It, STRXpre);

    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const MCRegisterInfo *MRI = STI.getRegisterInfo();
    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);

    // Add a CFI saying the stack was moved 16 B down.
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Add a CFI saying that the LR that we want to find is now 16 B higher than
    // before.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Insert a restore before the terminator for the function.
    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                                 .addReg(AArch64::SP, RegState::Define)
                                 .addReg(AArch64::LR, RegState::Define)
                                 .addReg(AArch64::SP)
                                 .addImm(16);
    Et = MBB.insert(Et, LDRXpost);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
      TCI.FrameConstructionID == MachineOutlinerThunk)
    return;

  // It's not a tail call, so we have to insert the return ourselves.
  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), ret);

  // Did we have to modify the stack by saving the link register?
  if (TCI.FrameConstructionID == MachineOutlinerNoLRSave)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}
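// Rough shape of the emitted frame when the outlined body itself contains a
// call (assembly sketch, illustrative only):
//
//   OUTLINED_FUNCTION:
//     str x30, [sp, #-16]!    (spill LR around the inner call)
//     ...                     (outlined body, SP offsets already fixed up)
//     ldr x30, [sp], #16
//     ret                     (omitted for tail-call and thunk frames)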
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const outliner::TargetCostInfo &TCI) const {

  // Are we tail calling?
  if (TCI.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
                            .addGlobalAddress(M.getNamedValue(MF.getName()))
                            .addImm(0));
    return It;
  }

  // Are we saving the link register?
  if (TCI.CallConstructionID == MachineOutlinerNoLRSave ||
      TCI.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We want to return the spot where we inserted the call.
  MachineBasicBlock::iterator CallPt;

  // We have a default call. Save the link register.
  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                              .addReg(AArch64::SP, RegState::Define)
                              .addReg(AArch64::LR)
                              .addReg(AArch64::SP)
                              .addImm(-16);
  It = MBB.insert(It, STRXpre);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  CallPt = It;
  It++;

  // Restore the link register.
  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                               .addReg(AArch64::SP, RegState::Define)
                               .addReg(AArch64::LR, RegState::Define)
                               .addReg(AArch64::SP)
                               .addImm(16);
  It = MBB.insert(It, LDRXpost);