//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  auto Op = MI.getOpcode();
  if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // Meta-instructions emit no code.
  if (MI.isMetaInstruction())
    return 0;

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::STATEPOINT:
    NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    // No patch bytes means a normal call inst is emitted
    if (NumBytes == 0)
      NumBytes = 4;
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  case AArch64::SpeculationBarrierISBDSBEndBB:
    // This gets lowered to 2 4-byte instructions.
    NumBytes = 8;
    break;
  case AArch64::SpeculationBarrierSBEndBB:
    // This gets lowered to one 4-byte instruction.
    NumBytes = 4;
    break;
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  case AArch64::StoreSwiftAsyncContext:
    NumBytes = 20;
    break;
  case TargetOpcode::BUNDLE:
    NumBytes = getInstBundleLength(MI);
    break;
  }

  return NumBytes;
}

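/// Compute the total code size of a bundle by summing getInstSizeInBytes()
/// over every instruction contained inside the bundle headed by \p MI.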
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

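/// Decompose a conditional branch terminator into its target block and a
/// Cond operand list: Bcc is encoded as a single condition-code immediate,
/// while compare-and-branch and test-and-branch forms are encoded as
/// {-1, opcode, operands...} so they can be re-materialized later.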
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
    break;
  }
}

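/// Return the number of signed displacement bits available to the given
/// branch opcode, possibly restricted by the debugging options above.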
static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

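// Branch analysis: classify the terminators of MBB into TBB/FBB/Cond and,
// when AllowModify is set, clean up redundant trailing branches.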
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      }
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
    }
  }

  // If we're allowed to modify and the block ends in an unconditional branch
  // which could simply fallthrough, remove the branch. (Note: This case only
  // matters when we can't understand the whole sequence, otherwise it's also
  // handled by BranchFolding.cpp.)
  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
    LastInst->eraseFromParent();
    LastInst = SecondLastInst;
    LastOpc = LastInst->getOpcode();
    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
      assert(!isUncondBranchOpcode(LastOpc) &&
             "unreachable unconditional branches removed above");

      if (isCondBranchOpcode(LastOpc)) {
        // Block ends with fall-through condbranch.
        parseCondBranch(LastInst, TBB, Cond);
        return false;
      }
      return true; // Can't handle indirect branch.
    }
    SecondLastInst = &*I;
    SecondLastOpc = SecondLastInst->getOpcode();
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                              MachineBranchPredicate &MBP,
                                              bool AllowModify) const {
  // For the moment, handle only a block which ends with a cb(n)zx followed by
  // a fallthrough. Why this? Because it is a common form.
  // TODO: Should we handle b.cc?

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return true;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return true;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  if (!isCondBranchOpcode(LastOpc))
    return true;

  switch (LastOpc) {
  default:
    return true;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    break;
  }

  MBP.TrueDest = LastInst->getOperand(1).getMBB();
  assert(MBP.TrueDest && "expected!");
  MBP.FalseDest = MBB.getNextNode();

  MBP.ConditionDef = nullptr;
  MBP.SingleUseCondition = false;

  MBP.LHS = LastInst->getOperand(0);
  MBP.RHS = MachineOperand::CreateImm(0);
  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
                                            : MachineBranchPredicate::PRED_EQ;
  return false;
}

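/// Invert the condition encoded in Cond: flip the Bcc condition code, or
/// swap a CB(N)Z/TB(N)Z opcode for its negated counterpart.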
bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

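/// Erase up to two trailing branches (one conditional, one unconditional)
/// from MBB, returning how many were removed and reporting the byte count
/// through \p BytesRemoved when requested.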
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

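/// Emit an unconditional, conditional, or two-way branch sequence at the end
/// of MBB, reporting the number of bytes added through \p BytesAdded.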
unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (Register::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

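/// Decide whether a select on this condition and register class can be
/// lowered to csel/fcsel, and report the estimated latencies of the
/// condition and of each operand path.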
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Also need to check the dest regclass, in case we're trying to optimize
  // something like:
  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  return false;
}

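/// Materialize a select: emit any compare implied by Cond, then a
/// csel/fcsel (or a folded csinc/csinv/csneg) producing DstReg.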
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.

  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 ||
        Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;

    return MI.isAsCheapAsAMove();
  }

  // Finally, check generic cases.

  switch (Opcode) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

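/// Return true if the shift or extend amount used by \p MI is one that the
/// Falkor core handles without extra cost.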
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

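/// Return true if \p MI is one of the SEH pseudo-instructions used to emit
/// Windows unwind information.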
bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::SEH_StackAlloc:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveFPLR_X:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveReg_X:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveRegP_X:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveFReg_X:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFRegP_X:
  case AArch64::SEH_SetFP:
  case AArch64::SEH_AddFP:
  case AArch64::SEH_Nop:
  case AArch64::SEH_PrologEnd:
  case AArch64::SEH_EpilogStart:
  case AArch64::SEH_EpilogEnd:
    return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg, Register &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

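/// Conservatively decide whether two memory instructions access provably
/// disjoint memory: same base operand and non-overlapping offset ranges.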
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;
  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, offset from the base and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the bases are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
  // are assumed to have the same scale (vscale).
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
                                   WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
                                   WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
        OffsetAIsScalable == OffsetBIsScalable) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

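/// In addition to the generic target-independent boundaries, treat CSDB
/// hints, DSB/ISB barriers and SEH pseudos as scheduling boundaries.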
bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  default:;
  }
  return isSEHInstruction(MI);
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  auto NormalizeCmpValue = [](int64_t Value) -> int {
    // Comparison immediates may be 64-bit, but CmpValue is only an int.
    // Normalize to 0/1/2 return value, where 2 indicates any value apart from
    // 0 or 1.
    // TODO: Switch CmpValue to int64_t in the API to avoid this.
    if (Value == 0 || Value == 1)
      return Value;
    return 2;
  };

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::PTEST_PP:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    // Not sure about the mask and value for now...
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
        MI.getOperand(2).getImm(),
        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
    return true;
  }

  return false;
}

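/// Re-constrain the register operands of \p Instr to the register classes
/// required by its (possibly updated) MCInstrDesc; returns false if an
/// operand cannot be constrained.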
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    Register Reg = MO.getReg();
    if (Register::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
///       on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::any_of(
      ++To.getReverse(), To->getParent()->rend(),
      [From](MachineInstr &MI) { return MI.getIterator() == From; }));

  // We iterate backward starting at \p To until we hit \p From.
  for (const MachineInstr &Instr :
       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation which could set the flags in an identical manner
bool AArch64InstrInfo::optimizePTestInstr(
    MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
    const MachineRegisterInfo *MRI) const {
  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
  auto *Pred = MRI->getUniqueVRegDef(PredReg);
  auto NewOp = Pred->getOpcode();
  bool OpChanged = false;

  unsigned MaskOpcode = Mask->getOpcode();
  unsigned PredOpcode = Pred->getOpcode();
  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
  bool PredIsWhileLike = isWhileOpcode(PredOpcode);

  if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
    // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
    // deactivate any lanes OTHER_INST might set.
    uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
    uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);

    // Must be an all active predicate of matching element size.
    if ((PredElementSize != MaskElementSize) ||
        (Mask->getOperand(1).getImm() != 31))
      return false;

    // Fallthrough to simply remove the PTEST.
  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
    // instruction that sets the flags as PTEST would.

    // Fallthrough to simply remove the PTEST.
  } else if (PredIsPTestLike) {
    // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
    // instructions use the same predicate.
    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    if (Mask != PTestLikeMask)
      return false;

    // Fallthrough to simply remove the PTEST.
  } else {
    switch (Pred->getOpcode()) {
    case AArch64::BRKB_PPzP:
    case AArch64::BRKPB_PPzPP: {
      // Op 0 is chain, 1 is the mask, 2 the previous predicate to
      // propagate, 3 the new predicate.

      // Check to see if our mask is the same as the brkpb's. If
      // not the resulting flag bits may be different and we
      // can't remove the ptest.
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      // Switch to the new opcode
      NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
                                                      : AArch64::BRKPBS_PPzPP;
      OpChanged = true;
      break;
    }
    case AArch64::BRKN_PPzP: {
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      NewOp = AArch64::BRKNS_PPzP;
      OpChanged = true;
      break;
    }
    case AArch64::RDFFR_PPz: {
      // rdffr   p1.b, PredMask=p0/z <--- Definition of Pred
      // ptest   Mask=p0, Pred=p1.b  <--- If equal masks, remove this and use
      //                                  `rdffrs p1.b, p0/z` above.
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      NewOp = AArch64::RDFFRS_PPz;
      OpChanged = true;
      break;
    }
    default:
      // Bail out if we don't recognize the input
      return false;
    }
  }

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // If another instruction between Pred and PTest accesses flags, don't remove
  // the ptest or update the earlier instruction to modify them.
  if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
    return false;

  // If we pass all the checks, it's safe to remove the PTEST and use the flags
  // as they are prior to PTEST. Sometimes this requires the tested PTEST
  // operand to be replaced with an equivalent instruction that also sets the
  // flags.
  Pred->setDesc(get(NewOp));
  PTest->eraseFromParent();
  if (OpChanged) {
    bool succeeded = UpdateOperandRegClass(*Pred);
    (void)succeeded;
    assert(succeeded && "Operands have incompatible register classes!");
    Pred->addRegisterDefined(AArch64::NZCV, TRI);
  }

  // Ensure that the flags def is live.
  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
    unsigned i = 0, e = Pred->getNumOperands();
    for (; i != e; ++i) {
      MachineOperand &MO = Pred->getOperand(i);
      if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
        MO.setIsDead(false);
        break;
      }
    }
  }
  return true;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
    return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);

  // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
  assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
         "CmpValue must be 0, 1, or 2!");
  if (SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if its destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
    return true;
  return (CmpValue == 0 || CmpValue == 1) &&
         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

/// \returns The condition code operand index for \p Instr if it is a branch
/// or select and -1 otherwise.
static int
findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return -1;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return Idx - 2;
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return Idx - 1;
  }
  }
}

namespace {

/// Tracks which of the N, Z, C and V condition flags are used.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
                          Instr.getOperand(CCIdx).getImm())
                    : AArch64CC::Invalid;
}

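/// Return the set of NZCV flags read by an instruction predicated on \p CC.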
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

/// \returns Condition flags used after \p CmpInstr in its MachineBB if they
/// are not containing C or V flags and NZCV flags are not alive in successors
/// of the same \p CmpInstr and \p MI parent. \returns None otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
static Optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
  MachineBasicBlock *CmpParent = CmpInstr.getParent();
  if (MI.getParent() != CmpParent)
    return None;

  if (areCFlagsAliveInSuccessors(CmpParent))
    return None;

  UsedNZCV NZCVUsedAfterCmp;
  for (MachineInstr &Instr : instructionsWithoutDebug(
           std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
    if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return None;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
      if (CCUseInstrs)
        CCUseInstrs->push_back(&Instr);
    }
    if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
      break;
  }
  if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
    return None;
  return NZCVUsedAfterCmp;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
                                       const TargetRegisterInfo &TRI) {
  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);

  const unsigned CmpOpcode = CmpInstr.getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (!examineCFlagsUse(MI, CmpInstr, TRI))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(MI) != MI.getOpcode())
    AccessToCheck = AK_All;
  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo &MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo &TRI = getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, &TRI);
  return true;
}

/// \returns True if \p CmpInstr can be removed.
///
/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
/// codes used in \p CCUseInstrs must be inverted.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
                                 int CmpValue, const TargetRegisterInfo &TRI,
                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
                                 bool &IsInvertCC) {
  assert((CmpValue == 0 || CmpValue == 1) &&
         "Only comparisons to 0 or 1 considered for removal!");

  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
  unsigned MIOpc = MI.getOpcode();
  if (MIOpc == AArch64::CSINCWr) {
    if (MI.getOperand(1).getReg() != AArch64::WZR ||
        MI.getOperand(2).getReg() != AArch64::WZR)
      return false;
  } else if (MIOpc == AArch64::CSINCXr) {
    if (MI.getOperand(1).getReg() != AArch64::XZR ||
        MI.getOperand(2).getReg() != AArch64::XZR)
      return false;
  } else {
    return false;
  }
  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
  if (MICC == AArch64CC::Invalid)
    return false;

  // NZCV needs to be defined
  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    return false;

  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
  const unsigned CmpOpcode = CmpInstr.getOpcode();
  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
  if (CmpValue && !IsSubsRegImm)
    return false;
  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
    return false;

  // MI conditions allowed: eq, ne, mi, pl
  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
  if (MIUsedNZCV.C || MIUsedNZCV.V)
    return false;

  Optional<UsedNZCV> NZCVUsedAfterCmp =
      examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
  // Condition flags are not used in CmpInstr basic block successors and only
  // Z or N flags allowed to be used after CmpInstr within its basic block
  if (!NZCVUsedAfterCmp)
    return false;
  // Z or N flag used after CmpInstr must correspond to the flag used in MI
  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
      (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
    return false;
  // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
  if (MIUsedNZCV.N && !CmpValue)
    return false;

  // There must be no defs of flags between MI and CmpInstr
  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
    return false;

  // Condition code is inverted in the following cases:
  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
               (!CmpValue && MICC == AArch64CC::NE);
  return true;
}

/// Remove comparison in csinc-cmp sequence
///
/// Examples:
/// 1. \code
///   csinc w9, wzr, wzr, ne
///   cmp   w9, #0
///   b.eq
///    \endcode
/// \code
///   csinc w9, wzr, wzr, ne
///   b.ne
///    \endcode
/// 2. \code
///   csinc x2, xzr, xzr, mi
///   cmp   x2, #1
///   b.pl
///    \endcode
/// \code
///   csinc x2, xzr, xzr, mi
///   b.ge
///    \endcode
///
/// \param CmpInstr comparison instruction
/// \return True when comparison removed
bool AArch64InstrInfo::removeCmpToZeroOrOne(
    MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
    const MachineRegisterInfo &MRI) const {
  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;
  const TargetRegisterInfo &TRI = getRegisterInfo();
  SmallVector<MachineInstr *, 4> CCUseInstrs;
  bool IsInvertCC = false;
  if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
                            IsInvertCC))
    return false;
  // Make transformation
  CmpInstr.eraseFromParent();
  if (IsInvertCC) {
    // Invert condition codes in CmpInstr CC users
    for (MachineInstr *CCUseInstr : CCUseInstrs) {
      int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
      assert(Idx >= 0 && "Unexpected instruction using CC.");
      MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
      CCOperand.setImm(CCUse);
    }
  }
  return true;
}

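// Post-RA pseudo expansion: lowers LOAD_STACK_GUARD (GOT, large and tiny
// code models, plus the "sysreg" stack-protector variant) and CATCHRET.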
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
      MI.getOpcode() != AArch64::CATCHRET)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  DebugLoc DL = MI.getDebugLoc();

  if (MI.getOpcode() == AArch64::CATCHRET) {
    // Skip to the first instruction before the epilog.
    const TargetInstrInfo *TII =
        MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  Register Reg = MI.getOperand(0).getReg();
  Module &M = *MBB.getParent()->getFunction().getParent();
  if (M.getStackProtectorGuard() == "sysreg") {
    const AArch64SysReg::SysReg *SrcReg =
        AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
    if (!SrcReg)
      report_fatal_error("Unknown SysReg for Stack Protector Guard Register");

    // mrs xN, sysreg
    BuildMI(MBB, MI, DL, get(AArch64::MRS))
        .addDef(Reg, RegState::Renamable)
        .addImm(SrcReg->Encoding);
    int Offset = M.getStackProtectorGuardOffset();
    if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
      // ldr xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset / 8);
    } else if (Offset >= -256 && Offset <= 255) {
      // ldur xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset);
    } else if (Offset >= -4095 && Offset <= 4095) {
      if (Offset > 0) {
        // add xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(Offset)
            .addImm(0);
      } else {
        // sub xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(-Offset)
            .addImm(0);
      }
      // ldr xN, [xN]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(0);
    } else {
      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
      // than 32760, cannot be encoded with a single load here.
      // It might be nice to use AArch64::MOVi32imm here, which would get
      // expanded in PreSched2 after PostRA, but our lone scratch Reg already
      // contains the MRS result. findScratchNonCalleeSaveRegister() in
      // AArch64FrameLowering might help us find such a scratch register
      // though. If we failed to find a scratch register, we could emit a
      // stream of add instructions to build up the immediate. Or, we could try
      // to insert a AArch64::MOVi32imm before register allocation so that we
      // didn't need to scavenge for a scratch register.
      report_fatal_error("Unable to encode Stack Protector Guard Offset");
    }
    MI.eraseFromParent();
    return true;
  }

  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin());
    }
  } else if (TM.getCodeModel() == CodeModel::Large) {
    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin());
    }
  }

  MI.eraseFromParent();

  return true;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will by lowered to ORRXrs
    Register DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    return AArch64::FPR128RegClass.contains(DstReg);
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOStridedAccess;
  });
}
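// "Unscaled" forms (LDUR/STUR and the pre-indexed variants listed below) take
// a signed 9-bit byte offset, whereas the scaled LDR/STR...ui forms take an
// unsigned 12-bit immediate that is implicitly multiplied by the access size.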
bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STRWpre:
  case AArch64::STURXi:
  case AArch64::STRXpre:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}
Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default: return {};
  case AArch64::PRFMui: return AArch64::PRFUMi;
  case AArch64::LDRXui: return AArch64::LDURXi;
  case AArch64::LDRWui: return AArch64::LDURWi;
  case AArch64::LDRBui: return AArch64::LDURBi;
  case AArch64::LDRHui: return AArch64::LDURHi;
  case AArch64::LDRSui: return AArch64::LDURSi;
  case AArch64::LDRDui: return AArch64::LDURDi;
  case AArch64::LDRQui: return AArch64::LDURQi;
  case AArch64::LDRBBui: return AArch64::LDURBBi;
  case AArch64::LDRHHui: return AArch64::LDURHHi;
  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
  case AArch64::LDRSWui: return AArch64::LDURSWi;
  case AArch64::STRXui: return AArch64::STURXi;
  case AArch64::STRWui: return AArch64::STURWi;
  case AArch64::STRBui: return AArch64::STURBi;
  case AArch64::STRHui: return AArch64::STURHi;
  case AArch64::STRSui: return AArch64::STURSi;
  case AArch64::STRDui: return AArch64::STURDi;
  case AArch64::STRQui: return AArch64::STURQi;
  case AArch64::STRBBui: return AArch64::STURBBi;
  case AArch64::STRHHui: return AArch64::STURHHi;
  }
}
unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
  switch (Opc) {
  default:
    return 2;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
  case AArch64::STGPi:
  case AArch64::LD1B_IMM:
  case AArch64::LD1H_IMM:
  case AArch64::LD1W_IMM:
  case AArch64::LD1D_IMM:
  case AArch64::ST1B_IMM:
  case AArch64::ST1H_IMM:
  case AArch64::ST1W_IMM:
  case AArch64::ST1D_IMM:
  case AArch64::LD1B_H_IMM:
  case AArch64::LD1SB_H_IMM:
  case AArch64::LD1H_S_IMM:
  case AArch64::LD1SH_S_IMM:
  case AArch64::LD1W_D_IMM:
  case AArch64::LD1SW_D_IMM:
  case AArch64::ST1B_H_IMM:
  case AArch64::ST1H_S_IMM:
  case AArch64::ST1W_D_IMM:
  case AArch64::LD1B_S_IMM:
  case AArch64::LD1SB_S_IMM:
  case AArch64::LD1H_D_IMM:
  case AArch64::LD1SH_D_IMM:
  case AArch64::ST1B_S_IMM:
  case AArch64::ST1H_D_IMM:
  case AArch64::LD1B_D_IMM:
  case AArch64::LD1SB_D_IMM:
  case AArch64::ST1B_D_IMM:
  case AArch64::LD1RB_IMM:
  case AArch64::LD1RB_H_IMM:
  case AArch64::LD1RB_S_IMM:
  case AArch64::LD1RB_D_IMM:
  case AArch64::LD1RSB_H_IMM:
  case AArch64::LD1RSB_S_IMM:
  case AArch64::LD1RSB_D_IMM:
  case AArch64::LD1RH_IMM:
  case AArch64::LD1RH_S_IMM:
  case AArch64::LD1RH_D_IMM:
  case AArch64::LD1RSH_S_IMM:
  case AArch64::LD1RSH_D_IMM:
  case AArch64::LD1RW_IMM:
  case AArch64::LD1RW_D_IMM:
  case AArch64::LD1RSW_IMM:
  case AArch64::LD1RD_IMM:
    return 3;
  case AArch64::STGOffset:
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    return 2;
  }
}
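// The opcodes accepted below are the ones the load/store optimizer can merge
// into a pair, e.g.
//   ldr x0, [sp, #8]
//   ldr x1, [sp, #16]
// can become
//   ldp x0, x1, [sp, #8]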
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRSWui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STURWi:
  case AArch64::STRWpre:
  case AArch64::STURXi:
  case AArch64::STRXpre:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::LDURSWi:
    return true;
  }
}
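// Map an opcode onto its flag-setting variant, e.g. ADDWri -> ADDSWri or
// SUBXrs -> SUBSXrs; opcodes with no such variant are rejected below.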
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
                                                   bool &Is64Bit) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no flag setting equivalent!");
  case AArch64::ADDWri:
    Is64Bit = false;
    return AArch64::ADDSWri;
  case AArch64::ADDWrr:
    Is64Bit = false;
    return AArch64::ADDSWrr;
  case AArch64::ADDWrs:
    Is64Bit = false;
    return AArch64::ADDSWrs;
  case AArch64::ADDWrx:
    Is64Bit = false;
    return AArch64::ADDSWrx;
  case AArch64::ANDWri:
    Is64Bit = false;
    return AArch64::ANDSWri;
  case AArch64::ANDWrr:
    Is64Bit = false;
    return AArch64::ANDSWrr;
  case AArch64::ANDWrs:
    Is64Bit = false;
    return AArch64::ANDSWrs;
  case AArch64::BICWrr:
    Is64Bit = false;
    return AArch64::BICSWrr;
  case AArch64::BICWrs:
    Is64Bit = false;
    return AArch64::BICSWrs;
  case AArch64::SUBWri:
    Is64Bit = false;
    return AArch64::SUBSWri;
  case AArch64::SUBWrr:
    Is64Bit = false;
    return AArch64::SUBSWrr;
  case AArch64::SUBWrs:
    Is64Bit = false;
    return AArch64::SUBSWrs;
  case AArch64::SUBWrx:
    Is64Bit = false;
    return AArch64::SUBSWrx;

  case AArch64::ADDXri:
    Is64Bit = true;
    return AArch64::ADDSXri;
  case AArch64::ADDXrr:
    Is64Bit = true;
    return AArch64::ADDSXrr;
  case AArch64::ADDXrs:
    Is64Bit = true;
    return AArch64::ADDSXrs;
  case AArch64::ADDXrx:
    Is64Bit = true;
    return AArch64::ADDSXrx;
  case AArch64::ANDXri:
    Is64Bit = true;
    return AArch64::ANDSXri;
  case AArch64::ANDXrr:
    Is64Bit = true;
    return AArch64::ANDSXrr;
  case AArch64::ANDXrs:
    Is64Bit = true;
    return AArch64::ANDSXrs;
  case AArch64::BICXrr:
    Is64Bit = true;
    return AArch64::BICSXrr;
  case AArch64::BICXrs:
    Is64Bit = true;
    return AArch64::BICSXrs;
  case AArch64::SUBXri:
    Is64Bit = true;
    return AArch64::SUBSXri;
  case AArch64::SUBXrr:
    Is64Bit = true;
    return AArch64::SUBSXrr;
  case AArch64::SUBXrs:
    Is64Bit = true;
    return AArch64::SUBSXrs;
  case AArch64::SUBXrx:
    Is64Bit = true;
    return AArch64::SUBSXrx;
  }
}
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
  bool IsPreLdSt = isPreLdSt(MI);

  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
  // For Pre-inc LD/ST, the operand is shifted by one.
  assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
          MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
         "Expected a reg or frame index operand.");

  // For Pre-indexed addressing quadword instructions, the third operand is the
  // immediate offset.
  bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();

  if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  // This case will never occur with an FI base.
  // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
  // For example:
  //   ldr q0, [x11, #32]!
  //   ldr q1, [x11, #16]
  //   to
  //   ldp q0, q1, [x11, #32]!
  if (MI.getOperand(1).isReg() && !IsPreLdSt) {
    Register BaseReg = MI.getOperand(1).getReg();
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (MI.modifiesRegister(BaseReg, TRI))
      return false;
  }

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // Do not pair any callee-save store/reload instructions in the
  // prologue/epilogue if the CFI information encoded the operations as separate
  // instructions, as that will cause the size of the actual prologue to mismatch
  // with the prologue size recorded in the Windows CFI.
  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
                     MI.getMF()->getFunction().needsUnwindTableEntry();
  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
                      MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}
bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore())
    return false;

  const MachineOperand *BaseOp;
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
                                    Width, TRI))
    return false;
  BaseOps.push_back(BaseOp);
  return true;
}
Optional<ExtAddrMode>
AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
                                          const TargetRegisterInfo *TRI) const {
  const MachineOperand *Base; // Filled with the base operand of MI.
  int64_t Offset;             // Filled with the offset of MI.
  bool OffsetIsScalable;
  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
    return None;

  if (!Base->isReg())
    return None;
  ExtAddrMode AM;
  AM.BaseReg = Base->getReg();
  AM.Displacement = Offset;
  AM.ScaledReg = 0;
  return AM;
}
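// Note for the helper below: the returned Offset is in bytes, i.e. the encoded
// immediate multiplied by the opcode's scale (for example, an LDRXui with an
// immediate of 3 addresses byte offset 24), while unscaled opcodes use scale 1.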
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
    bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
        !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() ||
        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  TypeSize Scale(0U, false);
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling factor
  // set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseOp = &LdSt.getOperand(1);
    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseOp = &LdSt.getOperand(2);
    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
  }
  OffsetIsScalable = Scale.isScalable();

  if (!BaseOp->isReg() && !BaseOp->isFI())
    return false;

  return true;
}
MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) {
  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = TypeSize::Fixed(0);
    Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    Scale = TypeSize::Fixed(4);
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::PRFUMi:
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = TypeSize::Fixed(16);
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = TypeSize::Fixed(16);
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = TypeSize::Fixed(8);
    break;
  case AArch64::PRFMui:
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = TypeSize::Fixed(8);
    break;
  case AArch64::StoreSwiftAsyncContext:
    // Store is an STRXui, but there might be an ADDXri in the expansion too.
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = TypeSize::Fixed(4);
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = TypeSize::Fixed(4);
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = TypeSize::Fixed(2);
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::STPXpre:
  case AArch64::LDPXpost:
  case AArch64::STPDpre:
  case AArch64::LDPDpost:
    Scale = TypeSize::Fixed(8);
    break;
  case AArch64::STPQpre:
  case AArch64::LDPQpost:
    Scale = TypeSize::Fixed(16);
    break;
  case AArch64::STRXpre:
  case AArch64::STRDpre:
  case AArch64::LDRXpost:
  case AArch64::LDRDpost:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::STRQpre:
  case AArch64::LDRQpost:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::ADDG:
    Scale = TypeSize::Fixed(16);
    break;
  case AArch64::TAGPstack:
    Scale = TypeSize::Fixed(16);
    // TAGP with a negative offset turns into SUBP, which has a maximum offset
    // of 63 (not 64!).
    MinOffset = -63;
    MaxOffset = 63;
    break;
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
    Scale = TypeSize::Fixed(16);
    break;
  case AArch64::STR_ZZZZXI:
  case AArch64::LDR_ZZZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 4;
    break;
  case AArch64::STR_ZZZXI:
  case AArch64::LDR_ZZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 3;
    break;
  case AArch64::STR_ZZXI:
  case AArch64::LDR_ZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 2;
    break;
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    Scale = TypeSize::Scalable(2);
    Width = SVEMaxBytesPerVector / 8;
    break;
  case AArch64::LDR_ZXI:
  case AArch64::STR_ZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector;
    break;
  case AArch64::LD1B_IMM:
  case AArch64::LD1H_IMM:
  case AArch64::LD1W_IMM:
  case AArch64::LD1D_IMM:
  case AArch64::ST1B_IMM:
  case AArch64::ST1H_IMM:
  case AArch64::ST1W_IMM:
  case AArch64::ST1D_IMM:
    // A full vectors worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector;
    break;
  case AArch64::LD1B_H_IMM:
  case AArch64::LD1SB_H_IMM:
  case AArch64::LD1H_S_IMM:
  case AArch64::LD1SH_S_IMM:
  case AArch64::LD1W_D_IMM:
  case AArch64::LD1SW_D_IMM:
  case AArch64::ST1B_H_IMM:
  case AArch64::ST1H_S_IMM:
  case AArch64::ST1W_D_IMM:
    // A half vector worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(8);
    Width = SVEMaxBytesPerVector / 2;
    break;
  case AArch64::LD1B_S_IMM:
  case AArch64::LD1SB_S_IMM:
  case AArch64::LD1H_D_IMM:
  case AArch64::LD1SH_D_IMM:
  case AArch64::ST1B_S_IMM:
  case AArch64::ST1H_D_IMM:
    // A quarter vector worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(4);
    Width = SVEMaxBytesPerVector / 4;
    break;
  case AArch64::LD1B_D_IMM:
  case AArch64::LD1SB_D_IMM:
  case AArch64::ST1B_D_IMM:
    // A eighth vector worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(2);
    Width = SVEMaxBytesPerVector / 8;
    break;
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    Scale = TypeSize::Fixed(16);
    break;
  case AArch64::STGPi:
    Scale = TypeSize::Fixed(16);
    break;
  case AArch64::LD1RB_IMM:
  case AArch64::LD1RB_H_IMM:
  case AArch64::LD1RB_S_IMM:
  case AArch64::LD1RB_D_IMM:
  case AArch64::LD1RSB_H_IMM:
  case AArch64::LD1RSB_S_IMM:
  case AArch64::LD1RSB_D_IMM:
    Scale = TypeSize::Fixed(1);
    break;
  case AArch64::LD1RH_IMM:
  case AArch64::LD1RH_S_IMM:
  case AArch64::LD1RH_D_IMM:
  case AArch64::LD1RSH_S_IMM:
  case AArch64::LD1RSH_D_IMM:
    Scale = TypeSize::Fixed(2);
    break;
  case AArch64::LD1RW_IMM:
  case AArch64::LD1RW_D_IMM:
  case AArch64::LD1RSW_IMM:
    Scale = TypeSize::Fixed(4);
    break;
  case AArch64::LD1RD_IMM:
    Scale = TypeSize::Fixed(8);
    break;
  }

  return true;
}
// Scaling factor for unscaled load or store.
int AArch64InstrInfo::getMemScale(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRWpre:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::STRXpre:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::LDPQi:
  case AArch64::LDRQpre:
  case AArch64::STPQi:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
    return 16;
  }
}
bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDRWpre:
  case AArch64::LDRXpre:
  case AArch64::LDRSpre:
  case AArch64::LDRDpre:
  case AArch64::LDRQpre:
    return true;
  }
}

bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STRWpre:
  case AArch64::STRXpre:
  case AArch64::STRSpre:
  case AArch64::STRDpre:
  case AArch64::STRQpre:
    return true;
  }
}

bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
  return isPreLd(MI) || isPreSt(MI);
}
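// For example, an unscaled byte offset of 16 on an 8-byte access becomes the
// element offset 2 used by the paired form, while a byte offset of 12 is not
// a multiple of the stride and cannot be scaled.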
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled properly.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  int Scale = AArch64InstrInfo::getMemScale(Opc);

  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % Scale != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= Scale;
  return true;
}

static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
                            int64_t Offset1, unsigned Opcode1, int FI2,
                            int64_t Offset2, unsigned Opcode2) {
  // Accesses through fixed stack object frame indices may access a different
  // fixed stack slot. Check that the object offsets + offsets match.
  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
    assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
    // Convert to scaled object offsets.
    int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
    if (ObjectOffset1 % Scale1 != 0)
      return false;
    ObjectOffset1 /= Scale1;
    int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
    if (ObjectOffset2 % Scale2 != 0)
      return false;
    ObjectOffset2 /= Scale2;
    ObjectOffset1 += Offset1;
    ObjectOffset2 += Offset2;
    return ObjectOffset1 + 1 == ObjectOffset2;
  }

  return FI1 == FI2;
}
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1,
    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
    unsigned NumBytes) const {
  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  if (BaseOp1.getType() != BaseOp2.getType())
    return false;

  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // Check for both base regs and base FI.
  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
    return false;

  // Only cluster up to a single pair.
  if (NumLoads > 2)
    return false;

  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  // Note: except for non-equal frame index bases
  if (BaseOp1.isFI()) {
    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
           "Caller should have ordered offsets.");

    const MachineFrameInfo &MFI =
        FirstLdSt.getParent()->getParent()->getFrameInfo();
    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
                           BaseOp2.getIndex(), Offset2, SecondOpc);
  }

  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");

  return Offset1 + 1 == Offset2;
}
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (Register::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL, MCRegister DestReg,
                                        MCRegister SrcReg, bool KillSrc,
                                        unsigned Opcode,
                                        ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I,
                                       DebugLoc DL, unsigned DestReg,
                                       unsigned SrcReg, bool KillSrc,
                                       unsigned Opcode, unsigned ZeroReg,
                                       llvm::ArrayRef<unsigned> Indices) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned NumRegs = Indices.size();

#ifndef NDEBUG
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
         "GPR reg sequences should not be able to overlap");
#endif

  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    MIB.addReg(ZeroReg);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, MCRegister DestReg,
                                   MCRegister SrcReg, bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        MCRegister DestRegX = TRI->getMatchingSuperReg(
            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        MCRegister SrcRegX = TRI->getMatchingSuperReg(
            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        MCRegister DestRegX = TRI->getMatchingSuperReg(
            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        MCRegister SrcRegX = TRI->getMatchingSuperReg(
            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  // Copy a Predicate register by ORRing with itself.
  if (AArch64::PPRRegClass.contains(DestReg) &&
      AArch64::PPRRegClass.contains(SrcReg)) {
    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
    BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
        .addReg(SrcReg) // Pg
        .addReg(SrcReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Copy a Z register by ORRing with itself.
  if (AArch64::ZPRRegClass.contains(DestReg) &&
      AArch64::ZPRRegClass.contains(SrcReg)) {
    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
    BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
        .addReg(SrcReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Copy a Z register pair by copying the individual sub-registers.
  if (AArch64::ZPR2RegClass.contains(DestReg) &&
      AArch64::ZPR2RegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                     Indices);
    return;
  }

  // Copy a Z register triple by copying the individual sub-registers.
  if (AArch64::ZPR3RegClass.contains(DestReg) &&
      AArch64::ZPR3RegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                       AArch64::zsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                     Indices);
    return;
  }

  // Copy a Z register quad by copying the individual sub-registers.
  if (AArch64::ZPR4RegClass.contains(DestReg) &&
      AArch64::ZPR4RegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                       AArch64::zsub2, AArch64::zsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                     Indices);
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2, AArch64::dsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2, AArch64::qsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
                    AArch64::XZR, Indices);
    return;
  }

  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
                    AArch64::WZR, Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    DestReg =
        RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
    SrcReg =
        RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    DestReg =
        RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
    SrcReg =
        RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

#ifndef NDEBUG
  const TargetRegisterInfo &TRI = getRegisterInfo();
  errs() << TRI.getRegAsmName(DestReg) << " = COPY "
         << TRI.getRegAsmName(SrcReg) << "\n";
#endif
  llvm_unreachable("unimplemented reg-to-reg copy");
}
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsertBefore,
                                    const MCInstrDesc &MCID,
                                    Register SrcReg, bool IsKill,
                                    unsigned SubIdx0, unsigned SubIdx1, int FI,
                                    MachineMemOperand *MMO) {
  Register SrcReg0 = SrcReg;
  Register SrcReg1 = SrcReg;
  if (Register::isPhysicalRegister(SrcReg)) {
    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
    SubIdx0 = 0;
    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
    SubIdx1 = 0;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
      .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}
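// Spill slots are addressed with scaled unsigned offsets, so the opcode chosen
// below is keyed off the register class's spill size, e.g. a 4-byte GPR32
// spill uses STRWui while a 16-byte FPR128 spill uses STRQui.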
void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

  unsigned Opc = 0;
  bool Offset = true;
  unsigned StackID = TargetStackID::Default;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_PXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (Register::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (Register::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPWi), SrcReg, isKill,
                              AArch64::sube32, AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPXi), SrcReg, isKill,
                              AArch64::sube64, AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
      Opc = AArch64::STR_ZZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }
  assert(Opc && "Unknown register class");
  MFI.setStackID(FI, StackID);

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertBefore,
                                     const MCInstrDesc &MCID,
                                     Register DestReg, unsigned SubIdx0,
                                     unsigned SubIdx1, int FI,
                                     MachineMemOperand *MMO) {
  Register DestReg0 = DestReg;
  Register DestReg1 = DestReg;
  bool IsUndef = true;
  if (Register::isPhysicalRegister(DestReg)) {
    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
    SubIdx0 = 0;
    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
    SubIdx1 = 0;
    IsUndef = false;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
      .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}
void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

  unsigned Opc = 0;
  bool Offset = true;
  unsigned StackID = TargetStackID::Default;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_PXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (Register::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (Register::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
                               AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
                               AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }

  assert(Opc && "Unknown register class");
  MFI.setStackID(FI, StackID);

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                           const MachineInstr &UseMI,
                                           const TargetRegisterInfo *TRI) {
  return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
                                         UseMI.getIterator()),
                [TRI](const MachineInstr &I) {
                  return I.modifiesRegister(AArch64::NZCV, TRI) ||
                         I.readsRegister(AArch64::NZCV, TRI);
                });
}
void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
    const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
  // The smallest scalable element supported by scaled SVE addressing
  // modes are predicates, which are 2 scalable bytes in size. So the scalable
  // byte offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  // VGSized offsets are divided by '2', because the VG register is the
  // number of 64bit granules as opposed to 128bit vector chunks,
  // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
  // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
  ByteSized = Offset.getFixed();
  VGSized = Offset.getScalable() / 2;
}
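// For illustration of the decomposition above (added as an illustrative note,
// not part of the upstream comment): a StackOffset of 16 fixed bytes plus
// 16 scalable bytes yields ByteSized = 16 and VGSized = 8, so the DWARF
// expression describes the location as "base + 16 + VG * 8".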
/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
    const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
    int64_t &NumDataVectors) {
  // The smallest scalable element supported by scaled SVE addressing
  // modes are predicates, which are 2 scalable bytes in size. So the scalable
  // byte offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  NumBytes = Offset.getFixed();
  NumDataVectors = 0;
  NumPredicateVectors = Offset.getScalable() / 2;
  // This method is used to get the offsets to adjust the frame offset.
  // If the function requires ADDPL to be used and needs more than two ADDPL
  // instructions, part of the offset is folded into NumDataVectors so that it
  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}
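// For illustration of the folding above: a purely scalable offset of 140
// bytes gives NumPredicateVectors = 70, which is outside the single-ADDPL
// range (> 62), so it is split into NumDataVectors = 8 and
// NumPredicateVectors = 6, i.e. one ADDVL #8 followed by one ADDPL #6.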
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, unsigned DestReg,
                               unsigned SrcReg, int64_t Offset, unsigned Opc,
                               const TargetInstrInfo *TII,
                               MachineInstr::MIFlag Flag, bool NeedsWinCFI,
                               bool *HasWinCFI) {
  int Sign = 1;
  unsigned MaxEncoding, ShiftSize;
  switch (Opc) {
  case AArch64::ADDXri:
  case AArch64::ADDSXri:
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    MaxEncoding = 0xfff;
    ShiftSize = 12;
    break;
  case AArch64::ADDVL_XXI:
  case AArch64::ADDPL_XXI:
    MaxEncoding = 31;
    ShiftSize = 0;
    if (Offset < 0) {
      MaxEncoding = 32;
      Sign = -1;
      Offset = -Offset;
    }
    break;
  default:
    llvm_unreachable("Unsupported opcode");
  }

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  Register TmpReg = DestReg;
  if (TmpReg == AArch64::XZR)
    TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
        &AArch64::GPR64RegClass);
  do {
    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
    unsigned LocalShiftSize = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal = ThisVal >> ShiftSize;
      LocalShiftSize = ShiftSize;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");

    Offset -= ThisVal << LocalShiftSize;
    if (Offset == 0)
      TmpReg = DestReg;
    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
                   .addReg(SrcReg)
                   .addImm(Sign * (int)ThisVal);
    if (ShiftSize)
      MBI = MBI.addImm(
          AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
    MBI = MBI.setMIFlag(Flag);

    if (NeedsWinCFI) {
      assert(Sign == 1 && "SEH directives should always have a positive sign");
      int Imm = (int)(ThisVal << LocalShiftSize);
      if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
          (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
        if (HasWinCFI)
          *HasWinCFI = true;
        if (Imm == 0)
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
        else
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
              .addImm(Imm)
              .setMIFlag(Flag);
        assert(Offset == 0 && "Expected remaining offset to be zero to "
                              "emit a single SEH directive");
      } else if (DestReg == AArch64::SP) {
        if (HasWinCFI)
          *HasWinCFI = true;
        assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
            .addImm(Imm)
            .setMIFlag(Flag);
      }
      if (HasWinCFI)
        *HasWinCFI = true;
    }

    SrcReg = TmpReg;
  } while (Offset);
}
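// For illustration of the chunking in emitFrameOffsetAdj above: adding
// 0x123456 bytes with ADDXri is emitted as two instructions; the first
// iteration takes ThisVal = 0x123 with LSL #12 (consuming 0x123000), the
// second adds the remaining 0x456 with no shift.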
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg,
                           StackOffset Offset, const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV,
                           bool NeedsWinCFI, bool *HasWinCFI) {
  int64_t Bytes, NumPredicateVectors, NumDataVectors;
  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
      Offset, Bytes, NumPredicateVectors, NumDataVectors);

  // First emit non-scalable frame offsets, or a simple 'mov'.
  if (Bytes || (!Offset && SrcReg != DestReg)) {
    assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
           "SP increment/decrement not 8-byte aligned");
    unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
    if (Bytes < 0) {
      Bytes = -Bytes;
      Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
    }
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
                       NeedsWinCFI, HasWinCFI);
    SrcReg = DestReg;
  }

  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
         "SetNZCV not supported with SVE vectors");
  assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
         "WinCFI not supported with SVE vectors");

  if (NumDataVectors) {
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
                       AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
    SrcReg = DestReg;
  }

  if (NumPredicateVectors) {
    assert(DestReg != AArch64::SP && "Unaligned access to SP");
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
                       AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
  }
}
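// Illustrative use of emitFrameOffset above (assuming the declaration's
// default arguments for SetNZCV/NeedsWinCFI/HasWinCFI): a call such as
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::getFixed(-16), TII, MachineInstr::FrameSetup)
// decomposes to Bytes = -16 and emits a single "sub sp, sp, #16".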
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS, VirtRegMap *VRM) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %0 = COPY %sp; GPR64all:%0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register class don't match. For example:
  //
  //   %0 = COPY %xzr; GPR64common:%0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %xzr, %stack.0
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x and
  // d regs) of the same size. For example:
  //
  //   %0 = COPY %1; GPR64:%0, FPR64:%1
  //
  // will be filled as
  //
  //   LDRDui %0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %Temp, fi<#0>
  //   %0 = COPY %Temp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    Register DstReg = DstMO.getReg();
    Register SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return Register::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %xzr, %stack.0
    //
    if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    //
    // where we can load the full virtual reg source stack slot, into the subreg
    // destination, in this case producing:
    //
    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
                                    StackOffset &SOffset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int64_t *EmittableOffset) {
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;

  // Exit early for structured vector spills/fills as they can't take an
  // immediate offset.
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
  case AArch64::IRGstack:
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return AArch64FrameOffsetCannotUpdate;
  }

  // Get the min/max offset and the scale.
  TypeSize ScaleValue(0U, false);
  unsigned Width;
  int64_t MinOff, MaxOff;
  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
                                      MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  // Construct the complete offset.
  bool IsMulVL = ScaleValue.isScalable();
  unsigned Scale = ScaleValue.getKnownMinSize();
  int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();

  const MachineOperand &ImmOpnd =
      MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
  Offset += ImmOpnd.getImm() * Scale;

  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset and there is an unscaled op to use.
  Optional<unsigned> UnscaledOp =
      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
  if (useUnscaledOp &&
      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
                                      MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  Scale = ScaleValue.getKnownMinSize();
  assert(IsMulVL == ScaleValue.isScalable() &&
         "Unscaled opcode has different value for scalable");

  int64_t Remainder = Offset % Scale;
  assert(!(Remainder && useUnscaledOp) &&
         "Cannot have remainder when using unscaled op");

  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
  int64_t NewOffset = Offset / Scale;
  if (MinOff <= NewOffset && NewOffset <= MaxOff)
    Offset = Remainder;
  else {
    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
    Offset = Offset - NewOffset * Scale + Remainder;
  }

  if (EmittableOffset)
    *EmittableOffset = NewOffset;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp && UnscaledOp)
    *OutUnscaledOp = *UnscaledOp;

  if (IsMulVL)
    SOffset = StackOffset::get(SOffset.getFixed(), Offset);
  else
    SOffset = StackOffset::get(Offset, SOffset.getScalable());
  return AArch64FrameOffsetCanUpdate |
         (SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
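// For illustration of the calculation above: for a scaled 8-byte load
// (LDRXui, Scale = 8) whose immediate operand is 2, folding a fixed frame
// offset of 40 bytes gives Offset = 40 + 2 * 8 = 56, NewOffset = 7 with no
// remainder, so the access stays scaled, the remaining SOffset becomes zero,
// and the result is reported as legal and updatable.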
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, StackOffset &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = StackOffset();
    return true;
  }

  int64_t NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return !Offset;
  }

  return false;
}
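// For illustration of the fast path above: rewriting
// "%x = ADDXri %frame.index, 0, 0" against FrameReg = SP with a resolved
// offset of 32 bytes goes through the ADDXri branch, emitFrameOffset()
// materialises "add %x, sp, #32", the original ADDXri is erased, and the
// remaining Offset is reset to zero.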
MCInst AArch64InstrInfo::getNop() const {
  return MCInstBuilder(AArch64::HINT).addImm(0);
}
// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }

// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
  case AArch64::ADDv8i8:
  case AArch64::ADDv16i8:
  case AArch64::ADDv4i16:
  case AArch64::ADDv8i16:
  case AArch64::ADDv2i32:
  case AArch64::ADDv4i32:
  case AArch64::SUBv8i8:
  case AArch64::SUBv16i8:
  case AArch64::SUBv4i16:
  case AArch64::SUBv8i16:
  case AArch64::SUBv2i32:
  case AArch64::SUBv4i32:
    return true;
  default:
    break;
  }
  return false;
}
// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDHrr:
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv4f16:
  case AArch64::FADDv8f16:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBHrr:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv4f16:
  case AArch64::FSUBv8f16:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32: {
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
    // the target options or if FADD/FSUB has the contract fast-math flag.
    return Options.UnsafeFPMath ||
           Options.AllowFPOpFusion == FPOpFusion::Fast ||
           Inst.getFlag(MachineInstr::FmContract);
  }
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}
//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}
// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
                      MachineCombinerPattern Pattern) {
    if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
      Patterns.push_back(Pattern);
      Found = true;
    }
  };

  auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
    if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
      Patterns.push_back(Pattern);
      Found = true;
    }
  };

  typedef MachineCombinerPattern MCP;

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
    break;
  case AArch64::ADDXrr:
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
    break;
  case AArch64::SUBWrr:
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
    break;
  case AArch64::SUBXrr:
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
    break;
  case AArch64::ADDWri:
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
    break;
  case AArch64::ADDXri:
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
    break;
  case AArch64::SUBWri:
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
    break;
  case AArch64::SUBXri:
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
    break;
  case AArch64::ADDv8i8:
    setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
    setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
    break;
  case AArch64::ADDv16i8:
    setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
    setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
    break;
  case AArch64::ADDv4i16:
    setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
    setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
    break;
  case AArch64::ADDv8i16:
    setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
    setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
    break;
  case AArch64::ADDv2i32:
    setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
    setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
    break;
  case AArch64::ADDv4i32:
    setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
    setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
    break;
  case AArch64::SUBv8i8:
    setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
    setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
    break;
  case AArch64::SUBv16i8:
    setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
    setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
    break;
  case AArch64::SUBv4i16:
    setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
    setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
    break;
  case AArch64::SUBv8i16:
    setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
    setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
    break;
  case AArch64::SUBv2i32:
    setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
    setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
    break;
  case AArch64::SUBv4i32:
    setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
    setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
    break;
  }
  return Found;
}
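// For illustration of the matching above (sketched MIR, not taken from the
// upstream source): a sequence such as
//   %3:gpr32 = MADDWrrr %1, %2, $wzr   ; a plain MUL (MADD with WZR)
//   %4:gpr32 = ADDWrr killed %3, %0
// is recorded as MULADDW_OP1, and genAlternativeCodeSequence() can later
// rewrite it into a single
//   %4:gpr32 = MADDWrrr %1, %2, %0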
/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  auto Match = [&](int Opcode, int Operand,
                   MachineCombinerPattern Pattern) -> bool {
    if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
      Patterns.push_back(Pattern);
      return true;
    }
    return false;
  };

  typedef MachineCombinerPattern MCP;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDHrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDHrr does not have register operands");
    Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
             Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
    break;
  case AArch64::FADDDrr:
    Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
             Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
    break;
  case AArch64::FADDv4f16:
    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
             Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
             Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
    break;
  case AArch64::FADDv8f16:
    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
             Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
             Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
    break;
  case AArch64::FADDv2f32:
    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
             Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
             Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
    break;
  case AArch64::FADDv2f64:
    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
             Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
             Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
    break;
  case AArch64::FADDv4f32:
    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
             Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
             Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
    break;
  case AArch64::FSUBHrr:
    Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
    Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
    break;
  case AArch64::FSUBSrr:
    Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
    Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
    break;
  case AArch64::FSUBDrr:
    Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
    Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
    break;
  case AArch64::FSUBv4f16:
    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
             Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
             Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
    break;
  case AArch64::FSUBv8f16:
    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
             Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
             Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
    break;
  case AArch64::FSUBv2f32:
    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
             Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
             Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
    break;
  case AArch64::FSUBv2f64:
    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
             Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
             Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
    break;
  case AArch64::FSUBv4f32:
    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
             Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
             Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
    break;
  }
  return Found;
}
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
    MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDH_OP1:
  case MachineCombinerPattern::FMULADDH_OP2:
  case MachineCombinerPattern::FMULSUBH_OP1:
  case MachineCombinerPattern::FMULSUBH_OP2:
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FNMULSUBH_OP1:
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1:
  case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
  case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
  case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f16_OP2:
  case MachineCombinerPattern::FMLAv4f16_OP1:
  case MachineCombinerPattern::FMLAv8f16_OP1:
  case MachineCombinerPattern::FMLAv8f16_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
  case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
  case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
  case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f16_OP1:
  case MachineCombinerPattern::FMLSv4f16_OP2:
  case MachineCombinerPattern::FMLSv8f16_OP1:
  case MachineCombinerPattern::FMLSv8f16_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::MULADDv8i8_OP1:
  case MachineCombinerPattern::MULADDv8i8_OP2:
  case MachineCombinerPattern::MULADDv16i8_OP1:
  case MachineCombinerPattern::MULADDv16i8_OP2:
  case MachineCombinerPattern::MULADDv4i16_OP1:
  case MachineCombinerPattern::MULADDv4i16_OP2:
  case MachineCombinerPattern::MULADDv8i16_OP1:
  case MachineCombinerPattern::MULADDv8i16_OP2:
  case MachineCombinerPattern::MULADDv2i32_OP1:
  case MachineCombinerPattern::MULADDv2i32_OP2:
  case MachineCombinerPattern::MULADDv4i32_OP1:
  case MachineCombinerPattern::MULADDv4i32_OP2:
  case MachineCombinerPattern::MULSUBv8i8_OP1:
  case MachineCombinerPattern::MULSUBv8i8_OP2:
  case MachineCombinerPattern::MULSUBv16i8_OP1:
  case MachineCombinerPattern::MULSUBv16i8_OP2:
  case MachineCombinerPattern::MULSUBv4i16_OP1:
  case MachineCombinerPattern::MULSUBv4i16_OP2:
  case MachineCombinerPattern::MULSUBv8i16_OP1:
  case MachineCombinerPattern::MULSUBv8i16_OP2:
  case MachineCombinerPattern::MULSUBv2i32_OP1:
  case MachineCombinerPattern::MULSUBv2i32_OP2:
  case MachineCombinerPattern::MULSUBv4i32_OP1:
  case MachineCombinerPattern::MULSUBv4i32_OP2:
  case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
  case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
  case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
  case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
  case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
  case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
  case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
  case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
  case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
  case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
  case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
  case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
  case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
  case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
  case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
  case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
    bool DoRegPressureReduce) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                     DoRegPressureReduce);
}
enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const Register *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  Register SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  Register SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
    SrcReg2 = *ReplacedAddend;
    Src2IsKill = true;
  } else {
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
  }

  if (Register::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (Register::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (Register::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (Register::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
  InsInstrs.push_back(MIB);
  return MUL;
}
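// For illustration of the Accumulator operand order above (sketched, not
// upstream text): given Root = FADDv2f32 %acc, %mul with
// MUL = FMULv2f32 %a, %b and IdxMulOpd = 2, the generated instruction places
// the addend first, i.e. FMLAv2f32 %dst, %acc, %a, %b.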
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyAcc(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Accumulator);
}
/// genNeg - Helper to generate an intermediate negation of the second operand
/// of Root
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
                       const TargetInstrInfo *TII, MachineInstr &Root,
                       SmallVectorImpl<MachineInstr *> &InsInstrs,
                       DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                       unsigned MnegOpc, const TargetRegisterClass *RC) {
  Register NewVR = MRI.createVirtualRegister(RC);
  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
          .add(Root.getOperand(2));
  InsInstrs.push_back(MIB);

  assert(InstrIdxForVirtReg.empty());
  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));

  return NewVR;
}
/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyAccNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Accumulator, &NewVR);
}
/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyIdx(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed);
}
/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyIdxNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);

  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed, &NewVR);
}
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
                              const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  Register SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (Register::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (Register::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (Register::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (Register::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
5397 case MachineCombinerPattern::MULADDv8i8_OP1
:
5398 Opc
= AArch64::MLAv8i8
;
5399 RC
= &AArch64::FPR64RegClass
;
5400 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
5402 case MachineCombinerPattern::MULADDv8i8_OP2
:
5403 Opc
= AArch64::MLAv8i8
;
5404 RC
= &AArch64::FPR64RegClass
;
5405 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
5407 case MachineCombinerPattern::MULADDv16i8_OP1
:
5408 Opc
= AArch64::MLAv16i8
;
5409 RC
= &AArch64::FPR128RegClass
;
5410 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
5412 case MachineCombinerPattern::MULADDv16i8_OP2
:
5413 Opc
= AArch64::MLAv16i8
;
5414 RC
= &AArch64::FPR128RegClass
;
5415 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
5417 case MachineCombinerPattern::MULADDv4i16_OP1
:
5418 Opc
= AArch64::MLAv4i16
;
5419 RC
= &AArch64::FPR64RegClass
;
5420 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
5422 case MachineCombinerPattern::MULADDv4i16_OP2
:
5423 Opc
= AArch64::MLAv4i16
;
5424 RC
= &AArch64::FPR64RegClass
;
5425 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
5427 case MachineCombinerPattern::MULADDv8i16_OP1
:
5428 Opc
= AArch64::MLAv8i16
;
5429 RC
= &AArch64::FPR128RegClass
;
5430 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
5432 case MachineCombinerPattern::MULADDv8i16_OP2
:
5433 Opc
= AArch64::MLAv8i16
;
5434 RC
= &AArch64::FPR128RegClass
;
5435 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
5437 case MachineCombinerPattern::MULADDv2i32_OP1
:
5438 Opc
= AArch64::MLAv2i32
;
5439 RC
= &AArch64::FPR64RegClass
;
5440 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
5442 case MachineCombinerPattern::MULADDv2i32_OP2
:
5443 Opc
= AArch64::MLAv2i32
;
5444 RC
= &AArch64::FPR64RegClass
;
5445 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
5447 case MachineCombinerPattern::MULADDv4i32_OP1
:
5448 Opc
= AArch64::MLAv4i32
;
5449 RC
= &AArch64::FPR128RegClass
;
5450 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 1, Opc
, RC
);
5452 case MachineCombinerPattern::MULADDv4i32_OP2
:
5453 Opc
= AArch64::MLAv4i32
;
5454 RC
= &AArch64::FPR128RegClass
;
5455 MUL
= genFusedMultiplyAcc(MF
, MRI
, TII
, Root
, InsInstrs
, 2, Opc
, RC
);
  case MachineCombinerPattern::MULSUBv8i8_OP1:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv8i8_OP2:
    Opc = AArch64::MLSv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv16i8_OP1:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv16i8_OP2:
    Opc = AArch64::MLSv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv4i16_OP1:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i16_OP2:
    Opc = AArch64::MLSv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_OP1:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_OP2:
    Opc = AArch64::MLSv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_OP1:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_OP2:
    Opc = AArch64::MLSv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_OP1:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_OP2:
    Opc = AArch64::MLSv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
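  // The _indexed patterns are the same transformations applied to by-element
  // (lane-indexed) multiplies; genFusedMultiplyIdx and genFusedMultiplyIdxNeg
  // emit the corresponding MLA/MLS ..._indexed opcodes.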
  case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
    Opc = AArch64::MLSv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
    Opc = AArch64::MLSv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
    Opc = AArch64::MLSv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
    Opc = AArch64::MLSv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
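  // Scalar floating-point multiply-add/subtract patterns follow; they are
  // rewritten into the fused three-operand FMADD/FMSUB/FNMADD/FNMSUB forms.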
  // Floating Point Support
  case MachineCombinerPattern::FMULADDH_OP1:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDD_OP1:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case MachineCombinerPattern::FMULADDH_OP2:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDD_OP2:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
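  // Vector FMLA patterns: FMAInstKind::Indexed selects the by-element
  // (lane-indexed) form of the instruction, FMAInstKind::Accumulator the plain
  // vector accumulating form. The _OP1/_OP2 suffix names which operand of the
  // root add/sub is produced by the multiply being fused.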
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv4f16_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv4f16_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv8f16_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv8f16_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMULSUBH_OP1:
    Opc = AArch64::FNMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBS_OP1:
    Opc = AArch64::FNMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBD_OP1:
    Opc = AArch64::FNMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case MachineCombinerPattern::FNMULSUBH_OP1:
    Opc = AArch64::FNMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FNMULSUBS_OP1:
    Opc = AArch64::FNMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FNMULSUBD_OP1:
    Opc = AArch64::FNMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case MachineCombinerPattern::FMULSUBH_OP2:
    Opc = AArch64::FMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBS_OP2:
    Opc = AArch64::FMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBD_OP2:
    Opc = AArch64::FMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
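  // Vector FMLS patterns: the OP2 forms (multiply feeding the subtrahend) map
  // directly onto FMLS. The OP1 forms further below first negate the other
  // operand with an FNEG and then use FMLA, because vector FMLS computes
  // acc - a*b rather than a*b - acc.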
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv4f16_OP1:
  case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
      Opc = AArch64::FMLAv4f16;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    } else {
      Opc = AArch64::FMLAv4i16_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv4f16_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLSv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLSv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv8f16_OP1:
  case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
      Opc = AArch64::FMLAv8f16;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    } else {
      Opc = AArch64::FMLAv8i16_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv8f16_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLSv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLSv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
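  // The remaining FMLS *_OP1 cases all follow the same shape: build an FNEG of
  // the non-multiplied operand into a fresh virtual register, record it in
  // InstrIdxForVirtReg so the combiner can track the new definition, and emit
  // an FMLA that accumulates onto the negated value.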
  case MachineCombinerPattern::FMLSv2f32_OP1:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }

  case MachineCombinerPattern::FMLSv4f32_OP1:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }

  case MachineCombinerPattern::FMLSv2f64_OP1:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
  // CodeGen/AArch64/urem-seteq-nonzero.ll.
  // assert(MUL && "MUL was never set");
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}
/// Replace csincr-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc  w9, wzr, wzr, <condition code>
///   tbnz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<inverted condition code>
///    \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<condition code>
///    \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is power of 2.
///
/// Examples:
///    \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
///    \endcode
/// to
///    \code
///   tbnz w8, #10, L1
///    \endcode
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  assert(MI.getParent() && "Incomplete machine instruciton\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  Register VReg = MI.getOperand(0).getReg();
  if (!Register::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    Register CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    Register NewReg = MO.getReg();
    if (!Register::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the CBZ now.
    MO.setIsKill(false);

    // For immediate smaller than 32, we need to use the 32-bit
    // variant (W) in all cases. Indeed the 64-bit variant does not
    // allow to encode them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "aarch64-coffstub"},
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_S, "aarch64-s"},
      {MO_TLS, "aarch64-tls"},
      {MO_DLLIMPORT, "aarch64-dllimport"},
      {MO_PREL, "aarch64-prel"},
      {MO_TAGGED, "aarch64-tagged"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MOSuppressPair, "aarch64-suppress-pair"},
       {MOStridedAccess, "aarch64-strided-access"}};
  return makeArrayRef(TargetFlags);
}
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                             OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION     I1
/// RET                            I2
///                                RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// BL f                              I2
///                                   B f
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
  MachineOutlinerTailCall, /// Only emit a branch.
  MachineOutlinerNoLRSave, /// Emit a call and return.
  MachineOutlinerThunk,    /// Emit a call and tail-call.
  MachineOutlinerRegSave   /// Same as default, but save to a register.
};

enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8
};
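
// The MBB flags above are computed per-block in isMBBSafeToOutlineFrom() and
// then intersected across all candidates (FlagsSetInAll) in
// getOutliningCandidateInfo() below.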
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
  assert(C.LRUWasSet && "LRU wasn't set?");
  MachineFunction *MF = C.getMF();
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());

  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) &&
        Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
        Reg != AArch64::X17 && // Ditto for X17.
        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
      return Reg;
  }

  // No suitable register. Return 0.
  return 0u;
}
static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
                                         const outliner::Candidate &b) {
  const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
  const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();

  return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
         MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
}

static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
                                       const outliner::Candidate &b) {
  const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
  const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();

  return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
}

static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
                                                const outliner::Candidate &b) {
  const AArch64Subtarget &SubtargetA =
      a.getMF()->getSubtarget<AArch64Subtarget>();
  const AArch64Subtarget &SubtargetB =
      b.getMF()->getSubtarget<AArch64Subtarget>();
  return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
}
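
// The three consensus helpers above are used by getOutliningCandidateInfo() to
// reject candidate sets whose functions disagree on return-address signing
// scope, signing key, or v8.3a support, since one outlined body has to be
// valid for every call site.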
outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
  unsigned SequenceSize =
      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
                      [this](unsigned Sum, const MachineInstr &MI) {
                        return Sum + getInstSizeInBytes(MI);
                      });
  unsigned NumBytesToCreateFrame = 0;

  // We only allow outlining for functions having exactly matching return
  // address signing attributes, i.e., all share the same value for the
  // attribute "sign-return-address" and all share the same type of key they
  // are signed with.
  // Additionally we require all functions to simultaniously either support
  // v8.3a features or not. Otherwise an outlined function could get signed
  // using dedicated v8.3 instructions and a call from a function that doesn't
  // support v8.3 instructions would therefore be invalid.
  if (std::adjacent_find(
          RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
          [](const outliner::Candidate &a, const outliner::Candidate &b) {
            // Return true if a and b are non-equal w.r.t. return address
            // signing or support of v8.3a features
            if (outliningCandidatesSigningScopeConsensus(a, b) &&
                outliningCandidatesSigningKeyConsensus(a, b) &&
                outliningCandidatesV8_3OpsConsensus(a, b)) {
              return false;
            }
            return true;
          }) != RepeatedSequenceLocs.end()) {
    return outliner::OutlinedFunction();
  }

  // Since at this point all candidates agree on their return address signing
  // picking just one is fine. If the candidate functions potentially sign their
  // return addresses, the outlined function should do the same. Note that in
  // the case of "sign-return-address"="non-leaf" this is an assumption: It is
  // not certainly true that the outlined function will have to sign its return
  // address but this decision is made later, when the decision to outline
  // has already been made.
  // The same holds for the number of additional instructions we need: On
  // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
  // necessary. However, at this point we don't know if the outlined function
  // will have a RET instruction so we assume the worst.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  if (FirstCand.getMF()
          ->getInfo<AArch64FunctionInfo>()
          ->shouldSignReturnAddress(true)) {
    // One PAC and one AUT instructions
    NumBytesToCreateFrame += 8;

    // We have to check if sp modifying instructions would get outlined.
    // If so we only allow outlining if sp is unchanged overall, so matching
    // sub and add instructions are okay to outline, all other sp modifications
    // are not
    auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
      int SPValue = 0;
      MachineBasicBlock::iterator MBBI = C.front();
      for (;;) {
        if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
          switch (MBBI->getOpcode()) {
          case AArch64::ADDXri:
          case AArch64::ADDWri:
            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
            assert(MBBI->getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MBBI->getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the add just increments sp. If so, we search for
            // matching sub instructions that decrement sp. If not, the
            // modification is illegal
            if (MBBI->getOperand(1).getReg() == AArch64::SP)
              SPValue += MBBI->getOperand(2).getImm();
            else
              return true;
            break;
          case AArch64::SUBXri:
          case AArch64::SUBWri:
            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
            assert(MBBI->getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MBBI->getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the sub just decrements sp. If so, we search for
            // matching add instructions that increment sp. If not, the
            // modification is illegal
            if (MBBI->getOperand(1).getReg() == AArch64::SP)
              SPValue -= MBBI->getOperand(2).getImm();
            else
              return true;
            break;
          default:
            return true;
          }
        }
        if (MBBI == C.back())
          break;
        ++MBBI;
      }
      if (SPValue)
        return true;
      return false;
    };
    // Remove candidates with illegal stack modifying instructions
    llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < 2)
      return outliner::OutlinedFunction();
  }

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
                [&FlagsSetInAll](outliner::Candidate &C) {
                  FlagsSetInAll &= C.Flags;
                });

  // According to the AArch64 Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Registers x16, x17, (and thus w16, w17)
  // * Condition codes (and thus the NZCV register)
  //
  // Because if this, we can't outline any sequence of instructions where one
  // of these registers is live into/across it. Thus, we need to delete those
  // candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    C.initLRU(TRI);
    LiveRegUnits LRU = C.LRU;
    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
            !LRU.available(AArch64::NZCV));
  };

  // Are there any candidates where those registers are live?
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
    // Erase every candidate that violates the restrictions above. (It could be
    // true that we have viable candidates, so it's not worth bailing out in
    // the case that, say, 1 out of 20 candidates violate the restructions.)
    llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < 2)
      return outliner::OutlinedFunction();
  }

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  unsigned FrameID = MachineOutlinerDefault;
  NumBytesToCreateFrame += 4;

  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
    return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
  });

  // We check to see if CFI Instructions are present, and if they are
  // we find the number of CFI Instructions in the candidates.
  unsigned CFICount = 0;
  MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
  for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
       Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
    const std::vector<MCCFIInstruction> &CFIInstructions =
        RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
    if (MBBI->isCFIInstruction()) {
      unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
      MCCFIInstruction CFI = CFIInstructions[CFIIndex];
      CFICount++;
    }
    MBBI++;
  }

  // We compare the number of found CFI Instructions to the number of CFI
  // instructions in the parent function for each candidate. We must check this
  // since if we outline one of the CFI instructions in a function, we have to
  // outline them all for correctness. If we do not, the address offsets will be
  // incorrect between the two sections of the program.
  for (outliner::Candidate &C : RepeatedSequenceLocs) {
    std::vector<MCCFIInstruction> CFIInstructions =
        C.getMF()->getFrameInstructions();

    if (CFICount > 0 && CFICount != CFIInstructions.size())
      return outliner::OutlinedFunction();
  }

  // Returns true if an instructions is safe to fix up, false otherwise.
  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
    if (MI.isCall())
      return true;

    if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
        !MI.readsRegister(AArch64::SP, &TRI))
      return true;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant
    // offset to SP, with a bit more work.
    if (MI.modifiesRegister(AArch64::SP, &TRI))
      return false;

    // At this point, we have a stack instruction that we might need to
    // fix up. We'll handle it if it's a load or store.
    if (MI.mayLoadOrStore()) {
      const MachineOperand *Base; // Filled with the base operand of MI.
      int64_t Offset;             // Filled with the offset of MI.
      bool OffsetIsScalable;

      // Does it allow us to offset the base operand and is the base the
      // register SP?
      if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
          !Base->isReg() || Base->getReg() != AArch64::SP)
        return false;

      // Fixe-up code below assumes bytes.
      if (OffsetIsScalable)
        return false;

      // Find the minimum/maximum offset for this instruction and check
      // if fixing it up would be in range.
      int64_t MinOffset,
          MaxOffset;              // Unscaled offsets for the instruction.
      TypeSize Scale(0U, false);  // The scale to multiply the offsets by.
      unsigned DummyWidth;
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);

      Offset += 16; // Update the offset to what it would be if we outlined.
      if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
          Offset > MaxOffset * (int64_t)Scale.getFixedSize())
        return false;

      // It's in range, so we can outline it.
      return true;
    }

    // FIXME: Add handling for instructions like "add x0, sp, #8".

    // We can't fix it up, so don't outline it.
    return false;
  };

  // True if it's possible to fix up each stack instruction in this sequence.
  // Important for frames/call variants that modify the stack.
  bool AllStackInstrsSafe = std::all_of(
      FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerTailCall, 4);
  }

  else if (LastInstrOpcode == AArch64::BL ||
           ((LastInstrOpcode == AArch64::BLR ||
             LastInstrOpcode == AArch64::BLRNoIP) &&
            !HasBTI)) {
    // FIXME: Do we need to check if the code after this uses the value of LR?
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerThunk, 4);
  }

  else {
    // We need to decide how to emit calls + frames. We can always emit the same
    // frame if we don't need to save to the stack. If we have to save to the
    // stack, then we need a different frame.
    unsigned NumBytesNoStackCalls = 0;
    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;

    // Check if we have to save LR.
    for (outliner::Candidate &C : RepeatedSequenceLocs) {
      C.initLRU(TRI);
      // If we have a noreturn caller, then we're going to be conservative and
      // say that we have to save LR. If we don't have a ret at the end of the
      // block, then we can't reason about liveness accurately.
      //
      // FIXME: We can probably do better than always disabling this in
      // noreturn functions by fixing up the liveness info.
      bool IsNoReturn =
          C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);

      // Is LR available? If so, we don't need a save.
      if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
        NumBytesNoStackCalls += 4;
        C.setCallInfo(MachineOutlinerNoLRSave, 4);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is an unused register available? If so, we won't modify the stack, so
      // we can outline with the same frame type as those that don't save LR.
      else if (findRegisterToSaveLRTo(C)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerRegSave, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
      else if (C.UsedInSequence.available(AArch64::SP)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerDefault, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // If we outline this, we need to modify the stack. Pretend we don't
      // outline this by saving all of its bytes.
      else {
        NumBytesNoStackCalls += SequenceSize;
      }
    }

    // If there are no places where we have to save LR, then note that we
    // don't have to update the stack. Otherwise, give every candidate the
    // default call type, as long as it's safe to do so.
    if (!AllStackInstrsSafe ||
        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
    } else {
      SetCandidateCallInfo(MachineOutlinerDefault, 12);

      // Bugzilla ID: 46767
      // TODO: Check if fixing up the stack more than once is safe so we can
      // outline these.
      //
      // An outline resulting in a caller that requires stack fixups at the
      // callsite to a callee that also requires stack fixups can happen when
      // there are no available registers at the candidate callsite for a
      // candidate that itself also has calls.
      //
      // In other words if function_containing_sequence in the following pseudo
      // assembly requires that we save LR at the point of the call, but there
      // are no available registers: in this case we save using SP and as a
      // result the SP offsets requires stack fixups by multiples of 16.
      //
      // function_containing_sequence:
      //   ...
      //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
      //   call OUTLINED_FUNCTION_N
      //   restore LR from SP
      //   ...
      //
      // OUTLINED_FUNCTION_N:
      //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
      //   ...
      //   restore LR from SP
      //   ret
      //
      // Because the code to handle more than one stack fixup does not
      // currently have the proper checks for legality, these cases will assert
      // in the AArch64 MachineOutliner. This is because the code to do this
      // needs more hardening, testing, better checks that generated code is
      // legal, etc and because it is only verified to handle a single pass of
      // stack fixup.
      //
      // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
      // these cases until they are known to be handled. Bugzilla 46767 is
      // referenced in comments at the assert site.
      //
      // To avoid asserting (or generating non-legal code on noassert builds)
      // we remove all candidates which would need more than one stack fixup by
      // pruning the cases where the candidate has calls while also having no
      // available LR and having no available general purpose registers to copy
      // LR to (ie one extra stack save/restore).
      //
      if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
        erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
          return (std::any_of(
                     C.front(), std::next(C.back()),
                     [](const MachineInstr &MI) { return MI.isCall(); })) &&
                 (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
        });
      }
    }

    // If we dropped all of the candidates, bail out here.
    if (RepeatedSequenceLocs.size() < 2) {
      RepeatedSequenceLocs.clear();
      return outliner::OutlinedFunction();
    }
  }

  // Does every candidate's MBB contain a call? If so, then we might have a call
  // in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // Check if the range contains a call. These require a save + restore of the
    // link register.
    bool ModStackToSaveLR = false;
    if (std::any_of(FirstCand.front(), FirstCand.back(),
                    [](const MachineInstr &MI) { return MI.isCall(); }))
      ModStackToSaveLR = true;

    // Handle the last instruction separately. If this is a tail call, then the
    // last instruction is a call. We don't want to save + restore in this case.
    // However, it could be possible that the last instruction is a call without
    // it being valid to tail call this sequence. We should consider this as
    // well.
    else if (FrameID != MachineOutlinerThunk &&
             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
      ModStackToSaveLR = true;

    if (ModStackToSaveLR) {
      // We can't fix up the stack. Bail out.
      if (!AllStackInstrsSafe) {
        RepeatedSequenceLocs.clear();
        return outliner::OutlinedFunction();
      }

      // Save + restore LR.
      NumBytesToCreateFrame += 8;
    }
  }

  // If we have CFI instructions, we can only outline if the outlined section
  // can be a tail call
  if (FrameID != MachineOutlinerTailCall && CFICount > 0)
    return outliner::OutlinedFunction();

  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                    NumBytesToCreateFrame, FrameID);
}
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  // Don't outline from functions with section markings; the program could
  // expect that all the code is in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (F.hasSection())
    return false;

  // Outlining from functions with redzones is unsafe since the outliner may
  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
  // outline from it.
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().getValueOr(true))
    return false;

  // FIXME: Teach the outliner to generate/handle Windows unwind info.
  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
    return false;

  // It's safe to outline from MF.
  return true;
}
bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                              unsigned &Flags) const {
  // Check if LR is available through all of the MBB. If it's not, then set
  // a flag.
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");
  LiveRegUnits LRU(getRegisterInfo());

  std::for_each(MBB.rbegin(), MBB.rend(),
                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });

  // Check if each of the unsafe registers are available...
  bool W16AvailableInBlock = LRU.available(AArch64::W16);
  bool W17AvailableInBlock = LRU.available(AArch64::W17);
  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);

  // If all of these are dead (and not live out), we know we don't have to check
  // them later.
  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;

  // Now, add the live outs to the set.
  LRU.addLiveOuts(MBB);

  // If any of these registers is available in the MBB, but also a live out of
  // the block, then we know outlining is unsafe.
  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
    return false;
  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
    return false;
  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
    return false;

  // Check if there's a call inside this MachineBasicBlock. If there is, then
  // set a flag.
  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
    Flags |= MachineOutlinerMBBFlags::HasCalls;

  MachineFunction *MF = MBB.getParent();

  // In the event that we outline, we may have to save LR. If there is an
  // available register in the MBB, then we'll always save LR there. Check if
  // this is true.
  bool CanSaveLR = false;
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());

  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
        Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
      CanSaveLR = true;
      break;
    }
  }

  // Check if we have a register we can save LR to, and if LR was used
  // somewhere. If both of those things are true, then we need to evaluate the
  // safety of outlining stack instructions later.
  if (!CanSaveLR && !LRU.available(AArch64::LR))
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return true;
}
outliner::InstrType
AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
                                   unsigned Flags) const {
  MachineInstr &MI = *MIT;
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();

  // Don't outline anything used for return address signing. The outlined
  // function will get signed later if needed.
  switch (MI.getOpcode()) {
  case AArch64::PACIASP:
  case AArch64::PACIBSP:
  case AArch64::AUTIASP:
  case AArch64::AUTIBSP:
  case AArch64::RETAA:
  case AArch64::RETAB:
  case AArch64::EMITBKEY:
    return outliner::InstrType::Illegal;
  }

  // Don't outline LOHs.
  if (FuncInfo->getLOHRelated().count(&MI))
    return outliner::InstrType::Illegal;

  // We can only outline these if we will tail call the outlined function, or
  // fix up the CFI offsets. Currently, CFI instructions are outlined only if
  // in a tail call.
  //
  // FIXME: If the proper fixups for the offset are implemented, this should be
  // possible.
  if (MI.isCFIInstruction())
    return outliner::InstrType::Legal;

  // Don't allow debug values to impact outlining type.
  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    return outliner::InstrType::Invisible;

  // At this point, KILL instructions don't really tell us much so we can go
  // ahead and skip over them.
  if (MI.isKill())
    return outliner::InstrType::Invisible;

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {
    // Is this the end of a function?
    if (MI.getParent()->succ_empty())
      return outliner::InstrType::Legal;

    // It's not, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return outliner::InstrType::Illegal;

    // If it uses LR or W30 explicitly, then don't touch it.
    if (MOP.isReg() && !MOP.isImplicit() &&
        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
      return outliner::InstrType::Illegal;
  }

  // Special cases for instructions that can always be outlined, but will fail
  // the later tests. e.g. ADRPs, which are PC-relative and use LR, but can
  // always be outlined because they don't require a *specific* value to be in
  // LR.
  if (MI.getOpcode() == AArch64::ADRP)
    return outliner::InstrType::Legal;

  // If MI is a call we might be able to outline it. We don't want to outline
  // any calls that rely on the position of items on the stack. When we outline
  // something containing a call, we have to emit a save and restore of LR in
  // the outlined function. Currently, this always happens by saving LR to the
  // stack. Thus, if we outline, say, half the parameters for a function call
  // plus the call, then we'll break the callee's expectations for the layout
  // of the stack.
  //
  // FIXME: Allow calls to functions which construct a stack frame, as long
  // as they don't access arguments on the stack.
  // FIXME: Figure out some way to analyze functions defined in other modules.
  // We should be able to compute the memory usage based on the IR calling
  // convention, even if we can't see the definition.
  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Explicitly list the call instructions we know about so we
    // don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR ||
        MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check whether it's
    // something we can safely outline.
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If we
    // don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought not to pass anything on
    // the stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return outliner::InstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return outliner::InstrType::Illegal;

  // Don't outline BTI instructions, because that will prevent the outlining
  // site from being indirectly callable.
  if (MI.getOpcode() == AArch64::HINT) {
    int64_t Imm = MI.getOperand(0).getImm();
    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
      return outliner::InstrType::Illegal;
  }

  return outliner::InstrType::Legal;
}

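// After the outliner has spilled LR at the top of an outlined function's
// frame, every SP-relative load or store inside the body is 16 bytes off.
// Walk the block and rewrite their immediate offsets accordingly.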
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    const MachineOperand *Base;
    unsigned Width;
    int64_t Offset;
    bool OffsetIsScalable;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
                                      &RI) ||
        (Base->isReg() && Base->getReg() != AArch64::SP))
      continue;

    // It is, so we have to fix it up.
    TypeSize Scale(0U, false);
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");
    assert(!OffsetIsScalable && "Expected offset to be a byte offset");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
    int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
    StackOffsetOperand.setImm(NewImm);
  }
}

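// Insert return-address signing into an outlined function when requested:
// a PACI* at the start of the block, the matching AUTI* before the return
// (or a fused RETAA/RETAB when PAuth is available and the block ends in a
// plain RET), and the CFI needed to describe the signed return-address state.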
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
                                 bool ShouldSignReturnAddr,
                                 bool ShouldSignReturnAddrWithAKey) {
  if (ShouldSignReturnAddr) {
    MachineBasicBlock::iterator MBBPAC = MBB.begin();
    MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    const TargetInstrInfo *TII = Subtarget.getInstrInfo();
    DebugLoc DL;

    if (MBBAUT != MBB.end())
      DL = MBBAUT->getDebugLoc();

    // At the very beginning of the basic block we insert the following
    // depending on the key type
    //
    // a_key:                   b_key:
    //    PACIASP                   EMITBKEY
    //    CFI_INSTRUCTION           PACIBSP
    //                              CFI_INSTRUCTION
    unsigned PACI;
    if (ShouldSignReturnAddrWithAKey) {
      PACI = Subtarget.hasPAuth() ? AArch64::PACIA : AArch64::PACIASP;
    } else {
      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
          .setMIFlag(MachineInstr::FrameSetup);
      PACI = Subtarget.hasPAuth() ? AArch64::PACIB : AArch64::PACIBSP;
    }

    auto MI = BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(PACI));
    if (Subtarget.hasPAuth())
      MI.addReg(AArch64::LR, RegState::Define)
          .addReg(AArch64::LR)
          .addReg(AArch64::SP, RegState::InternalRead);
    MI.setMIFlag(MachineInstr::FrameSetup);

    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
    BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);

    // If v8.3a features are available, we can replace a RET instruction with
    // RETAA or RETAB and omit the AUT instructions.
    if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
        MBBAUT->getOpcode() == AArch64::RET) {
      BuildMI(MBB, MBBAUT, DL,
              TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
                                                    : AArch64::RETAB))
          .copyImplicitOps(*MBBAUT);
      MBB.erase(MBBAUT);
    } else {
      BuildMI(MBB, MBBAUT, DL,
              TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
                                                    : AArch64::AUTIBSP))
          .setMIFlag(MachineInstr::FrameDestroy);
    }
  }
}

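// Build the frame for an outlined function: turn a trailing call into a tail
// call for thunks, save and restore LR (with matching CFI) around bodies that
// contain calls, insert the return when the function is not tail-called, and
// sign the return address if the candidates require it.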
void AArch64InstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::OutlinedFunction &OF) const {

  AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();

  if (OF.FrameConstructionID == MachineOutlinerTailCall)
    FI->setOutliningStyle("Tail Call");
  else if (OF.FrameConstructionID == MachineOutlinerThunk) {
    // For thunk outlining, rewrite the last instruction from a call to a
    // tail-call.
    MachineInstr *Call = &*--MBB.instr_end();
    unsigned TailOpcode;
    if (Call->getOpcode() == AArch64::BL) {
      TailOpcode = AArch64::TCRETURNdi;
    } else {
      assert(Call->getOpcode() == AArch64::BLR ||
             Call->getOpcode() == AArch64::BLRNoIP);
      TailOpcode = AArch64::TCRETURNriALL;
    }
    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
                           .add(Call->getOperand(0))
                           .addImm(0);
    MBB.insert(MBB.end(), TC);
    Call->eraseFromParent();

    FI->setOutliningStyle("Thunk");
  }

  bool IsLeafFunction = true;

  // Is there a call in the outlined range?
  auto IsNonTailCall = [](const MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };

  if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
    // Fix up the instructions in the range, since we're going to modify the
    // stack.

    // Bugzilla ID: 46767
    // TODO: Check if fixing up twice is safe so we can outline these.
    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    IsLeafFunction = false;

    // LR has to be a live in so that we can save it.
    if (!MBB.isLiveIn(AArch64::LR))
      MBB.addLiveIn(AArch64::LR);

    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
        OF.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // Insert a save before the outlined region.
    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                                .addReg(AArch64::SP, RegState::Define)
                                .addReg(AArch64::LR)
                                .addReg(AArch64::SP)
                                .addImm(-16);
    It = MBB.insert(It, STRXpre);

    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const MCRegisterInfo *MRI = STI.getRegisterInfo();
    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);

    // Add a CFI saying the stack was moved 16 B down.
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Add a CFI saying that the LR that we want to find is now 16 B higher
    // than before.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Insert a restore before the terminator for the function.
    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                                 .addReg(AArch64::SP, RegState::Define)
                                 .addReg(AArch64::LR, RegState::Define)
                                 .addReg(AArch64::SP)
                                 .addImm(16);
    Et = MBB.insert(Et, LDRXpost);
  }

  // If a bunch of candidates reach this point they must agree on their return
  // address signing. It is therefore enough to just consider the signing
  // behaviour of one of them.
  const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
  bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);

  // a_key is the default.
  bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();

  // If this is a tail call outlined function, then there's already a return.
  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
      OF.FrameConstructionID == MachineOutlinerThunk) {
    signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
                         ShouldSignReturnAddrWithAKey);
    return;
  }

  // It's not a tail call, so we have to insert the return ourselves.

  // LR has to be a live in so that we can return to it.
  if (!MBB.isLiveIn(AArch64::LR))
    MBB.addLiveIn(AArch64::LR);

  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR);
  MBB.insert(MBB.end(), ret);

  signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
                       ShouldSignReturnAddrWithAKey);

  FI->setOutliningStyle("Function");

  // Did we have to modify the stack by saving the link register?
  if (OF.FrameConstructionID != MachineOutlinerDefault)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}

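// Insert a call to an outlined function at the given point: either a tail
// call, a plain BL, or a BL bracketed by an LR save/restore (to a spare
// register or to the stack), returning an iterator to the call itself.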
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const outliner::Candidate &C) const {

  // Are we tail calling?
  if (C.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
                            .addGlobalAddress(M.getNamedValue(MF.getName()))
                            .addImm(0));
    return It;
  }

  // Are we saving the link register?
  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
      C.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We want to return the spot where we inserted the call.
  MachineBasicBlock::iterator CallPt;

  // Instructions for saving and restoring LR around the call instruction we're
  // going to insert.
  MachineInstr *Save;
  MachineInstr *Restore;
  // Can we save to a register?
  if (C.CallConstructionID == MachineOutlinerRegSave) {
    // FIXME: This logic should be sunk into a target-specific interface so that
    // we don't have to recompute the register.
    unsigned Reg = findRegisterToSaveLRTo(C);
    assert(Reg != 0 && "No callee-saved register available?");

    // Save and restore LR from that register.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
               .addReg(AArch64::XZR)
               .addReg(AArch64::LR)
               .addImm(0);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
                  .addReg(AArch64::XZR)
                  .addReg(Reg)
                  .addImm(0);
  } else {
    // We have the default case. Save and restore from SP.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
               .addReg(AArch64::SP, RegState::Define)
               .addReg(AArch64::LR)
               .addReg(AArch64::SP)
               .addImm(-16);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                  .addReg(AArch64::SP, RegState::Define)
                  .addReg(AArch64::LR, RegState::Define)
                  .addReg(AArch64::SP)
                  .addImm(16);
  }

  It = MBB.insert(It, Save);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  CallPt = It;
  It++;

  It = MBB.insert(It, Restore);
  return CallPt;
}

bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return MF.getFunction().hasMinSize();
}

Optional<DestSourcePair>
AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {

  // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg and zero immediate
  // operands are used as an alias for the mov instruction.
  if (MI.getOpcode() == AArch64::ORRWrs &&
      MI.getOperand(1).getReg() == AArch64::WZR &&
      MI.getOperand(3).getImm() == 0x0) {
    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
  }

  if (MI.getOpcode() == AArch64::ORRXrs &&
      MI.getOperand(1).getReg() == AArch64::XZR &&
      MI.getOperand(3).getImm() == 0x0) {
    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
  }

  return None;
}

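// If MI is an ADD/SUB of an immediate (optionally setting flags) that defines
// \p Reg, report the source register together with the signed, shift-adjusted
// immediate that is added to it.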
Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
                                                      Register Reg) const {
  int Sign = 1;
  int64_t Offset = 0;

  // TODO: Handle cases where Reg is a super- or sub-register of the
  // destination register.
  const MachineOperand &Op0 = MI.getOperand(0);
  if (!Op0.isReg() || Reg != Op0.getReg())
    return None;

  switch (MI.getOpcode()) {
  default:
    return None;
  case AArch64::SUBWri:
  case AArch64::SUBXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    Sign *= -1;
    LLVM_FALLTHROUGH;
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri: {
    // TODO: Third operand can be global address (usually some string).
    if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
        !MI.getOperand(2).isImm())
      return None;
    int Shift = MI.getOperand(3).getImm();
    assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
    Offset = Sign * (MI.getOperand(2).getImm() << Shift);
  }
  }
  return RegImmPair{MI.getOperand(1).getReg(), Offset};
}

/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
/// the destination register then, if possible, describe the value in terms of
/// the source register.
static Optional<ParamLoadedValue>
describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
                       const TargetInstrInfo *TII,
                       const TargetRegisterInfo *TRI) {
  auto DestSrc = TII->isCopyInstr(MI);
  if (!DestSrc)
    return None;

  Register DestReg = DestSrc->Destination->getReg();
  Register SrcReg = DestSrc->Source->getReg();

  auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});

  // If the described register is the destination, just return the source.
  if (DestReg == DescribedReg)
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
  if (MI.getOpcode() == AArch64::ORRWrs &&
      TRI->isSuperRegister(DestReg, DescribedReg))
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // We may need to describe the lower part of a ORRXrs move.
  if (MI.getOpcode() == AArch64::ORRXrs &&
      TRI->isSubRegister(DestReg, DescribedReg)) {
    Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
    return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
  }

  assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
         "Unhandled ORR[XW]rs copy case");

  return None;
}

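// Describe, for debug info, the value that \p MI loads into \p Reg: MOVZ
// immediates and ORR-based register moves are handled here, everything else
// is deferred to the generic TargetInstrInfo implementation.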
Optional<ParamLoadedValue>
AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
                                      Register Reg) const {
  const MachineFunction *MF = MI.getMF();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  switch (MI.getOpcode()) {
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: {
    // MOVZWi may be used for producing zero-extended 32-bit immediates in
    // 64-bit parameters, so we need to consider super-registers.
    if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
      return None;

    if (!MI.getOperand(1).isImm())
      return None;
    int64_t Immediate = MI.getOperand(1).getImm();
    int Shift = MI.getOperand(2).getImm();
    return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
                            nullptr);
  }
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    return describeORRLoadedValue(MI, Reg, this, TRI);
  }

  return TargetInstrInfo::describeLoadedValue(MI, Reg);
}

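// A G_ANYEXT is free, and a G_SEXT/G_ZEXT whose only use is a G_PTR_ADD
// offset is likely to be absorbed into an addressing mode, so report such
// extends as likely to be folded away.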
bool AArch64InstrInfo::isExtendLikelyToBeFolded(
    MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
  assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
         ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
         ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);

  // Anyexts are nops.
  if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
    return true;

  Register DefReg = ExtMI.getOperand(0).getReg();
  if (!MRI.hasOneNonDBGUse(DefReg))
    return false;

  // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
  // addressing mode.
  auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
  return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
}

uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::ElementSizeMask;
}

bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
}

bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
}

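// Tail-duplicate more aggressively (up to 6 instructions rather than 2) when
// optimising at the highest level.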
unsigned int
AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
  return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
}

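// When straight-line-speculation hardening of BLR is enabled, indirect calls
// must use the BLRNoIP pseudo instead of a plain BLR.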
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
    return AArch64::BLRNoIP;

  return AArch64::BLR;
}

#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"