//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PointerAuth.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

static cl::opt<unsigned>
    BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
                      cl::desc("Restrict range of B instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be.  This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const Function &F = MF->getFunction();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  auto Op = MI.getOpcode();
  if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // Meta-instructions emit no code.
  if (MI.isMetaInstruction())
    return 0;

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();

  if (!MI.isBundle() && isTailCallReturnInst(MI)) {
    NumBytes = Desc.getSize() ? Desc.getSize() : 4;

    const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
    if (!MFI->shouldSignReturnAddress(MF))
      return NumBytes;

    const auto &STI = MF->getSubtarget<AArch64Subtarget>();
    auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
    NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
    return NumBytes;
  }

  // Size should be preferably set in
  // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
  // Specific cases handle instructions of variable sizes.
  switch (Desc.getOpcode()) {
  default:
    return Desc.getSize();
  // Anything not explicitly designated otherwise (i.e. pseudo-instructions
  // with fixed constant size but not specified in .td file) is a normal
  // 4-byte insn.
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::STATEPOINT:
    NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    // No patch bytes means a normal call inst is emitted
    if (NumBytes == 0)
      NumBytes = 4;
    break;
  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
    // instructions are expanded to the specified number of NOPs. Otherwise,
    // they are expanded to 36-byte XRay sleds.
    NumBytes =
        F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
    break;
  case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
    NumBytes = 36;
    break;
  case TargetOpcode::PATCHABLE_EVENT_CALL:
    // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
    NumBytes = 24;
    break;
    NumBytes = MI.getOperand(1).getImm();
    break;
  case TargetOpcode::BUNDLE:
    NumBytes = getInstBundleLength(MI);
    break;
  }

  return NumBytes;
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
    break;
  }
}
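
// Illustrative note (added commentary, not part of the original source): the
// Cond vector built above uses two layouts. For a plain conditional branch,
// Cond holds just the condition code, e.g. Cond = {<cc>}. For the folded
// compare-and-branch forms the first element is -1 followed by the opcode and
// its operands, e.g. "cbnz w8, bb" becomes Cond = {-1, AArch64::CBNZW, w8} and
// "tbz x3, #5, bb" becomes Cond = {-1, AArch64::TBZX, x3, 5}. The same layout
// is decoded by instantiateCondBranch() and insertSelect() below.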
static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return BDisplacementBits;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump"
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
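
// Illustrative note (added commentary, not part of the original source): the
// displacement widths above are the architectural branch immediates, scaled by
// the 4-byte instruction size. With the defaults, a 14-bit TB(N)Z reaches
// roughly +/-32 KiB, the 19-bit CB(N)Z and Bcc forms roughly +/-1 MiB, and the
// 26-bit B roughly +/-128 MiB.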
MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}
void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                            MachineBasicBlock &NewDestBB,
                                            MachineBasicBlock &RestoreBB,
                                            const DebugLoc &DL,
                                            int64_t BrOffset,
                                            RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
    // Offsets outside of the signed 33-bit range are not supported for ADRP +
    // ADD.
    if (!isInt<33>(BrOffset))
      report_fatal_error(
          "Branch offsets outside of the signed 33-bit range not supported");

    BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
        .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
    BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
        .addReg(Reg)
        .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
    BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
  };

  RS->enterBasicBlockEnd(MBB);
  // If X16 is unused, we can rely on the linker to insert a range extension
  // thunk if NewDestBB is out of range of a single B instruction.
  constexpr Register Reg = AArch64::X16;
  if (!RS->isRegUsed(Reg)) {
    insertUnconditionalBranch(MBB, &NewDestBB, DL);
    return;
  }

  // If there's a free register and it's worth inflating the code size,
  // manually insert the indirect branch.
  Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
  if (Scavenged != AArch64::NoRegister &&
      MBB.getSectionID() == MBBSectionID::ColdSectionID) {
    buildIndirectBranch(Scavenged, NewDestBB);
    RS->setRegUsed(Scavenged);
    return;
  }
  // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
  // with red zones.
  AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().value_or(true))
    report_fatal_error(
        "Unable to insert indirect branch inside function that has red zone");

  // Otherwise, spill X16 and defer range extension to the linker.
  BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
      .addReg(AArch64::SP, RegState::Define)
      .addReg(Reg)
      .addReg(AArch64::SP)
      .addImm(-16);

  BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);

  BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
      .addReg(AArch64::SP, RegState::Define)
      .addReg(Reg, RegState::Define)
      .addReg(AArch64::SP)
      .addImm(16);
}
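
// Illustrative note (added commentary, not part of the original source): the
// manually expanded long branch produced by buildIndirectBranch has the shape
//   adrp xN, dest        ; page of the destination block's symbol
//   add  xN, xN, :lo12:dest
//   br   xN
// which is why only destinations within the signed 33-bit ADRP range are
// supported; otherwise X16 is spilled and the linker's range-extension thunk
// mechanism is used instead.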
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      }
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
    }
  }

  // If we're allowed to modify and the block ends in an unconditional branch
  // which could simply fall through, remove the branch.  (Note: This case only
  // matters when we can't understand the whole sequence, otherwise it's also
  // handled by BranchFolding.cpp.)
  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
    LastInst->eraseFromParent();
    LastInst = SecondLastInst;
    LastOpc = LastInst->getOpcode();
    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
      assert(!isUncondBranchOpcode(LastOpc) &&
             "unreachable unconditional branches removed above");
      if (isCondBranchOpcode(LastOpc)) {
        // Block ends with fall-through condbranch.
        parseCondBranch(LastInst, TBB, Cond);
        return false;
      }
      return true; // Can't handle indirect branch.
    }
    SecondLastInst = &*I;
    SecondLastOpc = SecondLastInst->getOpcode();
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                              MachineBranchPredicate &MBP,
                                              bool AllowModify) const {
  // For the moment, handle only a block which ends with a cb(n)zx followed by
  // a fallthrough.  Why this?  Because it is a common form.
  // TODO: Should we handle b.cc?

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return true;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return true;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  if (!isCondBranchOpcode(LastOpc))
    return true;

  MBP.TrueDest = LastInst->getOperand(1).getMBB();
  assert(MBP.TrueDest && "expected!");
  MBP.FalseDest = MBB.getNextNode();

  MBP.ConditionDef = nullptr;
  MBP.SingleUseCondition = false;

  MBP.LHS = LastInst->getOperand(0);
  MBP.RHS = MachineOperand::CreateImm(0);
  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
                                            : MachineBranchPredicate::PRED_EQ;
  return false;
}
bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}
unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (Register::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
                                         true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    [[fallthrough]];
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
                                         true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    [[fallthrough]];
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
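
// Illustrative note (added commentary, not part of the original source): the
// foldings above rely on the conditional-select semantics
//   csinc Rd, Rn, Rm, cc   =>  Rd = cc ? Rn : Rm + 1
//   csinv Rd, Rn, Rm, cc   =>  Rd = cc ? Rn : ~Rm
//   csneg Rd, Rn, Rm, cc   =>  Rd = cc ? Rn : -Rm
// so "add x, 1", "orn dst, xzr, x" (not) and "sub dst, xzr, x" (neg) can be
// absorbed into the select; insertSelect() below inverts the condition when
// the folded operand was the true value.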
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Also need to check the dest regclass, in case we're trying to optimize
  // something like:
  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  return false;
}
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    // cmp reg, #0 is actually subs xzr, reg, #0.
    if (Is64Bit) {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
// Return true if Imm can be loaded into a register by a "cheap" sequence of
// instructions. For now, "cheap" means at most two instructions.
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
  if (BitSize == 32)
    return true;

  assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
  uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Is);

  return Is.size() <= 2;
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;
    return MI.isAsCheapAsAMove();
  }

  switch (MI.getOpcode()) {
  default:
    return MI.isAsCheapAsAMove();

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
  case AArch64::MOVi32imm:
    return isCheapImmediate(MI, 32);
  case AArch64::MOVi64imm:
    return isCheapImmediate(MI, 64);
  }
}
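
// Illustrative example (added commentary, not part of the original source):
// 0x0000000012345678 expands to a MOVZ plus one MOVK (two instructions) and is
// therefore treated as cheap by isCheapImmediate, while a constant such as
// 0x0123456789abcdef needs a MOVZ plus three MOVKs and is not.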
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}
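
// Illustrative note (added commentary, not part of the original source): this
// hook encodes Falkor-specific forwarding behavior. For example, an
// "add x0, x1, x2, lsl #3" (LSL amount <= 5) is considered fast, while a
// register-offset load or store whose index uses a signed extend
// (IsSigned != 0 above) is not.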
bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::SEH_StackAlloc:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveFPLR_X:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveReg_X:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveRegP_X:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveFReg_X:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFRegP_X:
  case AArch64::SEH_SetFP:
  case AArch64::SEH_AddFP:
  case AArch64::SEH_Nop:
  case AArch64::SEH_PrologEnd:
  case AArch64::SEH_EpilogStart:
  case AArch64::SEH_EpilogEnd:
  case AArch64::SEH_PACSignLR:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    return true;
  }
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg, Register &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  TypeSize WidthA(0, false), WidthB(0, false);
  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, offset from the base and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
  // the bases are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
  // are assumed to have the same scale (vscale).
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
                                   WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
                                   WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
        OffsetAIsScalable == OffsetBIsScalable) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowWidth.isScalable() == OffsetAIsScalable &&
          LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
        return true;
    }
  }
  return false;
}
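
// Illustrative example (added commentary, not part of the original source):
//   ldr x0, [x1]       ; offset 0, width 8
//   str x2, [x1, #8]   ; offset 8, width 8
// share an identical base and satisfy 0 + 8 <= 8, so they are reported as
// trivially disjoint, whereas "str x2, [x1, #4]" would overlap and would not
// be.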
bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;

  // Do not move an instruction that can be recognized as a branch target.
  if (hasBTISemantics(MI))
    return true;

  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  case AArch64::MSRpstatesvcrImm1:
    // SMSTART and SMSTOP are also scheduling barriers.
    return true;
  default:
    break;
  }

  if (isSEHInstruction(MI))
    return true;
  auto Next = std::next(MI.getIterator());
  return Next != MBB->end() && Next->isCFIInstruction();
}
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::PTEST_PP:
  case AArch64::PTEST_PP_ANY:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    // Not sure about the mask and value for now...
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI.getOperand(2).getImm();
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = AArch64_AM::decodeLogicalImmediate(
        MI.getOperand(2).getImm(),
        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
    return true;
  }

  return false;
}
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    Register Reg = MO.getReg();
    if (Reg.isPhysical()) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}
/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
      MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
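
// Illustrative note (added commentary, not part of the original source): the
// MIDefinesZeroReg special case exists because the non-flag-setting ADD/SUB
// immediate and shifted forms encode register 31 as SP rather than WZR/XZR.
// For example "subs wzr, w0, #1" (a cmp) cannot simply become
// "sub wzr, w0, #1", since that encoding would mean "sub wsp, w0, #1".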
enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
///       on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::any_of(
      ++To.getReverse(), To->getParent()->rend(),
      [From](MachineInstr &MI) { return MI.getIterator() == From; }));

  // We iterate backward starting at \p To until we hit \p From.
  for (const MachineInstr &Instr :
       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}
std::optional<unsigned>
AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
                                      MachineInstr *Pred,
                                      const MachineRegisterInfo *MRI) const {
  unsigned MaskOpcode = Mask->getOpcode();
  unsigned PredOpcode = Pred->getOpcode();
  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
  bool PredIsWhileLike = isWhileOpcode(PredOpcode);

  if (PredIsWhileLike) {
    // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
    // instruction and the condition is "any" since WHILEcc does an implicit
    // PTEST(ALL, PG) check and PG is always a subset of ALL.
    if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
      return PredOpcode;

    // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
    // redundant since WHILE performs an implicit PTEST with an all active
    // mask.
    if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
        getElementSizeForOpcode(MaskOpcode) ==
            getElementSizeForOpcode(PredOpcode))
      return PredOpcode;

    return {};
  }

  if (PredIsPTestLike) {
    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
    // instruction that sets the flags as PTEST would and the condition is
    // "any" since PG is always a subset of the governing predicate of the
    // ptest-like instruction.
    if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
      return PredOpcode;

    // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
    // element size matches and either the PTEST_LIKE instruction uses
    // the same all active mask or the condition is "any".
    if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
        getElementSizeForOpcode(MaskOpcode) ==
            getElementSizeForOpcode(PredOpcode)) {
      auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
        return PredOpcode;
    }

    // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
    // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
    // on 8-bit predicates like the PTEST.  Otherwise, for instructions like
    // compare that also support 16/32/64-bit predicates, the implicit PTEST
    // performed by the compare could consider fewer lanes for these element
    // sizes.
    //
    // For example, consider
    //
    //   ptrue p0.b                    ; P0=1111-1111-1111-1111
    //   index z0.s, #0, #1            ; Z0=<0,1,2,3>
    //   index z1.s, #1, #1            ; Z1=<1,2,3,4>
    //   cmphi p1.s, p0/z, z1.s, z0.s  ; P1=0001-0001-0001-0001
    //   ptest p0, p1.b                ; P1=0001-0001-0001-0001
    //
    // where the compare generates a canonical all active 32-bit predicate
    // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
    // active flag, whereas the PTEST instruction with the same mask doesn't.
    // For PTEST_ANY this doesn't apply as the flags in this case would be
    // identical regardless of element size.
    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
    if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
                                  PTest->getOpcode() == AArch64::PTEST_PP_ANY))
      return PredOpcode;

    return {};
  }

  // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
  // opcode so the PTEST becomes redundant.
  switch (PredOpcode) {
  case AArch64::AND_PPzPP:
  case AArch64::BIC_PPzPP:
  case AArch64::EOR_PPzPP:
  case AArch64::NAND_PPzPP:
  case AArch64::NOR_PPzPP:
  case AArch64::ORN_PPzPP:
  case AArch64::ORR_PPzPP:
  case AArch64::BRKA_PPzP:
  case AArch64::BRKPA_PPzPP:
  case AArch64::BRKB_PPzP:
  case AArch64::BRKPB_PPzPP:
  case AArch64::RDFFR_PPz: {
    // Check to see if our mask is the same. If not the resulting flag bits
    // may be different and we can't remove the ptest.
    auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    if (Mask != PredMask)
      return {};
    break;
  }
  case AArch64::BRKN_PPzP: {
    // BRKN uses an all active implicit mask to set flags unlike the other
    // flag-setting instructions.
    //   PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
    if ((MaskOpcode != AArch64::PTRUE_B) ||
        (Mask->getOperand(1).getImm() != 31))
      return {};
    break;
  }
  case AArch64::PTRUE_B:
    // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
    break;
  default:
    // Bail out if we don't recognize the input
    return {};
  }

  return convertToFlagSettingOpc(PredOpcode);
}
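
// Illustrative example (added commentary, not part of the original source):
// for a sequence such as
//   %res = AND_PPzPP %pg, %pa, %pb
//   PTEST_PP %pg, %res
// the switch above returns the flag-setting variant of the AND, so
// optimizePTestInstr() can delete the PTEST and let the predicate operation
// define NZCV itself, mirroring the BRKN -> BRKNS rewrite shown in the
// comment above.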
/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation which could set the flags in an identical manner
bool AArch64InstrInfo::optimizePTestInstr(
    MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
    const MachineRegisterInfo *MRI) const {
  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
  auto *Pred = MRI->getUniqueVRegDef(PredReg);
  unsigned PredOpcode = Pred->getOpcode();
  auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
  if (!NewOp)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // If another instruction between Pred and PTest accesses flags, don't remove
  // the ptest or update the earlier instruction to modify them.
  if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
    return false;

  // If we pass all the checks, it's safe to remove the PTEST and use the flags
  // as they are prior to PTEST. Sometimes this requires the tested PTEST
  // operand to be replaced with an equivalent instruction that also sets the
  // flags.
  PTest->eraseFromParent();
  if (*NewOp != PredOpcode) {
    Pred->setDesc(get(*NewOp));
    bool succeeded = UpdateOperandRegClass(*Pred);
    (void)succeeded;
    assert(succeeded && "Operands have incompatible register classes!");
    Pred->addRegisterDefined(AArch64::NZCV, TRI);
  }

  // Ensure that the flags def is live.
  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
    unsigned i = 0, e = Pred->getNumOperands();
    for (; i != e; ++i) {
      MachineOperand &MO = Pred->getOperand(i);
      if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
        MO.setIsDead(false);
        break;
      }
    }
  }
  return true;
}
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare instruction
/// only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
    int64_t CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx =
      CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
        CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.removeOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
      CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
    return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
    return true;
  return (CmpValue == 0 || CmpValue == 1) &&
         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}
/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

/// \returns The condition code operand index for \p Instr if it is a branch
/// or select and -1 otherwise.
static int
findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return -1;
  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
    assert(Idx >= 2);
    return Idx - 2;
  }
  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
    assert(Idx >= 1);
    return Idx - 1;
  }
  }
}
1683 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1684 static AArch64CC::CondCode
findCondCodeUsedByInstr(const MachineInstr
&Instr
) {
1685 int CCIdx
= findCondCodeUseOperandIdxForBranchOrSelect(Instr
);
1686 return CCIdx
>= 0 ? static_cast<AArch64CC::CondCode
>(
1687 Instr
.getOperand(CCIdx
).getImm())
1688 : AArch64CC::Invalid
;
1691 static UsedNZCV
getUsedNZCV(AArch64CC::CondCode CC
) {
1692 assert(CC
!= AArch64CC::Invalid
);
1698 case AArch64CC::EQ
: // Z set
1699 case AArch64CC::NE
: // Z clear
1703 case AArch64CC::HI
: // Z clear and C set
1704 case AArch64CC::LS
: // Z set or C clear
1707 case AArch64CC::HS
: // C set
1708 case AArch64CC::LO
: // C clear
1712 case AArch64CC::MI
: // N set
1713 case AArch64CC::PL
: // N clear
1717 case AArch64CC::VS
: // V set
1718 case AArch64CC::VC
: // V clear
1722 case AArch64CC::GT
: // Z clear, N and V the same
1723 case AArch64CC::LE
: // Z set, N and V differ
1726 case AArch64CC::GE
: // N and V the same
1727 case AArch64CC::LT
: // N and V differ
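
// Illustrative example (added commentary, not part of the original source):
// getUsedNZCV(AArch64CC::HI) reports {Z, C}, since "b.hi" branches when C is
// set and Z is clear, whereas getUsedNZCV(AArch64CC::EQ) reports only {Z}.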
/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns std::nullopt otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                       const TargetRegisterInfo &TRI,
                       SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
  MachineBasicBlock *CmpParent = CmpInstr.getParent();
  if (MI.getParent() != CmpParent)
    return std::nullopt;

  if (areCFlagsAliveInSuccessors(CmpParent))
    return std::nullopt;

  UsedNZCV NZCVUsedAfterCmp;
  for (MachineInstr &Instr : instructionsWithoutDebug(
           std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
    if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return std::nullopt;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
      if (CCUseInstrs)
        CCUseInstrs->push_back(&Instr);
    }
    if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
      break;
  }
  return NZCVUsedAfterCmp;
}
static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and, if C/V flags are not used after CmpInstr,
///        or if N flag is used but MI produces a poison value if signed
///        overflow occurs.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
                                       const TargetRegisterInfo &TRI) {
  // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
  // that may or may not set flags.
  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);

  const unsigned CmpOpcode = CmpInstr.getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  assert((CmpInstr.getOperand(2).isImm() &&
          CmpInstr.getOperand(2).getImm() == 0) &&
         "Caller guarantees that CmpInstr compares with constant 0");

  std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
  if (!NZVCUsed || NZVCUsed->C)
    return false;

  // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
  // '%vreg = add ...' or '%vreg = sub ...'.
  // Condition flag V is used to indicate signed overflow.
  // 1) MI and CmpInstr set N and V to the same value.
  // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
  //    signed overflow occurs, so CmpInstr could still be simplified away.
  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(MI) != MI.getOpcode())
    AccessToCheck = AK_All;
  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
}
/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo &MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo &TRI = getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, &TRI);
  return true;
}
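
// Illustrative example in pseudo-MIR (added commentary, not part of the
// original source): given
//   %1:gpr32 = SUBWri %0, 1, 0
//   $wzr = SUBSWri %1, 0, 0          ; cmp %1, #0
//   Bcc <eq>, %bb.2
// substituteCmpToZero rewrites the subtract to its flag-setting form and
// deletes the compare, so the Z flag comes directly from the subtraction:
//   %1:gpr32 = SUBSWri %0, 1, 0, implicit-def $nzcv
//   Bcc <eq>, %bb.2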
/// \returns True if \p CmpInstr can be removed.
///
/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
/// codes used in \p CCUseInstrs must be inverted.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
                                 int CmpValue, const TargetRegisterInfo &TRI,
                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
                                 bool &IsInvertCC) {
  assert((CmpValue == 0 || CmpValue == 1) &&
         "Only comparisons to 0 or 1 considered for removal!");

  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
  unsigned MIOpc = MI.getOpcode();
  if (MIOpc == AArch64::CSINCWr) {
    if (MI.getOperand(1).getReg() != AArch64::WZR ||
        MI.getOperand(2).getReg() != AArch64::WZR)
      return false;
  } else if (MIOpc == AArch64::CSINCXr) {
    if (MI.getOperand(1).getReg() != AArch64::XZR ||
        MI.getOperand(2).getReg() != AArch64::XZR)
      return false;
  } else {
    return false;
  }
  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
  if (MICC == AArch64CC::Invalid)
    return false;

  // NZCV needs to be defined
  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
    return false;

  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
  const unsigned CmpOpcode = CmpInstr.getOpcode();
  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
  if (CmpValue && !IsSubsRegImm)
    return false;
  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
    return false;

  // MI conditions allowed: eq, ne, mi, pl
  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
  if (MIUsedNZCV.C || MIUsedNZCV.V)
    return false;

  std::optional<UsedNZCV> NZCVUsedAfterCmp =
      examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
  // Condition flags are not used in CmpInstr basic block successors and only
  // Z or N flags allowed to be used after CmpInstr within its basic block
  if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
    return false;
  // Z or N flag used after CmpInstr must correspond to the flag used in MI
  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
      (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
    return false;
  // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
  if (MIUsedNZCV.N && !CmpValue)
    return false;

  // There must be no defs of flags between MI and CmpInstr
  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
    return false;

  // Condition code is inverted in the following cases:
  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
               (!CmpValue && MICC == AArch64CC::NE);
  return true;
}
/// Remove comparison in csinc-cmp sequence
///
/// Examples:
/// 1. \code
///   csinc w9, wzr, wzr, ne
///   cmp   w9, #0
///   b.eq
///    \endcode
/// \code
///   csinc w9, wzr, wzr, ne
///   b.ne
///    \endcode
///
/// 2. \code
///   csinc x2, xzr, xzr, mi
///   cmp   x2, #1
///   b.pl
///    \endcode
/// \code
///   csinc x2, xzr, xzr, mi
///   b.pl
///    \endcode
///
/// \param  CmpInstr comparison instruction
/// \return True when comparison removed
bool AArch64InstrInfo::removeCmpToZeroOrOne(
    MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
    const MachineRegisterInfo &MRI) const {
  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;
  const TargetRegisterInfo &TRI = getRegisterInfo();
  SmallVector<MachineInstr *, 4> CCUseInstrs;
  bool IsInvertCC = false;
  if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
                            IsInvertCC))
    return false;
  // Make transformation
  CmpInstr.eraseFromParent();
  if (IsInvertCC)
    // Invert condition codes in CmpInstr CC users
    for (MachineInstr *CCUseInstr : CCUseInstrs) {
      int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
      assert(Idx >= 0 && "Unexpected instruction using CC.");
      MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
      CCOperand.setImm(CCUse);
    }
  return true;
}
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
      MI.getOpcode() != AArch64::CATCHRET)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  DebugLoc DL = MI.getDebugLoc();

  if (MI.getOpcode() == AArch64::CATCHRET) {
    // Skip to the first instruction before the epilog.
    const TargetInstrInfo *TII =
        MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    TargetMBB->setMachineBlockAddressTaken();
    return true;
  }

  Register Reg = MI.getOperand(0).getReg();
  Module &M = *MBB.getParent()->getFunction().getParent();
  if (M.getStackProtectorGuard() == "sysreg") {
    const AArch64SysReg::SysReg *SrcReg =
        AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
    if (!SrcReg)
      report_fatal_error("Unknown SysReg for Stack Protector Guard Register");

    // mrs xN, sysreg
    BuildMI(MBB, MI, DL, get(AArch64::MRS))
        .addDef(Reg, RegState::Renamable)
        .addImm(SrcReg->Encoding);
    int Offset = M.getStackProtectorGuardOffset();
    if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
      // ldr xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset / 8);
    } else if (Offset >= -256 && Offset <= 255) {
      // ldur xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset);
    } else if (Offset >= -4095 && Offset <= 4095) {
      if (Offset >= 0) {
        // add xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(Offset)
            .addImm(0);
      } else {
        // sub xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(-Offset)
            .addImm(0);
      }
      // ldr xN, [xN]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(0);
    } else {
      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
      // than 32760, cannot be encoded with the single scratch register we have.
      // It might be nice to use AArch64::MOVi32imm here, which would get
      // expanded in PreSched2 after PostRA, but our lone scratch Reg already
      // contains the MRS result. findScratchNonCalleeSaveRegister() in
      // AArch64FrameLowering might help us find such a scratch register
      // though. If we failed to find a scratch register, we could emit a
      // stream of add instructions to build up the immediate. Or, we could try
      // to insert a AArch64::MOVi32imm before register allocation so that we
      // didn't need to scavenge for a scratch register.
      report_fatal_error("Unable to encode Stack Protector Guard Offset");
    }
    MBB.erase(MI);
    return true;
  }

  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin());
    }
  } else if (TM.getCodeModel() == CodeModel::Large) {
    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin());
    }
  }

  MBB.erase(MI);

  return true;
}
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}
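// For example (illustrative MIR, not from a specific test), all of the
// following are recognised as zero-materialising instructions:
//   %0:gpr32 = MOVZWi 0, 0          ; movz w0, #0
//   %1:gpr64 = ANDXri $xzr, <imm>   ; and  x1, xzr, #imm
//   %2:gpr32 = COPY $wzr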
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    Register DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}
// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    return AArch64::FPR128RegClass.contains(DstReg);
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}
Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDR_PXI:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STR_PXI:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}
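// A typical caller pattern (hypothetical, for illustration only): generic
// CodeGen passes use these hooks to recognise spill reloads and spills, e.g.
//
//   int FI;
//   if (Register Reg = TII->isLoadFromStackSlot(MI, FI)) {
//     // MI reloads Reg from stack slot FI, i.e. `ldr Reg, [fi#FI, #0]`.
//   }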
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOStridedAccess;
  });
}
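// Usage sketch (hypothetical): a pass that decides two stores must not be
// combined tags the first memory operand, and the load/store optimizer later
// skips the instruction via isLdStPairSuppressed():
//
//   AArch64InstrInfo::suppressLdStPair(StoreMI);
//   assert(AArch64InstrInfo::isLdStPairSuppressed(StoreMI));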
bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STRWpre:
  case AArch64::STURXi:
  case AArch64::STRXpre:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::LDRSWpre:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}
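// For example (illustrative): hasUnscaledLdStOffset(AArch64::LDURXi) and
// hasUnscaledLdStOffset(AArch64::STURWi) are true, while scaled forms such as
// AArch64::LDRXui are not in the list and therefore return false.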
std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default: return {};
  case AArch64::PRFMui: return AArch64::PRFUMi;
  case AArch64::LDRXui: return AArch64::LDURXi;
  case AArch64::LDRWui: return AArch64::LDURWi;
  case AArch64::LDRBui: return AArch64::LDURBi;
  case AArch64::LDRHui: return AArch64::LDURHi;
  case AArch64::LDRSui: return AArch64::LDURSi;
  case AArch64::LDRDui: return AArch64::LDURDi;
  case AArch64::LDRQui: return AArch64::LDURQi;
  case AArch64::LDRBBui: return AArch64::LDURBBi;
  case AArch64::LDRHHui: return AArch64::LDURHHi;
  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
  case AArch64::LDRSWui: return AArch64::LDURSWi;
  case AArch64::STRXui: return AArch64::STURXi;
  case AArch64::STRWui: return AArch64::STURWi;
  case AArch64::STRBui: return AArch64::STURBi;
  case AArch64::STRHui: return AArch64::STURHi;
  case AArch64::STRSui: return AArch64::STURSi;
  case AArch64::STRDui: return AArch64::STURDi;
  case AArch64::STRQui: return AArch64::STURQi;
  case AArch64::STRBBui: return AArch64::STURBBi;
  case AArch64::STRHHui: return AArch64::STURHHi;
  }
}
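// For example (illustrative): getUnscaledLdSt(AArch64::LDRXui) yields
// AArch64::LDURXi, i.e. `ldr x0, [x1, #8]` has the unscaled counterpart
// `ldur x0, [x1, #8]`; an opcode with no unscaled twin yields std::nullopt.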
2357 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc
) {
2360 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2362 case AArch64::LDAPURBi
:
2363 case AArch64::LDAPURHi
:
2364 case AArch64::LDAPURi
:
2365 case AArch64::LDAPURSBWi
:
2366 case AArch64::LDAPURSBXi
:
2367 case AArch64::LDAPURSHWi
:
2368 case AArch64::LDAPURSHXi
:
2369 case AArch64::LDAPURSWi
:
2370 case AArch64::LDAPURXi
:
2371 case AArch64::LDR_PPXI
:
2372 case AArch64::LDR_PXI
:
2373 case AArch64::LDR_ZXI
:
2374 case AArch64::LDR_ZZXI
:
2375 case AArch64::LDR_ZZZXI
:
2376 case AArch64::LDR_ZZZZXI
:
2377 case AArch64::LDRBBui
:
2378 case AArch64::LDRBui
:
2379 case AArch64::LDRDui
:
2380 case AArch64::LDRHHui
:
2381 case AArch64::LDRHui
:
2382 case AArch64::LDRQui
:
2383 case AArch64::LDRSBWui
:
2384 case AArch64::LDRSBXui
:
2385 case AArch64::LDRSHWui
:
2386 case AArch64::LDRSHXui
:
2387 case AArch64::LDRSui
:
2388 case AArch64::LDRSWui
:
2389 case AArch64::LDRWui
:
2390 case AArch64::LDRXui
:
2391 case AArch64::LDURBBi
:
2392 case AArch64::LDURBi
:
2393 case AArch64::LDURDi
:
2394 case AArch64::LDURHHi
:
2395 case AArch64::LDURHi
:
2396 case AArch64::LDURQi
:
2397 case AArch64::LDURSBWi
:
2398 case AArch64::LDURSBXi
:
2399 case AArch64::LDURSHWi
:
2400 case AArch64::LDURSHXi
:
2401 case AArch64::LDURSi
:
2402 case AArch64::LDURSWi
:
2403 case AArch64::LDURWi
:
2404 case AArch64::LDURXi
:
2405 case AArch64::PRFMui
:
2406 case AArch64::PRFUMi
:
2407 case AArch64::ST2Gi
:
2409 case AArch64::STLURBi
:
2410 case AArch64::STLURHi
:
2411 case AArch64::STLURWi
:
2412 case AArch64::STLURXi
:
2413 case AArch64::StoreSwiftAsyncContext
:
2414 case AArch64::STR_PPXI
:
2415 case AArch64::STR_PXI
:
2416 case AArch64::STR_ZXI
:
2417 case AArch64::STR_ZZXI
:
2418 case AArch64::STR_ZZZXI
:
2419 case AArch64::STR_ZZZZXI
:
2420 case AArch64::STRBBui
:
2421 case AArch64::STRBui
:
2422 case AArch64::STRDui
:
2423 case AArch64::STRHHui
:
2424 case AArch64::STRHui
:
2425 case AArch64::STRQui
:
2426 case AArch64::STRSui
:
2427 case AArch64::STRWui
:
2428 case AArch64::STRXui
:
2429 case AArch64::STURBBi
:
2430 case AArch64::STURBi
:
2431 case AArch64::STURDi
:
2432 case AArch64::STURHHi
:
2433 case AArch64::STURHi
:
2434 case AArch64::STURQi
:
2435 case AArch64::STURSi
:
2436 case AArch64::STURWi
:
2437 case AArch64::STURXi
:
2438 case AArch64::STZ2Gi
:
2439 case AArch64::STZGi
:
2440 case AArch64::TAGPstack
:
2442 case AArch64::LD1B_D_IMM
:
2443 case AArch64::LD1B_H_IMM
:
2444 case AArch64::LD1B_IMM
:
2445 case AArch64::LD1B_S_IMM
:
2446 case AArch64::LD1D_IMM
:
2447 case AArch64::LD1H_D_IMM
:
2448 case AArch64::LD1H_IMM
:
2449 case AArch64::LD1H_S_IMM
:
2450 case AArch64::LD1RB_D_IMM
:
2451 case AArch64::LD1RB_H_IMM
:
2452 case AArch64::LD1RB_IMM
:
2453 case AArch64::LD1RB_S_IMM
:
2454 case AArch64::LD1RD_IMM
:
2455 case AArch64::LD1RH_D_IMM
:
2456 case AArch64::LD1RH_IMM
:
2457 case AArch64::LD1RH_S_IMM
:
2458 case AArch64::LD1RSB_D_IMM
:
2459 case AArch64::LD1RSB_H_IMM
:
2460 case AArch64::LD1RSB_S_IMM
:
2461 case AArch64::LD1RSH_D_IMM
:
2462 case AArch64::LD1RSH_S_IMM
:
2463 case AArch64::LD1RSW_IMM
:
2464 case AArch64::LD1RW_D_IMM
:
2465 case AArch64::LD1RW_IMM
:
2466 case AArch64::LD1SB_D_IMM
:
2467 case AArch64::LD1SB_H_IMM
:
2468 case AArch64::LD1SB_S_IMM
:
2469 case AArch64::LD1SH_D_IMM
:
2470 case AArch64::LD1SH_S_IMM
:
2471 case AArch64::LD1SW_D_IMM
:
2472 case AArch64::LD1W_D_IMM
:
2473 case AArch64::LD1W_IMM
:
2474 case AArch64::LD2B_IMM
:
2475 case AArch64::LD2D_IMM
:
2476 case AArch64::LD2H_IMM
:
2477 case AArch64::LD2W_IMM
:
2478 case AArch64::LD3B_IMM
:
2479 case AArch64::LD3D_IMM
:
2480 case AArch64::LD3H_IMM
:
2481 case AArch64::LD3W_IMM
:
2482 case AArch64::LD4B_IMM
:
2483 case AArch64::LD4D_IMM
:
2484 case AArch64::LD4H_IMM
:
2485 case AArch64::LD4W_IMM
:
2487 case AArch64::LDNF1B_D_IMM
:
2488 case AArch64::LDNF1B_H_IMM
:
2489 case AArch64::LDNF1B_IMM
:
2490 case AArch64::LDNF1B_S_IMM
:
2491 case AArch64::LDNF1D_IMM
:
2492 case AArch64::LDNF1H_D_IMM
:
2493 case AArch64::LDNF1H_IMM
:
2494 case AArch64::LDNF1H_S_IMM
:
2495 case AArch64::LDNF1SB_D_IMM
:
2496 case AArch64::LDNF1SB_H_IMM
:
2497 case AArch64::LDNF1SB_S_IMM
:
2498 case AArch64::LDNF1SH_D_IMM
:
2499 case AArch64::LDNF1SH_S_IMM
:
2500 case AArch64::LDNF1SW_D_IMM
:
2501 case AArch64::LDNF1W_D_IMM
:
2502 case AArch64::LDNF1W_IMM
:
2503 case AArch64::LDNPDi
:
2504 case AArch64::LDNPQi
:
2505 case AArch64::LDNPSi
:
2506 case AArch64::LDNPWi
:
2507 case AArch64::LDNPXi
:
2508 case AArch64::LDNT1B_ZRI
:
2509 case AArch64::LDNT1D_ZRI
:
2510 case AArch64::LDNT1H_ZRI
:
2511 case AArch64::LDNT1W_ZRI
:
2512 case AArch64::LDPDi
:
2513 case AArch64::LDPQi
:
2514 case AArch64::LDPSi
:
2515 case AArch64::LDPWi
:
2516 case AArch64::LDPXi
:
2517 case AArch64::LDRBBpost
:
2518 case AArch64::LDRBBpre
:
2519 case AArch64::LDRBpost
:
2520 case AArch64::LDRBpre
:
2521 case AArch64::LDRDpost
:
2522 case AArch64::LDRDpre
:
2523 case AArch64::LDRHHpost
:
2524 case AArch64::LDRHHpre
:
2525 case AArch64::LDRHpost
:
2526 case AArch64::LDRHpre
:
2527 case AArch64::LDRQpost
:
2528 case AArch64::LDRQpre
:
2529 case AArch64::LDRSpost
:
2530 case AArch64::LDRSpre
:
2531 case AArch64::LDRWpost
:
2532 case AArch64::LDRWpre
:
2533 case AArch64::LDRXpost
:
2534 case AArch64::LDRXpre
:
2535 case AArch64::ST1B_D_IMM
:
2536 case AArch64::ST1B_H_IMM
:
2537 case AArch64::ST1B_IMM
:
2538 case AArch64::ST1B_S_IMM
:
2539 case AArch64::ST1D_IMM
:
2540 case AArch64::ST1H_D_IMM
:
2541 case AArch64::ST1H_IMM
:
2542 case AArch64::ST1H_S_IMM
:
2543 case AArch64::ST1W_D_IMM
:
2544 case AArch64::ST1W_IMM
:
2545 case AArch64::ST2B_IMM
:
2546 case AArch64::ST2D_IMM
:
2547 case AArch64::ST2H_IMM
:
2548 case AArch64::ST2W_IMM
:
2549 case AArch64::ST3B_IMM
:
2550 case AArch64::ST3D_IMM
:
2551 case AArch64::ST3H_IMM
:
2552 case AArch64::ST3W_IMM
:
2553 case AArch64::ST4B_IMM
:
2554 case AArch64::ST4D_IMM
:
2555 case AArch64::ST4H_IMM
:
2556 case AArch64::ST4W_IMM
:
2557 case AArch64::STGPi
:
2558 case AArch64::STGPreIndex
:
2559 case AArch64::STZGPreIndex
:
2560 case AArch64::ST2GPreIndex
:
2561 case AArch64::STZ2GPreIndex
:
2562 case AArch64::STGPostIndex
:
2563 case AArch64::STZGPostIndex
:
2564 case AArch64::ST2GPostIndex
:
2565 case AArch64::STZ2GPostIndex
:
2566 case AArch64::STNPDi
:
2567 case AArch64::STNPQi
:
2568 case AArch64::STNPSi
:
2569 case AArch64::STNPWi
:
2570 case AArch64::STNPXi
:
2571 case AArch64::STNT1B_ZRI
:
2572 case AArch64::STNT1D_ZRI
:
2573 case AArch64::STNT1H_ZRI
:
2574 case AArch64::STNT1W_ZRI
:
2575 case AArch64::STPDi
:
2576 case AArch64::STPQi
:
2577 case AArch64::STPSi
:
2578 case AArch64::STPWi
:
2579 case AArch64::STPXi
:
2580 case AArch64::STRBBpost
:
2581 case AArch64::STRBBpre
:
2582 case AArch64::STRBpost
:
2583 case AArch64::STRBpre
:
2584 case AArch64::STRDpost
:
2585 case AArch64::STRDpre
:
2586 case AArch64::STRHHpost
:
2587 case AArch64::STRHHpre
:
2588 case AArch64::STRHpost
:
2589 case AArch64::STRHpre
:
2590 case AArch64::STRQpost
:
2591 case AArch64::STRQpre
:
2592 case AArch64::STRSpost
:
2593 case AArch64::STRSpre
:
2594 case AArch64::STRWpost
:
2595 case AArch64::STRWpre
:
2596 case AArch64::STRXpost
:
2597 case AArch64::STRXpre
:
2599 case AArch64::LDPDpost
:
2600 case AArch64::LDPDpre
:
2601 case AArch64::LDPQpost
:
2602 case AArch64::LDPQpre
:
2603 case AArch64::LDPSpost
:
2604 case AArch64::LDPSpre
:
2605 case AArch64::LDPWpost
:
2606 case AArch64::LDPWpre
:
2607 case AArch64::LDPXpost
:
2608 case AArch64::LDPXpre
:
2609 case AArch64::STGPpre
:
2610 case AArch64::STGPpost
:
2611 case AArch64::STPDpost
:
2612 case AArch64::STPDpre
:
2613 case AArch64::STPQpost
:
2614 case AArch64::STPQpre
:
2615 case AArch64::STPSpost
:
2616 case AArch64::STPSpre
:
2617 case AArch64::STPWpost
:
2618 case AArch64::STPWpre
:
2619 case AArch64::STPXpost
:
2620 case AArch64::STPXpre
:
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRSWui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STURWi:
  case AArch64::STRWpre:
  case AArch64::STURXi:
  case AArch64::STRXpre:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::LDURSWi:
  case AArch64::LDRSWpre:
    return true;
  }
}
bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    assert((!MI.isCall() || !MI.isReturn()) &&
           "Unexpected instruction - was a new tail call opcode introduced?");
    return false;
  case AArch64::TCRETURNdi:
  case AArch64::TCRETURNri:
  case AArch64::TCRETURNrix16x17:
  case AArch64::TCRETURNrix17:
  case AArch64::TCRETURNrinotx16:
  case AArch64::TCRETURNriALL:
  case AArch64::AUTH_TCRETURN:
  case AArch64::AUTH_TCRETURN_BTI:
    return true;
  }
}
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no flag setting equivalent!");
  // 32-bit cases:
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWrs:
    return AArch64::ADDSWrs;
  case AArch64::ADDWrx:
    return AArch64::ADDSWrx;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDWrr:
    return AArch64::ANDSWrr;
  case AArch64::ANDWrs:
    return AArch64::ANDSWrs;
  case AArch64::BICWrr:
    return AArch64::BICSWrr;
  case AArch64::BICWrs:
    return AArch64::BICSWrs;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWrs:
    return AArch64::SUBSWrs;
  case AArch64::SUBWrx:
    return AArch64::SUBSWrx;
  // 64-bit cases:
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXrs:
    return AArch64::ADDSXrs;
  case AArch64::ADDXrx:
    return AArch64::ADDSXrx;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  case AArch64::ANDXrr:
    return AArch64::ANDSXrr;
  case AArch64::ANDXrs:
    return AArch64::ANDSXrs;
  case AArch64::BICXrr:
    return AArch64::BICSXrr;
  case AArch64::BICXrs:
    return AArch64::BICSXrs;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXrs:
    return AArch64::SUBSXrs;
  case AArch64::SUBXrx:
    return AArch64::SUBSXrx;
  // SVE instructions:
  case AArch64::AND_PPzPP:
    return AArch64::ANDS_PPzPP;
  case AArch64::BIC_PPzPP:
    return AArch64::BICS_PPzPP;
  case AArch64::EOR_PPzPP:
    return AArch64::EORS_PPzPP;
  case AArch64::NAND_PPzPP:
    return AArch64::NANDS_PPzPP;
  case AArch64::NOR_PPzPP:
    return AArch64::NORS_PPzPP;
  case AArch64::ORN_PPzPP:
    return AArch64::ORNS_PPzPP;
  case AArch64::ORR_PPzPP:
    return AArch64::ORRS_PPzPP;
  case AArch64::BRKA_PPzP:
    return AArch64::BRKAS_PPzP;
  case AArch64::BRKPA_PPzPP:
    return AArch64::BRKPAS_PPzPP;
  case AArch64::BRKB_PPzP:
    return AArch64::BRKBS_PPzP;
  case AArch64::BRKPB_PPzPP:
    return AArch64::BRKPBS_PPzPP;
  case AArch64::BRKN_PPzP:
    return AArch64::BRKNS_PPzP;
  case AArch64::RDFFR_PPz:
    return AArch64::RDFFRS_PPz;
  case AArch64::PTRUE_B:
    return AArch64::PTRUES_B;
  }
}
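// For example (illustrative): convertToFlagSettingOpc(AArch64::ADDWri) is
// AArch64::ADDSWri, i.e. `add w0, w1, #1` becomes `adds w0, w1, #1` when a
// later compare against zero can be folded into the arithmetic instruction.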
// Is this a candidate for ld/st merging or pairing?  For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {

  bool IsPreLdSt = isPreLdSt(MI);

  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
  // For Pre-inc LD/ST, the operand is shifted by one.
  assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
          MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
         "Expected a reg or frame index operand.");

  // For Pre-indexed addressing quadword instructions, the third operand is the
  // immediate value.
  bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();

  if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  // This case will never occur with an FI base.
  // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
  // STR<S,D,Q,W,X>pre, it can be merged.
  // For example:
  //   ldr q0, [x11, #32]!
  //   ldr q1, [x11, #16]
  // to
  //   ldp q0, q1, [x11, #32]!
  if (MI.getOperand(1).isReg() && !IsPreLdSt) {
    Register BaseReg = MI.getOperand(1).getReg();
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (MI.modifiesRegister(BaseReg, TRI))
      return false;
  }

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // Do not pair any callee-save store/reload instructions in the
  // prologue/epilogue if the CFI information encoded the operations as separate
  // instructions, as that will cause the size of the actual prologue to mismatch
  // with the prologue size recorded in the Windows CFI.
  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
                     MI.getMF()->getFunction().needsUnwindTableEntry();
  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
                      MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}
bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
    const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore())
    return false;

  const MachineOperand *BaseOp;
  TypeSize WidthN(0, false);
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
                                    WidthN, TRI))
    return false;
  // The maximum vscale is 16 under AArch64, return the maximal extent for the
  // vector.
  Width = LocationSize::precise(WidthN);
  BaseOps.push_back(BaseOp);
  return true;
}
std::optional<ExtAddrMode>
AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
                                          const TargetRegisterInfo *TRI) const {
  const MachineOperand *Base; // Filled with the base operand of MI.
  int64_t Offset;             // Filled with the offset of MI.
  bool OffsetIsScalable;
  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
    return std::nullopt;

  if (!Base->isReg())
    return std::nullopt;
  ExtAddrMode AM;
  AM.BaseReg = Base->getReg();
  AM.Displacement = Offset;
  AM.ScaledReg = 0;
  AM.Scale = 0;
  return AM;
}
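// Illustrative result (not from a specific test): for `ldr x1, [x2, #16]`
// this returns an ExtAddrMode with BaseReg = x2, Displacement = 16 and no
// scaled register; a load whose base is a frame index yields std::nullopt.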
2886 bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr
&MemI
,
2888 const MachineInstr
&AddrI
,
2889 ExtAddrMode
&AM
) const {
2890 // Filter out instructions into which we cannot fold.
2892 int64_t OffsetScale
= 1;
2893 switch (MemI
.getOpcode()) {
2897 case AArch64::LDURQi
:
2898 case AArch64::STURQi
:
2902 case AArch64::LDURDi
:
2903 case AArch64::STURDi
:
2904 case AArch64::LDURXi
:
2905 case AArch64::STURXi
:
2909 case AArch64::LDURWi
:
2910 case AArch64::LDURSWi
:
2911 case AArch64::STURWi
:
2915 case AArch64::LDURHi
:
2916 case AArch64::STURHi
:
2917 case AArch64::LDURHHi
:
2918 case AArch64::STURHHi
:
2919 case AArch64::LDURSHXi
:
2920 case AArch64::LDURSHWi
:
2924 case AArch64::LDRBroX
:
2925 case AArch64::LDRBBroX
:
2926 case AArch64::LDRSBXroX
:
2927 case AArch64::LDRSBWroX
:
2928 case AArch64::STRBroX
:
2929 case AArch64::STRBBroX
:
2930 case AArch64::LDURBi
:
2931 case AArch64::LDURBBi
:
2932 case AArch64::LDURSBXi
:
2933 case AArch64::LDURSBWi
:
2934 case AArch64::STURBi
:
2935 case AArch64::STURBBi
:
2936 case AArch64::LDRBui
:
2937 case AArch64::LDRBBui
:
2938 case AArch64::LDRSBXui
:
2939 case AArch64::LDRSBWui
:
2940 case AArch64::STRBui
:
2941 case AArch64::STRBBui
:
2945 case AArch64::LDRQroX
:
2946 case AArch64::STRQroX
:
2947 case AArch64::LDRQui
:
2948 case AArch64::STRQui
:
2953 case AArch64::LDRDroX
:
2954 case AArch64::STRDroX
:
2955 case AArch64::LDRXroX
:
2956 case AArch64::STRXroX
:
2957 case AArch64::LDRDui
:
2958 case AArch64::STRDui
:
2959 case AArch64::LDRXui
:
2960 case AArch64::STRXui
:
2965 case AArch64::LDRWroX
:
2966 case AArch64::LDRSWroX
:
2967 case AArch64::STRWroX
:
2968 case AArch64::LDRWui
:
2969 case AArch64::LDRSWui
:
2970 case AArch64::STRWui
:
2975 case AArch64::LDRHroX
:
2976 case AArch64::STRHroX
:
2977 case AArch64::LDRHHroX
:
2978 case AArch64::STRHHroX
:
2979 case AArch64::LDRSHXroX
:
2980 case AArch64::LDRSHWroX
:
2981 case AArch64::LDRHui
:
2982 case AArch64::STRHui
:
2983 case AArch64::LDRHHui
:
2984 case AArch64::STRHHui
:
2985 case AArch64::LDRSHXui
:
2986 case AArch64::LDRSHWui
:
2992 // Check the fold operand is not the loaded/stored value.
2993 const MachineOperand
&BaseRegOp
= MemI
.getOperand(0);
2994 if (BaseRegOp
.isReg() && BaseRegOp
.getReg() == Reg
)
2997 // Handle memory instructions with a [Reg, Reg] addressing mode.
2998 if (MemI
.getOperand(2).isReg()) {
2999 // Bail if the addressing mode already includes extension of the offset
3001 if (MemI
.getOperand(3).getImm())
3004 // Check if we actually have a scaled offset.
3005 if (MemI
.getOperand(4).getImm() == 0)
3008 // If the address instructions is folded into the base register, then the
3009 // addressing mode must not have a scale. Then we can swap the base and the
3010 // scaled registers.
3011 if (MemI
.getOperand(1).getReg() == Reg
&& OffsetScale
!= 1)
3014 switch (AddrI
.getOpcode()) {
3018 case AArch64::SBFMXri
:
3020 // ldr Xd, [Xn, Xa, lsl #N]
3022 // ldr Xd, [Xn, Wm, sxtw #N]
3023 if (AddrI
.getOperand(2).getImm() != 0 ||
3024 AddrI
.getOperand(3).getImm() != 31)
3027 AM
.BaseReg
= MemI
.getOperand(1).getReg();
3028 if (AM
.BaseReg
== Reg
)
3029 AM
.BaseReg
= MemI
.getOperand(2).getReg();
3030 AM
.ScaledReg
= AddrI
.getOperand(1).getReg();
3031 AM
.Scale
= OffsetScale
;
3032 AM
.Displacement
= 0;
3033 AM
.Form
= ExtAddrMode::Formula::SExtScaledReg
;
3036 case TargetOpcode::SUBREG_TO_REG
: {
3038 // ldr Xd, [Xn, Xa, lsl #N]
3040 // ldr Xd, [Xn, Wm, uxtw #N]
3042 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3043 if (AddrI
.getOperand(1).getImm() != 0 ||
3044 AddrI
.getOperand(3).getImm() != AArch64::sub_32
)
3047 const MachineRegisterInfo
&MRI
= AddrI
.getMF()->getRegInfo();
3048 Register OffsetReg
= AddrI
.getOperand(2).getReg();
3049 if (!OffsetReg
.isVirtual() || !MRI
.hasOneNonDBGUse(OffsetReg
))
3052 const MachineInstr
&DefMI
= *MRI
.getVRegDef(OffsetReg
);
3053 if (DefMI
.getOpcode() != AArch64::ORRWrs
||
3054 DefMI
.getOperand(1).getReg() != AArch64::WZR
||
3055 DefMI
.getOperand(3).getImm() != 0)
3058 AM
.BaseReg
= MemI
.getOperand(1).getReg();
3059 if (AM
.BaseReg
== Reg
)
3060 AM
.BaseReg
= MemI
.getOperand(2).getReg();
3061 AM
.ScaledReg
= DefMI
.getOperand(2).getReg();
3062 AM
.Scale
= OffsetScale
;
3063 AM
.Displacement
= 0;
3064 AM
.Form
= ExtAddrMode::Formula::ZExtScaledReg
;
3070 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3072 // Check we are not breaking a potential conversion to an LDP.
3073 auto validateOffsetForLDP
= [](unsigned NumBytes
, int64_t OldOffset
,
3074 int64_t NewOffset
) -> bool {
3075 int64_t MinOffset
, MaxOffset
;
3092 return OldOffset
< MinOffset
|| OldOffset
> MaxOffset
||
3093 (NewOffset
>= MinOffset
&& NewOffset
<= MaxOffset
);
3095 auto canFoldAddSubImmIntoAddrMode
= [&](int64_t Disp
) -> bool {
3096 int64_t OldOffset
= MemI
.getOperand(2).getImm() * OffsetScale
;
3097 int64_t NewOffset
= OldOffset
+ Disp
;
3098 if (!isLegalAddressingMode(NumBytes
, NewOffset
, /* Scale */ 0))
3100 // If the old offset would fit into an LDP, but the new offset wouldn't,
3102 if (!validateOffsetForLDP(NumBytes
, OldOffset
, NewOffset
))
3104 AM
.BaseReg
= AddrI
.getOperand(1).getReg();
3107 AM
.Displacement
= NewOffset
;
3108 AM
.Form
= ExtAddrMode::Formula::Basic
;
3112 auto canFoldAddRegIntoAddrMode
=
3114 ExtAddrMode::Formula Form
= ExtAddrMode::Formula::Basic
) -> bool {
3115 if (MemI
.getOperand(2).getImm() != 0)
3117 if (!isLegalAddressingMode(NumBytes
, /* Offset */ 0, Scale
))
3119 AM
.BaseReg
= AddrI
.getOperand(1).getReg();
3120 AM
.ScaledReg
= AddrI
.getOperand(2).getReg();
3122 AM
.Displacement
= 0;
3127 auto avoidSlowSTRQ
= [&](const MachineInstr
&MemI
) {
3128 unsigned Opcode
= MemI
.getOpcode();
3129 return (Opcode
== AArch64::STURQi
|| Opcode
== AArch64::STRQui
) &&
3130 Subtarget
.isSTRQroSlow();
3134 const bool OptSize
= MemI
.getMF()->getFunction().hasOptSize();
3135 switch (AddrI
.getOpcode()) {
3139 case AArch64::ADDXri
:
3143 // ldr Xd, [Xn, #N'+M]
3144 Disp
= AddrI
.getOperand(2).getImm() << AddrI
.getOperand(3).getImm();
3145 return canFoldAddSubImmIntoAddrMode(Disp
);
3147 case AArch64::SUBXri
:
3151 // ldr Xd, [Xn, #N'+M]
3152 Disp
= AddrI
.getOperand(2).getImm() << AddrI
.getOperand(3).getImm();
3153 return canFoldAddSubImmIntoAddrMode(-Disp
);
3155 case AArch64::ADDXrs
: {
3156 // add Xa, Xn, Xm, lsl #N
3159 // ldr Xd, [Xn, Xm, lsl #N]
3161 // Don't fold the add if the result would be slower, unless optimising for
3163 unsigned Shift
= static_cast<unsigned>(AddrI
.getOperand(3).getImm());
3164 if (AArch64_AM::getShiftType(Shift
) != AArch64_AM::ShiftExtendType::LSL
)
3166 Shift
= AArch64_AM::getShiftValue(Shift
);
3168 if (Shift
!= 2 && Shift
!= 3 && Subtarget
.hasAddrLSLSlow14())
3170 if (avoidSlowSTRQ(MemI
))
3173 return canFoldAddRegIntoAddrMode(1ULL << Shift
);
3176 case AArch64::ADDXrr
:
3180 // ldr Xd, [Xn, Xm, lsl #0]
3182 // Don't fold the add if the result would be slower, unless optimising for
3184 if (!OptSize
&& avoidSlowSTRQ(MemI
))
3186 return canFoldAddRegIntoAddrMode(1);
3188 case AArch64::ADDXrx
:
3189 // add Xa, Xn, Wm, {s,u}xtw #N
3192 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3194 // Don't fold the add if the result would be slower, unless optimising for
3196 if (!OptSize
&& avoidSlowSTRQ(MemI
))
3199 // Can fold only sign-/zero-extend of a word.
3200 unsigned Imm
= static_cast<unsigned>(AddrI
.getOperand(3).getImm());
3201 AArch64_AM::ShiftExtendType Extend
= AArch64_AM::getArithExtendType(Imm
);
3202 if (Extend
!= AArch64_AM::UXTW
&& Extend
!= AArch64_AM::SXTW
)
3205 return canFoldAddRegIntoAddrMode(
3206 1ULL << AArch64_AM::getArithShiftValue(Imm
),
3207 (Extend
== AArch64_AM::SXTW
) ? ExtAddrMode::Formula::SExtScaledReg
3208 : ExtAddrMode::Formula::ZExtScaledReg
);
3212 // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3213 // return the opcode of an instruction performing the same operation, but using
3214 // the [Reg, Reg] addressing mode.
3215 static unsigned regOffsetOpcode(unsigned Opcode
) {
3218 llvm_unreachable("Address folding not implemented for instruction");
3220 case AArch64::LDURQi
:
3221 case AArch64::LDRQui
:
3222 return AArch64::LDRQroX
;
3223 case AArch64::STURQi
:
3224 case AArch64::STRQui
:
3225 return AArch64::STRQroX
;
3226 case AArch64::LDURDi
:
3227 case AArch64::LDRDui
:
3228 return AArch64::LDRDroX
;
3229 case AArch64::STURDi
:
3230 case AArch64::STRDui
:
3231 return AArch64::STRDroX
;
3232 case AArch64::LDURXi
:
3233 case AArch64::LDRXui
:
3234 return AArch64::LDRXroX
;
3235 case AArch64::STURXi
:
3236 case AArch64::STRXui
:
3237 return AArch64::STRXroX
;
3238 case AArch64::LDURWi
:
3239 case AArch64::LDRWui
:
3240 return AArch64::LDRWroX
;
3241 case AArch64::LDURSWi
:
3242 case AArch64::LDRSWui
:
3243 return AArch64::LDRSWroX
;
3244 case AArch64::STURWi
:
3245 case AArch64::STRWui
:
3246 return AArch64::STRWroX
;
3247 case AArch64::LDURHi
:
3248 case AArch64::LDRHui
:
3249 return AArch64::LDRHroX
;
3250 case AArch64::STURHi
:
3251 case AArch64::STRHui
:
3252 return AArch64::STRHroX
;
3253 case AArch64::LDURHHi
:
3254 case AArch64::LDRHHui
:
3255 return AArch64::LDRHHroX
;
3256 case AArch64::STURHHi
:
3257 case AArch64::STRHHui
:
3258 return AArch64::STRHHroX
;
3259 case AArch64::LDURSHXi
:
3260 case AArch64::LDRSHXui
:
3261 return AArch64::LDRSHXroX
;
3262 case AArch64::LDURSHWi
:
3263 case AArch64::LDRSHWui
:
3264 return AArch64::LDRSHWroX
;
3265 case AArch64::LDURBi
:
3266 case AArch64::LDRBui
:
3267 return AArch64::LDRBroX
;
3268 case AArch64::LDURBBi
:
3269 case AArch64::LDRBBui
:
3270 return AArch64::LDRBBroX
;
3271 case AArch64::LDURSBXi
:
3272 case AArch64::LDRSBXui
:
3273 return AArch64::LDRSBXroX
;
3274 case AArch64::LDURSBWi
:
3275 case AArch64::LDRSBWui
:
3276 return AArch64::LDRSBWroX
;
3277 case AArch64::STURBi
:
3278 case AArch64::STRBui
:
3279 return AArch64::STRBroX
;
3280 case AArch64::STURBBi
:
3281 case AArch64::STRBBui
:
3282 return AArch64::STRBBroX
;
3286 // Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3287 // the opcode of an instruction performing the same operation, but using the
3288 // [Reg, #Imm] addressing mode with scaled offset.
3289 unsigned scaledOffsetOpcode(unsigned Opcode
, unsigned &Scale
) {
3292 llvm_unreachable("Address folding not implemented for instruction");
3294 case AArch64::LDURQi
:
3296 return AArch64::LDRQui
;
3297 case AArch64::STURQi
:
3299 return AArch64::STRQui
;
3300 case AArch64::LDURDi
:
3302 return AArch64::LDRDui
;
3303 case AArch64::STURDi
:
3305 return AArch64::STRDui
;
3306 case AArch64::LDURXi
:
3308 return AArch64::LDRXui
;
3309 case AArch64::STURXi
:
3311 return AArch64::STRXui
;
3312 case AArch64::LDURWi
:
3314 return AArch64::LDRWui
;
3315 case AArch64::LDURSWi
:
3317 return AArch64::LDRSWui
;
3318 case AArch64::STURWi
:
3320 return AArch64::STRWui
;
3321 case AArch64::LDURHi
:
3323 return AArch64::LDRHui
;
3324 case AArch64::STURHi
:
3326 return AArch64::STRHui
;
3327 case AArch64::LDURHHi
:
3329 return AArch64::LDRHHui
;
3330 case AArch64::STURHHi
:
3332 return AArch64::STRHHui
;
3333 case AArch64::LDURSHXi
:
3335 return AArch64::LDRSHXui
;
3336 case AArch64::LDURSHWi
:
3338 return AArch64::LDRSHWui
;
3339 case AArch64::LDURBi
:
3341 return AArch64::LDRBui
;
3342 case AArch64::LDURBBi
:
3344 return AArch64::LDRBBui
;
3345 case AArch64::LDURSBXi
:
3347 return AArch64::LDRSBXui
;
3348 case AArch64::LDURSBWi
:
3350 return AArch64::LDRSBWui
;
3351 case AArch64::STURBi
:
3353 return AArch64::STRBui
;
3354 case AArch64::STURBBi
:
3356 return AArch64::STRBBui
;
3357 case AArch64::LDRQui
:
3358 case AArch64::STRQui
:
3361 case AArch64::LDRDui
:
3362 case AArch64::STRDui
:
3363 case AArch64::LDRXui
:
3364 case AArch64::STRXui
:
3367 case AArch64::LDRWui
:
3368 case AArch64::LDRSWui
:
3369 case AArch64::STRWui
:
3372 case AArch64::LDRHui
:
3373 case AArch64::STRHui
:
3374 case AArch64::LDRHHui
:
3375 case AArch64::STRHHui
:
3376 case AArch64::LDRSHXui
:
3377 case AArch64::LDRSHWui
:
3380 case AArch64::LDRBui
:
3381 case AArch64::LDRBBui
:
3382 case AArch64::LDRSBXui
:
3383 case AArch64::LDRSBWui
:
3384 case AArch64::STRBui
:
3385 case AArch64::STRBBui
:
3391 // Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3392 // the opcode of an instruction performing the same operation, but using the
3393 // [Reg, #Imm] addressing mode with unscaled offset.
3394 unsigned unscaledOffsetOpcode(unsigned Opcode
) {
3397 llvm_unreachable("Address folding not implemented for instruction");
3399 case AArch64::LDURQi
:
3400 case AArch64::STURQi
:
3401 case AArch64::LDURDi
:
3402 case AArch64::STURDi
:
3403 case AArch64::LDURXi
:
3404 case AArch64::STURXi
:
3405 case AArch64::LDURWi
:
3406 case AArch64::LDURSWi
:
3407 case AArch64::STURWi
:
3408 case AArch64::LDURHi
:
3409 case AArch64::STURHi
:
3410 case AArch64::LDURHHi
:
3411 case AArch64::STURHHi
:
3412 case AArch64::LDURSHXi
:
3413 case AArch64::LDURSHWi
:
3414 case AArch64::LDURBi
:
3415 case AArch64::STURBi
:
3416 case AArch64::LDURBBi
:
3417 case AArch64::STURBBi
:
3418 case AArch64::LDURSBWi
:
3419 case AArch64::LDURSBXi
:
3421 case AArch64::LDRQui
:
3422 return AArch64::LDURQi
;
3423 case AArch64::STRQui
:
3424 return AArch64::STURQi
;
3425 case AArch64::LDRDui
:
3426 return AArch64::LDURDi
;
3427 case AArch64::STRDui
:
3428 return AArch64::STURDi
;
3429 case AArch64::LDRXui
:
3430 return AArch64::LDURXi
;
3431 case AArch64::STRXui
:
3432 return AArch64::STURXi
;
3433 case AArch64::LDRWui
:
3434 return AArch64::LDURWi
;
3435 case AArch64::LDRSWui
:
3436 return AArch64::LDURSWi
;
3437 case AArch64::STRWui
:
3438 return AArch64::STURWi
;
3439 case AArch64::LDRHui
:
3440 return AArch64::LDURHi
;
3441 case AArch64::STRHui
:
3442 return AArch64::STURHi
;
3443 case AArch64::LDRHHui
:
3444 return AArch64::LDURHHi
;
3445 case AArch64::STRHHui
:
3446 return AArch64::STURHHi
;
3447 case AArch64::LDRSHXui
:
3448 return AArch64::LDURSHXi
;
3449 case AArch64::LDRSHWui
:
3450 return AArch64::LDURSHWi
;
3451 case AArch64::LDRBBui
:
3452 return AArch64::LDURBBi
;
3453 case AArch64::LDRBui
:
3454 return AArch64::LDURBi
;
3455 case AArch64::STRBBui
:
3456 return AArch64::STURBBi
;
3457 case AArch64::STRBui
:
3458 return AArch64::STURBi
;
3459 case AArch64::LDRSBWui
:
3460 return AArch64::LDURSBWi
;
3461 case AArch64::LDRSBXui
:
3462 return AArch64::LDURSBXi
;
3466 // Given the opcode of a memory load/store instruction, return the opcode of an
3467 // instruction performing the same operation, but using
3468 // the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3470 static unsigned offsetExtendOpcode(unsigned Opcode
) {
3473 llvm_unreachable("Address folding not implemented for instruction");
3475 case AArch64::LDRQroX
:
3476 case AArch64::LDURQi
:
3477 case AArch64::LDRQui
:
3478 return AArch64::LDRQroW
;
3479 case AArch64::STRQroX
:
3480 case AArch64::STURQi
:
3481 case AArch64::STRQui
:
3482 return AArch64::STRQroW
;
3483 case AArch64::LDRDroX
:
3484 case AArch64::LDURDi
:
3485 case AArch64::LDRDui
:
3486 return AArch64::LDRDroW
;
3487 case AArch64::STRDroX
:
3488 case AArch64::STURDi
:
3489 case AArch64::STRDui
:
3490 return AArch64::STRDroW
;
3491 case AArch64::LDRXroX
:
3492 case AArch64::LDURXi
:
3493 case AArch64::LDRXui
:
3494 return AArch64::LDRXroW
;
3495 case AArch64::STRXroX
:
3496 case AArch64::STURXi
:
3497 case AArch64::STRXui
:
3498 return AArch64::STRXroW
;
3499 case AArch64::LDRWroX
:
3500 case AArch64::LDURWi
:
3501 case AArch64::LDRWui
:
3502 return AArch64::LDRWroW
;
3503 case AArch64::LDRSWroX
:
3504 case AArch64::LDURSWi
:
3505 case AArch64::LDRSWui
:
3506 return AArch64::LDRSWroW
;
3507 case AArch64::STRWroX
:
3508 case AArch64::STURWi
:
3509 case AArch64::STRWui
:
3510 return AArch64::STRWroW
;
3511 case AArch64::LDRHroX
:
3512 case AArch64::LDURHi
:
3513 case AArch64::LDRHui
:
3514 return AArch64::LDRHroW
;
3515 case AArch64::STRHroX
:
3516 case AArch64::STURHi
:
3517 case AArch64::STRHui
:
3518 return AArch64::STRHroW
;
3519 case AArch64::LDRHHroX
:
3520 case AArch64::LDURHHi
:
3521 case AArch64::LDRHHui
:
3522 return AArch64::LDRHHroW
;
3523 case AArch64::STRHHroX
:
3524 case AArch64::STURHHi
:
3525 case AArch64::STRHHui
:
3526 return AArch64::STRHHroW
;
3527 case AArch64::LDRSHXroX
:
3528 case AArch64::LDURSHXi
:
3529 case AArch64::LDRSHXui
:
3530 return AArch64::LDRSHXroW
;
3531 case AArch64::LDRSHWroX
:
3532 case AArch64::LDURSHWi
:
3533 case AArch64::LDRSHWui
:
3534 return AArch64::LDRSHWroW
;
3535 case AArch64::LDRBroX
:
3536 case AArch64::LDURBi
:
3537 case AArch64::LDRBui
:
3538 return AArch64::LDRBroW
;
3539 case AArch64::LDRBBroX
:
3540 case AArch64::LDURBBi
:
3541 case AArch64::LDRBBui
:
3542 return AArch64::LDRBBroW
;
3543 case AArch64::LDRSBXroX
:
3544 case AArch64::LDURSBXi
:
3545 case AArch64::LDRSBXui
:
3546 return AArch64::LDRSBXroW
;
3547 case AArch64::LDRSBWroX
:
3548 case AArch64::LDURSBWi
:
3549 case AArch64::LDRSBWui
:
3550 return AArch64::LDRSBWroW
;
3551 case AArch64::STRBroX
:
3552 case AArch64::STURBi
:
3553 case AArch64::STRBui
:
3554 return AArch64::STRBroW
;
3555 case AArch64::STRBBroX
:
3556 case AArch64::STURBBi
:
3557 case AArch64::STRBBui
:
3558 return AArch64::STRBBroW
;
MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
                                                 const ExtAddrMode &AM) const {

  const DebugLoc &DL = MemI.getDebugLoc();
  MachineBasicBlock &MBB = *MemI.getParent();
  MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();

  if (AM.Form == ExtAddrMode::Formula::Basic) {
    if (AM.ScaledReg) {
      // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
      unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
      MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
                   .addReg(MemI.getOperand(0).getReg(),
                           MemI.mayLoad() ? RegState::Define : 0)
                   .addReg(AM.BaseReg)
                   .addReg(AM.ScaledReg)
                   .addImm(0)
                   .addImm(AM.Scale > 1)
                   .setMemRefs(MemI.memoperands())
                   .setMIFlags(MemI.getFlags());
      return B.getInstr();
    }

    assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
           "Addressing mode not supported for folding");

    // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
    unsigned Scale = 1;
    unsigned Opcode = MemI.getOpcode();
    if (isInt<9>(AM.Displacement))
      Opcode = unscaledOffsetOpcode(Opcode);
    else
      Opcode = scaledOffsetOpcode(Opcode, Scale);

    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
                 .addReg(MemI.getOperand(0).getReg(),
                         MemI.mayLoad() ? RegState::Define : 0)
                 .addReg(AM.BaseReg)
                 .addImm(AM.Displacement / Scale)
                 .setMemRefs(MemI.memoperands())
                 .setMIFlags(MemI.getFlags());
    return B.getInstr();
  }

  if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
      AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
    // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
    assert(AM.ScaledReg && !AM.Displacement &&
           "Address offset can be a register or an immediate, but not both");
    unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
    MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
    // Make sure the offset register is in the correct register class.
    Register OffsetReg = AM.ScaledReg;
    const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
    if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
      OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
          .addReg(AM.ScaledReg, 0, AArch64::sub_32);
    }
    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
                 .addReg(MemI.getOperand(0).getReg(),
                         MemI.mayLoad() ? RegState::Define : 0)
                 .addReg(AM.BaseReg)
                 .addReg(OffsetReg)
                 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
                 .addImm(AM.Scale != 1)
                 .setMemRefs(MemI.memoperands())
                 .setMIFlags(MemI.getFlags());

    return B.getInstr();
  }

  llvm_unreachable(
      "Function must not be called with an addressing mode it can't handle");
}
3639 /// Return true if the opcode is a post-index ld/st instruction, which really
3640 /// loads from base+0.
3641 static bool isPostIndexLdStOpcode(unsigned Opcode
) {
3645 case AArch64::LD1Fourv16b_POST
:
3646 case AArch64::LD1Fourv1d_POST
:
3647 case AArch64::LD1Fourv2d_POST
:
3648 case AArch64::LD1Fourv2s_POST
:
3649 case AArch64::LD1Fourv4h_POST
:
3650 case AArch64::LD1Fourv4s_POST
:
3651 case AArch64::LD1Fourv8b_POST
:
3652 case AArch64::LD1Fourv8h_POST
:
3653 case AArch64::LD1Onev16b_POST
:
3654 case AArch64::LD1Onev1d_POST
:
3655 case AArch64::LD1Onev2d_POST
:
3656 case AArch64::LD1Onev2s_POST
:
3657 case AArch64::LD1Onev4h_POST
:
3658 case AArch64::LD1Onev4s_POST
:
3659 case AArch64::LD1Onev8b_POST
:
3660 case AArch64::LD1Onev8h_POST
:
3661 case AArch64::LD1Rv16b_POST
:
3662 case AArch64::LD1Rv1d_POST
:
3663 case AArch64::LD1Rv2d_POST
:
3664 case AArch64::LD1Rv2s_POST
:
3665 case AArch64::LD1Rv4h_POST
:
3666 case AArch64::LD1Rv4s_POST
:
3667 case AArch64::LD1Rv8b_POST
:
3668 case AArch64::LD1Rv8h_POST
:
3669 case AArch64::LD1Threev16b_POST
:
3670 case AArch64::LD1Threev1d_POST
:
3671 case AArch64::LD1Threev2d_POST
:
3672 case AArch64::LD1Threev2s_POST
:
3673 case AArch64::LD1Threev4h_POST
:
3674 case AArch64::LD1Threev4s_POST
:
3675 case AArch64::LD1Threev8b_POST
:
3676 case AArch64::LD1Threev8h_POST
:
3677 case AArch64::LD1Twov16b_POST
:
3678 case AArch64::LD1Twov1d_POST
:
3679 case AArch64::LD1Twov2d_POST
:
3680 case AArch64::LD1Twov2s_POST
:
3681 case AArch64::LD1Twov4h_POST
:
3682 case AArch64::LD1Twov4s_POST
:
3683 case AArch64::LD1Twov8b_POST
:
3684 case AArch64::LD1Twov8h_POST
:
3685 case AArch64::LD1i16_POST
:
3686 case AArch64::LD1i32_POST
:
3687 case AArch64::LD1i64_POST
:
3688 case AArch64::LD1i8_POST
:
3689 case AArch64::LD2Rv16b_POST
:
3690 case AArch64::LD2Rv1d_POST
:
3691 case AArch64::LD2Rv2d_POST
:
3692 case AArch64::LD2Rv2s_POST
:
3693 case AArch64::LD2Rv4h_POST
:
3694 case AArch64::LD2Rv4s_POST
:
3695 case AArch64::LD2Rv8b_POST
:
3696 case AArch64::LD2Rv8h_POST
:
3697 case AArch64::LD2Twov16b_POST
:
3698 case AArch64::LD2Twov2d_POST
:
3699 case AArch64::LD2Twov2s_POST
:
3700 case AArch64::LD2Twov4h_POST
:
3701 case AArch64::LD2Twov4s_POST
:
3702 case AArch64::LD2Twov8b_POST
:
3703 case AArch64::LD2Twov8h_POST
:
3704 case AArch64::LD2i16_POST
:
3705 case AArch64::LD2i32_POST
:
3706 case AArch64::LD2i64_POST
:
3707 case AArch64::LD2i8_POST
:
3708 case AArch64::LD3Rv16b_POST
:
3709 case AArch64::LD3Rv1d_POST
:
3710 case AArch64::LD3Rv2d_POST
:
3711 case AArch64::LD3Rv2s_POST
:
3712 case AArch64::LD3Rv4h_POST
:
3713 case AArch64::LD3Rv4s_POST
:
3714 case AArch64::LD3Rv8b_POST
:
3715 case AArch64::LD3Rv8h_POST
:
3716 case AArch64::LD3Threev16b_POST
:
3717 case AArch64::LD3Threev2d_POST
:
3718 case AArch64::LD3Threev2s_POST
:
  case AArch64::LD3Threev4h_POST:
  case AArch64::LD3Threev4s_POST:
  case AArch64::LD3Threev8b_POST:
  case AArch64::LD3Threev8h_POST:
  case AArch64::LD3i16_POST:
  case AArch64::LD3i32_POST:
  case AArch64::LD3i64_POST:
  case AArch64::LD3i8_POST:
  case AArch64::LD4Fourv16b_POST:
  case AArch64::LD4Fourv2d_POST:
  case AArch64::LD4Fourv2s_POST:
  case AArch64::LD4Fourv4h_POST:
  case AArch64::LD4Fourv4s_POST:
  case AArch64::LD4Fourv8b_POST:
  case AArch64::LD4Fourv8h_POST:
  case AArch64::LD4Rv16b_POST:
  case AArch64::LD4Rv1d_POST:
  case AArch64::LD4Rv2d_POST:
  case AArch64::LD4Rv2s_POST:
  case AArch64::LD4Rv4h_POST:
  case AArch64::LD4Rv4s_POST:
  case AArch64::LD4Rv8b_POST:
  case AArch64::LD4Rv8h_POST:
  case AArch64::LD4i16_POST:
  case AArch64::LD4i32_POST:
  case AArch64::LD4i64_POST:
  case AArch64::LD4i8_POST:
  case AArch64::LDAPRWpost:
  case AArch64::LDAPRXpost:
  case AArch64::LDIAPPWpost:
  case AArch64::LDIAPPXpost:
  case AArch64::LDPDpost:
  case AArch64::LDPQpost:
  case AArch64::LDPSWpost:
  case AArch64::LDPSpost:
  case AArch64::LDPWpost:
  case AArch64::LDPXpost:
  case AArch64::LDRBBpost:
  case AArch64::LDRBpost:
  case AArch64::LDRDpost:
  case AArch64::LDRHHpost:
  case AArch64::LDRHpost:
  case AArch64::LDRQpost:
  case AArch64::LDRSBWpost:
  case AArch64::LDRSBXpost:
  case AArch64::LDRSHWpost:
  case AArch64::LDRSHXpost:
  case AArch64::LDRSWpost:
  case AArch64::LDRSpost:
  case AArch64::LDRWpost:
  case AArch64::LDRXpost:
  case AArch64::ST1Fourv16b_POST:
  case AArch64::ST1Fourv1d_POST:
  case AArch64::ST1Fourv2d_POST:
  case AArch64::ST1Fourv2s_POST:
  case AArch64::ST1Fourv4h_POST:
  case AArch64::ST1Fourv4s_POST:
  case AArch64::ST1Fourv8b_POST:
  case AArch64::ST1Fourv8h_POST:
  case AArch64::ST1Onev16b_POST:
  case AArch64::ST1Onev1d_POST:
  case AArch64::ST1Onev2d_POST:
  case AArch64::ST1Onev2s_POST:
  case AArch64::ST1Onev4h_POST:
  case AArch64::ST1Onev4s_POST:
  case AArch64::ST1Onev8b_POST:
  case AArch64::ST1Onev8h_POST:
  case AArch64::ST1Threev16b_POST:
  case AArch64::ST1Threev1d_POST:
  case AArch64::ST1Threev2d_POST:
  case AArch64::ST1Threev2s_POST:
  case AArch64::ST1Threev4h_POST:
  case AArch64::ST1Threev4s_POST:
  case AArch64::ST1Threev8b_POST:
  case AArch64::ST1Threev8h_POST:
  case AArch64::ST1Twov16b_POST:
  case AArch64::ST1Twov1d_POST:
  case AArch64::ST1Twov2d_POST:
  case AArch64::ST1Twov2s_POST:
  case AArch64::ST1Twov4h_POST:
  case AArch64::ST1Twov4s_POST:
  case AArch64::ST1Twov8b_POST:
  case AArch64::ST1Twov8h_POST:
  case AArch64::ST1i16_POST:
  case AArch64::ST1i32_POST:
  case AArch64::ST1i64_POST:
  case AArch64::ST1i8_POST:
  case AArch64::ST2GPostIndex:
  case AArch64::ST2Twov16b_POST:
  case AArch64::ST2Twov2d_POST:
  case AArch64::ST2Twov2s_POST:
  case AArch64::ST2Twov4h_POST:
  case AArch64::ST2Twov4s_POST:
  case AArch64::ST2Twov8b_POST:
  case AArch64::ST2Twov8h_POST:
  case AArch64::ST2i16_POST:
  case AArch64::ST2i32_POST:
  case AArch64::ST2i64_POST:
  case AArch64::ST2i8_POST:
  case AArch64::ST3Threev16b_POST:
  case AArch64::ST3Threev2d_POST:
  case AArch64::ST3Threev2s_POST:
  case AArch64::ST3Threev4h_POST:
  case AArch64::ST3Threev4s_POST:
  case AArch64::ST3Threev8b_POST:
  case AArch64::ST3Threev8h_POST:
  case AArch64::ST3i16_POST:
  case AArch64::ST3i32_POST:
  case AArch64::ST3i64_POST:
  case AArch64::ST3i8_POST:
  case AArch64::ST4Fourv16b_POST:
  case AArch64::ST4Fourv2d_POST:
  case AArch64::ST4Fourv2s_POST:
  case AArch64::ST4Fourv4h_POST:
  case AArch64::ST4Fourv4s_POST:
  case AArch64::ST4Fourv8b_POST:
  case AArch64::ST4Fourv8h_POST:
  case AArch64::ST4i16_POST:
  case AArch64::ST4i32_POST:
  case AArch64::ST4i64_POST:
  case AArch64::ST4i8_POST:
  case AArch64::STGPostIndex:
  case AArch64::STGPpost:
  case AArch64::STPDpost:
  case AArch64::STPQpost:
  case AArch64::STPSpost:
  case AArch64::STPWpost:
  case AArch64::STPXpost:
  case AArch64::STRBBpost:
  case AArch64::STRBpost:
  case AArch64::STRDpost:
  case AArch64::STRHHpost:
  case AArch64::STRHpost:
  case AArch64::STRQpost:
  case AArch64::STRSpost:
  case AArch64::STRWpost:
  case AArch64::STRXpost:
  case AArch64::STZ2GPostIndex:
  case AArch64::STZGPostIndex:
    return true;
  }
}
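
// Describe a load or store as a base operand plus an immediate byte offset.
// Post-indexed forms are reported with an offset of 0, and scalable (SVE)
// accesses set OffsetIsScalable.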
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
    bool &OffsetIsScalable, TypeSize &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
        !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() ||
        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  TypeSize Scale(0U, false);
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. The offset is the immediate operand multiplied by the
  // scaling factor. Unscaled instructions have a scaling factor of 1.
  // Post-indexed instructions are a special case and report an offset of 0.
  if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
    BaseOp = &LdSt.getOperand(2);
    Offset = 0;
  } else if (LdSt.getNumExplicitOperands() == 3) {
    BaseOp = &LdSt.getOperand(1);
    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseOp = &LdSt.getOperand(2);
    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
  }
  OffsetIsScalable = Scale.isScalable();

  return BaseOp->isReg() || BaseOp->isFI();
}

MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}
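
// Return the access scale and width for a load/store opcode. Opcodes that are
// not simple memory operations report a zero scale and width and return false.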
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
                                    TypeSize &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) {
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = TypeSize::getFixed(0);
    Width = TypeSize::getFixed(0);
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = TypeSize::getFixed(16);
    Width = TypeSize::getFixed(16);
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
  case AArch64::PRFMui:
    Scale = TypeSize::getFixed(8);
    Width = TypeSize::getFixed(8);
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = TypeSize::getFixed(4);
    Width = TypeSize::getFixed(4);
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = TypeSize::getFixed(2);
    Width = TypeSize::getFixed(2);
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(1);
    break;
  case AArch64::STRQpre:
  case AArch64::LDRQpost:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(16);
    break;
  case AArch64::LDRDpost:
  case AArch64::LDRDpre:
  case AArch64::LDRXpost:
  case AArch64::LDRXpre:
  case AArch64::STRDpost:
  case AArch64::STRDpre:
  case AArch64::STRXpost:
  case AArch64::STRXpre:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(8);
    break;
  case AArch64::STRWpost:
  case AArch64::STRWpre:
  case AArch64::LDRWpost:
  case AArch64::LDRWpre:
  case AArch64::STRSpost:
  case AArch64::STRSpre:
  case AArch64::LDRSpost:
  case AArch64::LDRSpre:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(4);
    break;
  case AArch64::LDRHpost:
  case AArch64::LDRHpre:
  case AArch64::STRHpost:
  case AArch64::STRHpre:
  case AArch64::LDRHHpost:
  case AArch64::LDRHHpre:
  case AArch64::STRHHpost:
  case AArch64::STRHHpre:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(2);
    break;
  case AArch64::LDRBpost:
  case AArch64::LDRBpre:
  case AArch64::STRBpost:
  case AArch64::STRBpre:
  case AArch64::LDRBBpost:
  case AArch64::LDRBBpre:
  case AArch64::STRBBpost:
  case AArch64::STRBBpre:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(1);
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(16);
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::LDAPURXi:
  case AArch64::STURXi:
  case AArch64::STURDi:
  case AArch64::STLURXi:
  case AArch64::PRFUMi:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(8);
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::LDAPURi:
  case AArch64::LDAPURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
  case AArch64::STLURWi:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(4);
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDAPURHi:
  case AArch64::LDAPURSHWi:
  case AArch64::LDAPURSHXi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
  case AArch64::STLURHi:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(2);
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDAPURBi:
  case AArch64::LDAPURSBWi:
  case AArch64::LDAPURSBXi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
  case AArch64::STLURBi:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(1);
    break;
  // LDP / STP (including pre/post inc)
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
  case AArch64::LDPQpost:
  case AArch64::LDPQpre:
  case AArch64::STPQpost:
  case AArch64::STPQpre:
    Scale = TypeSize::getFixed(16);
    Width = TypeSize::getFixed(16 * 2);
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
  case AArch64::LDPDpost:
  case AArch64::LDPDpre:
  case AArch64::LDPXpost:
  case AArch64::LDPXpre:
  case AArch64::STPDpost:
  case AArch64::STPDpre:
  case AArch64::STPXpost:
  case AArch64::STPXpre:
    Scale = TypeSize::getFixed(8);
    Width = TypeSize::getFixed(8 * 2);
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
  case AArch64::LDPSpost:
  case AArch64::LDPSpre:
  case AArch64::LDPWpost:
  case AArch64::LDPWpre:
  case AArch64::STPSpost:
  case AArch64::STPSpre:
  case AArch64::STPWpost:
  case AArch64::STPWpre:
    Scale = TypeSize::getFixed(4);
    Width = TypeSize::getFixed(4 * 2);
    break;
  case AArch64::StoreSwiftAsyncContext:
    // Store is an STRXui, but there might be an ADDXri in the expansion too.
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(8);
    break;
  case AArch64::ADDG:
    Scale = TypeSize::getFixed(16);
    Width = TypeSize::getFixed(0);
    break;
  case AArch64::TAGPstack:
    Scale = TypeSize::getFixed(16);
    Width = TypeSize::getFixed(0);
    // TAGP with a negative offset turns into SUBP, which has a maximum offset
    // of 63, both positive and negative.
    break;
  case AArch64::STGPreIndex:
  case AArch64::STGPostIndex:
  case AArch64::STZGi:
  case AArch64::STZGPreIndex:
  case AArch64::STZGPostIndex:
    Scale = TypeSize::getFixed(16);
    Width = TypeSize::getFixed(16);
    break;
  case AArch64::STR_ZZZZXI:
  case AArch64::LDR_ZZZZXI:
    Scale = TypeSize::getScalable(16);
    Width = TypeSize::getScalable(16 * 4);
    break;
  case AArch64::STR_ZZZXI:
  case AArch64::LDR_ZZZXI:
    Scale = TypeSize::getScalable(16);
    Width = TypeSize::getScalable(16 * 3);
    break;
  case AArch64::STR_ZZXI:
  case AArch64::LDR_ZZXI:
    Scale = TypeSize::getScalable(16);
    Width = TypeSize::getScalable(16 * 2);
    break;
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    Scale = TypeSize::getScalable(2);
    Width = TypeSize::getScalable(2);
    break;
  case AArch64::LDR_PPXI:
  case AArch64::STR_PPXI:
    Scale = TypeSize::getScalable(2);
    Width = TypeSize::getScalable(2 * 2);
    break;
  case AArch64::LDR_ZXI:
  case AArch64::STR_ZXI:
    Scale = TypeSize::getScalable(16);
    Width = TypeSize::getScalable(16);
    break;
  case AArch64::LD1B_IMM:
  case AArch64::LD1H_IMM:
  case AArch64::LD1W_IMM:
  case AArch64::LD1D_IMM:
  case AArch64::LDNT1B_ZRI:
  case AArch64::LDNT1H_ZRI:
  case AArch64::LDNT1W_ZRI:
  case AArch64::LDNT1D_ZRI:
  case AArch64::ST1B_IMM:
  case AArch64::ST1H_IMM:
  case AArch64::ST1W_IMM:
  case AArch64::ST1D_IMM:
  case AArch64::STNT1B_ZRI:
  case AArch64::STNT1H_ZRI:
  case AArch64::STNT1W_ZRI:
  case AArch64::STNT1D_ZRI:
  case AArch64::LDNF1B_IMM:
  case AArch64::LDNF1H_IMM:
  case AArch64::LDNF1W_IMM:
  case AArch64::LDNF1D_IMM:
    // A full vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::getScalable(16);
    Width = TypeSize::getScalable(16);
    break;
  case AArch64::LD2B_IMM:
  case AArch64::LD2H_IMM:
  case AArch64::LD2W_IMM:
  case AArch64::LD2D_IMM:
  case AArch64::ST2B_IMM:
  case AArch64::ST2H_IMM:
  case AArch64::ST2W_IMM:
  case AArch64::ST2D_IMM:
    Scale = TypeSize::getScalable(32);
    Width = TypeSize::getScalable(16 * 2);
    break;
  case AArch64::LD3B_IMM:
  case AArch64::LD3H_IMM:
  case AArch64::LD3W_IMM:
  case AArch64::LD3D_IMM:
  case AArch64::ST3B_IMM:
  case AArch64::ST3H_IMM:
  case AArch64::ST3W_IMM:
  case AArch64::ST3D_IMM:
    Scale = TypeSize::getScalable(48);
    Width = TypeSize::getScalable(16 * 3);
    break;
  case AArch64::LD4B_IMM:
  case AArch64::LD4H_IMM:
  case AArch64::LD4W_IMM:
  case AArch64::LD4D_IMM:
  case AArch64::ST4B_IMM:
  case AArch64::ST4H_IMM:
  case AArch64::ST4W_IMM:
  case AArch64::ST4D_IMM:
    Scale = TypeSize::getScalable(64);
    Width = TypeSize::getScalable(16 * 4);
    break;
  case AArch64::LD1B_H_IMM:
  case AArch64::LD1SB_H_IMM:
  case AArch64::LD1H_S_IMM:
  case AArch64::LD1SH_S_IMM:
  case AArch64::LD1W_D_IMM:
  case AArch64::LD1SW_D_IMM:
  case AArch64::ST1B_H_IMM:
  case AArch64::ST1H_S_IMM:
  case AArch64::ST1W_D_IMM:
  case AArch64::LDNF1B_H_IMM:
  case AArch64::LDNF1SB_H_IMM:
  case AArch64::LDNF1H_S_IMM:
  case AArch64::LDNF1SH_S_IMM:
  case AArch64::LDNF1W_D_IMM:
  case AArch64::LDNF1SW_D_IMM:
    // A half vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::getScalable(8);
    Width = TypeSize::getScalable(8);
    break;
  case AArch64::LD1B_S_IMM:
  case AArch64::LD1SB_S_IMM:
  case AArch64::LD1H_D_IMM:
  case AArch64::LD1SH_D_IMM:
  case AArch64::ST1B_S_IMM:
  case AArch64::ST1H_D_IMM:
  case AArch64::LDNF1B_S_IMM:
  case AArch64::LDNF1SB_S_IMM:
  case AArch64::LDNF1H_D_IMM:
  case AArch64::LDNF1SH_D_IMM:
    // A quarter vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::getScalable(4);
    Width = TypeSize::getScalable(4);
    break;
  case AArch64::LD1B_D_IMM:
  case AArch64::LD1SB_D_IMM:
  case AArch64::ST1B_D_IMM:
  case AArch64::LDNF1B_D_IMM:
  case AArch64::LDNF1SB_D_IMM:
    // An eighth of a vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::getScalable(2);
    Width = TypeSize::getScalable(2);
    break;
  case AArch64::ST2Gi:
  case AArch64::ST2GPreIndex:
  case AArch64::ST2GPostIndex:
  case AArch64::STZ2Gi:
  case AArch64::STZ2GPreIndex:
  case AArch64::STZ2GPostIndex:
    Scale = TypeSize::getFixed(16);
    Width = TypeSize::getFixed(32);
    break;
  case AArch64::STGPi:
  case AArch64::STGPpost:
  case AArch64::STGPpre:
    Scale = TypeSize::getFixed(16);
    Width = TypeSize::getFixed(16);
    break;
  case AArch64::LD1RB_IMM:
  case AArch64::LD1RB_H_IMM:
  case AArch64::LD1RB_S_IMM:
  case AArch64::LD1RB_D_IMM:
  case AArch64::LD1RSB_H_IMM:
  case AArch64::LD1RSB_S_IMM:
  case AArch64::LD1RSB_D_IMM:
    Scale = TypeSize::getFixed(1);
    Width = TypeSize::getFixed(1);
    break;
  case AArch64::LD1RH_IMM:
  case AArch64::LD1RH_S_IMM:
  case AArch64::LD1RH_D_IMM:
  case AArch64::LD1RSH_S_IMM:
  case AArch64::LD1RSH_D_IMM:
    Scale = TypeSize::getFixed(2);
    Width = TypeSize::getFixed(2);
    break;
  case AArch64::LD1RW_IMM:
  case AArch64::LD1RW_D_IMM:
  case AArch64::LD1RSW_IMM:
    Scale = TypeSize::getFixed(4);
    Width = TypeSize::getFixed(4);
    break;
  case AArch64::LD1RD_IMM:
    Scale = TypeSize::getFixed(8);
    Width = TypeSize::getFixed(8);
    break;
  }

  return true;
}
// Scaling factor for unscaled load or store.
int AArch64InstrInfo::getMemScale(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRSWpre:
  case AArch64::LDRWpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRWpre:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::STRXpre:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::LDPQi:
  case AArch64::LDRQpre:
  case AArch64::STPQi:
    return 16;
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  case AArch64::STGPi:
    return 16;
  }
}
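
// Pre-indexed load/store queries used when merging and pairing memory
// operations.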
bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDRWpre:
  case AArch64::LDRXpre:
  case AArch64::LDRSWpre:
  case AArch64::LDRSpre:
  case AArch64::LDRDpre:
  case AArch64::LDRQpre:
    return true;
  }
}

bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STRWpre:
  case AArch64::STRXpre:
  case AArch64::STRSpre:
  case AArch64::STRDpre:
  case AArch64::STRQpre:
    return true;
  }
}

bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
  return isPreLd(MI) || isPreSt(MI);
}
bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STGPi:
    return true;
  }
}
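
// Operand accessors for load/store instructions: the base register operand,
// the immediate offset operand, and (for register-offset forms) the shift
// amount operand.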
const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
  assert(MI.mayLoadOrStore() && "Load or store instruction expected");
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
                                                                            : 1;
  return MI.getOperand(Idx);
}

const MachineOperand &
AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
  assert(MI.mayLoadOrStore() && "Load or store instruction expected");
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
                                                                            : 2;
  return MI.getOperand(Idx);
}

const MachineOperand &
AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
  case AArch64::LDRQroX:
    return MI.getOperand(4);
  }
}
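
// Helpers for classifying which floating-point/SIMD register classes an
// instruction's operands belong to.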
static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
                                              Register Reg) {
  if (MI.getParent() == nullptr)
    return nullptr;
  const MachineFunction *MF = MI.getParent()->getParent();
  return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
}

bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
  auto IsHFPR = [&](const MachineOperand &Op) {
    if (!Op.isReg())
      return false;
    auto Reg = Op.getReg();
    if (Reg.isPhysical())
      return AArch64::FPR16RegClass.contains(Reg);
    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
    return TRC == &AArch64::FPR16RegClass ||
           TRC == &AArch64::FPR16_loRegClass;
  };
  return llvm::any_of(MI.operands(), IsHFPR);
}

bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
  auto IsQFPR = [&](const MachineOperand &Op) {
    if (!Op.isReg())
      return false;
    auto Reg = Op.getReg();
    if (Reg.isPhysical())
      return AArch64::FPR128RegClass.contains(Reg);
    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
    return TRC == &AArch64::FPR128RegClass ||
           TRC == &AArch64::FPR128_loRegClass;
  };
  return llvm::any_of(MI.operands(), IsQFPR);
}
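
// Returns true if the instruction acts as an implicit or explicit BTI landing
// pad: BTI hints, PACI(A|B)SP, and the PAUTH_PROLOGUE pseudo that expands to
// them.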
bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::PACIASP:
  case AArch64::PACIBSP:
    // Implicit BTI behavior.
    return true;
  case AArch64::PAUTH_PROLOGUE:
    // PAUTH_PROLOGUE expands to PACI(A|B)SP.
    return true;
  case AArch64::HINT: {
    unsigned Imm = MI.getOperand(0).getImm();
    // Explicit BTI instruction.
    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
      return true;
    // PACI(A|B)SP instructions.
    if (Imm == 25 || Imm == 27)
      return true;
    return false;
  }
  }
}

bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
  assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
  return AArch64::FPR128RegClass.contains(Reg) ||
         AArch64::FPR64RegClass.contains(Reg) ||
         AArch64::FPR32RegClass.contains(Reg) ||
         AArch64::FPR16RegClass.contains(Reg) ||
         AArch64::FPR8RegClass.contains(Reg);
}

bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
  auto IsFPR = [&](const MachineOperand &Op) {
    if (!Op.isReg())
      return false;
    auto Reg = Op.getReg();
    if (Reg.isPhysical())
      return isFpOrNEON(Reg);

    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
    return TRC == &AArch64::FPR128RegClass ||
           TRC == &AArch64::FPR128_loRegClass ||
           TRC == &AArch64::FPR64RegClass ||
           TRC == &AArch64::FPR64_loRegClass ||
           TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
           TRC == &AArch64::FPR8RegClass;
  };
  return llvm::any_of(MI.operands(), IsFPR);
}
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  int Scale = AArch64InstrInfo::getMemScale(Opc);

  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % Scale != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= Scale;
  return true;
}

static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::STRSui:
  case AArch64::STURSi:
    return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}

static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
                            int64_t Offset1, unsigned Opcode1, int FI2,
                            int64_t Offset2, unsigned Opcode2) {
  // Accesses through fixed stack object frame indices may access a different
  // fixed stack slot. Check that the object offsets + offsets match.
  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
    assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
    // Convert to scaled object offsets.
    int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
    if (ObjectOffset1 % Scale1 != 0)
      return false;
    ObjectOffset1 /= Scale1;
    int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
    if (ObjectOffset2 % Scale2 != 0)
      return false;
    ObjectOffset2 /= Scale2;
    ObjectOffset1 += Offset1;
    ObjectOffset2 += Offset2;
    return ObjectOffset1 + 1 == ObjectOffset2;
  }

  return FI1 == FI2;
}
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
    unsigned NumBytes) const {
  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  if (BaseOp1.getType() != BaseOp2.getType())
    return false;

  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // Check for both base regs and base FI.
  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
    return false;

  // Only cluster up to a single pair.
  if (ClusterSize > 2)
    return false;

  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  // Note: except for non-equal frame index bases
  if (BaseOp1.isFI()) {
    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
           "Caller should have ordered offsets.");

    const MachineFrameInfo &MFI =
        FirstLdSt.getParent()->getParent()->getFrameInfo();
    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
                           BaseOp2.getIndex(), Offset2, SecondOpc);
  }

  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");

  return Offset1 + 1 == Offset2;
}
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            MCRegister Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (Reg.isPhysical())
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}

void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL, MCRegister DestReg,
                                        MCRegister SrcReg, bool KillSrc,
                                        unsigned Opcode,
                                        ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I,
                                       const DebugLoc &DL, MCRegister DestReg,
                                       MCRegister SrcReg, bool KillSrc,
                                       unsigned Opcode, unsigned ZeroReg,
                                       llvm::ArrayRef<unsigned> Indices) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned NumRegs = Indices.size();

  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
         "GPR reg sequences should not be able to overlap");

  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    MIB.addReg(ZeroReg);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
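
// Emit a register-to-register copy, picking an instruction appropriate for the
// register class: ORR/FMOV/ADD #0 forms, tuple copies via the helpers above,
// and MSR/MRS for NZCV.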
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, MCRegister DestReg,
                                   MCRegister SrcReg, bool KillSrc,
                                   bool RenamableDest,
                                   bool RenamableSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        MCRegister DestRegX = TRI->getMatchingSuperReg(
            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        MCRegister SrcRegX = TRI->getMatchingSuperReg(
            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        MCRegister DestRegX = TRI->getMatchingSuperReg(
            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        MCRegister SrcRegX = TRI->getMatchingSuperReg(
            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  // Copy a Predicate register by ORRing with itself.
  if (AArch64::PPRRegClass.contains(DestReg) &&
      AArch64::PPRRegClass.contains(SrcReg)) {
    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
           "Unexpected SVE register.");
    BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
        .addReg(SrcReg) // Pg
        .addReg(SrcReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Copy a predicate-as-counter register by ORRing with itself as if it
  // were a regular predicate (mask) register.
  bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
  bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
  if (DestIsPNR || SrcIsPNR) {
    auto ToPPR = [](MCRegister R) -> MCRegister {
      return (R - AArch64::PN0) + AArch64::P0;
    };
    MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
    MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;

    if (PPRSrcReg != PPRDestReg) {
      auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
                       .addReg(PPRSrcReg) // Pg
                       .addReg(PPRSrcReg)
                       .addReg(PPRSrcReg, getKillRegState(KillSrc));
      if (DestIsPNR)
        NewMI.addDef(DestReg, RegState::Implicit);
    }
    return;
  }

  // Copy a Z register by ORRing with itself.
  if (AArch64::ZPRRegClass.contains(DestReg) &&
      AArch64::ZPRRegClass.contains(SrcReg)) {
    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
           "Unexpected SVE register.");
    BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
        .addReg(SrcReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Copy a Z register pair by copying the individual sub-registers.
  if ((AArch64::ZPR2RegClass.contains(DestReg) ||
       AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
      (AArch64::ZPR2RegClass.contains(SrcReg) ||
       AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
           "Unexpected SVE register.");
    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                     Indices);
    return;
  }

  // Copy a Z register triple by copying the individual sub-registers.
  if (AArch64::ZPR3RegClass.contains(DestReg) &&
      AArch64::ZPR3RegClass.contains(SrcReg)) {
    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
           "Unexpected SVE register.");
    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                       AArch64::zsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                     Indices);
    return;
  }

  // Copy a Z register quad by copying the individual sub-registers.
  if ((AArch64::ZPR4RegClass.contains(DestReg) ||
       AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
      (AArch64::ZPR4RegClass.contains(SrcReg) ||
       AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
           "Unexpected SVE register.");
    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                       AArch64::zsub2, AArch64::zsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                     Indices);
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2, AArch64::dsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2, AArch64::qsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
                    AArch64::XZR, Indices);
    return;
  }

  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
                    AArch64::WZR, Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.isSVEorStreamingSVEAvailable() &&
        !Subtarget.isNeonAvailable())
      BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
          .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
          .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
          .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
    else if (Subtarget.isNeonAvailable())
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    DestReg =
        RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
    SrcReg =
        RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    DestReg =
        RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
    SrcReg =
        RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  const TargetRegisterInfo &TRI = getRegisterInfo();
  errs() << TRI.getRegAsmName(DestReg) << " = COPY "
         << TRI.getRegAsmName(SrcReg) << "\n";
  llvm_unreachable("unimplemented reg-to-reg copy");
}
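
// Spill helpers: emit the store that saves a register (or register pair) to a
// stack slot, selecting the opcode from the register class spill size.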
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsertBefore,
                                    const MCInstrDesc &MCID,
                                    Register SrcReg, bool IsKill,
                                    unsigned SubIdx0, unsigned SubIdx1, int FI,
                                    MachineMemOperand *MMO) {
  Register SrcReg0 = SrcReg;
  Register SrcReg1 = SrcReg;
  if (SrcReg.isPhysical()) {
    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
    SubIdx0 = 0;
    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
    SubIdx1 = 0;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
      .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
                                           Register VReg,
                                           MachineInstr::MIFlag Flags) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
  unsigned Opc = 0;
  bool Offset = true;
  MCRegister PNRReg = MCRegister::NoRegister;
  unsigned StackID = TargetStackID::Default;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
             AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_PXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (SrcReg.isVirtual())
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STR_PPXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (SrcReg.isVirtual())
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPWi), SrcReg, isKill,
                              AArch64::sube32, AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPXi), SrcReg, isKill,
                              AArch64::sube64, AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
               AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
               AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }
  assert(Opc && "Unknown register class");
  MFI.setStackID(FI, StackID);

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  if (PNRReg.isValid())
    MI.addDef(PNRReg, RegState::Implicit);
  MI.addMemOperand(MMO);
}
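
// Reload helpers: the mirror image of the spill code above, selecting the load
// opcode from the register class spill size.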
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertBefore,
                                     const MCInstrDesc &MCID,
                                     Register DestReg, unsigned SubIdx0,
                                     unsigned SubIdx1, int FI,
                                     MachineMemOperand *MMO) {
  Register DestReg0 = DestReg;
  Register DestReg1 = DestReg;
  bool IsUndef = true;
  if (DestReg.isPhysical()) {
    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
    SubIdx0 = 0;
    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
    SubIdx1 = 0;
    IsUndef = false;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
      .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
    Register VReg, MachineInstr::MIFlag Flags) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

  unsigned Opc = 0;
  bool Offset = true;
  unsigned StackID = TargetStackID::Default;
  Register PNRReg = MCRegister::NoRegister;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2: {
    bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      if (IsPNR)
        PNRReg = DestReg;
      Opc = AArch64::LDR_PXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (DestReg.isVirtual())
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDR_PPXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (DestReg.isVirtual())
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
                               AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
                               AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
               AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
               AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }

  assert(Opc && "Unknown register class");
  MFI.setStackID(FI, StackID);

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  if (PNRReg.isValid() && !PNRReg.isVirtual())
    MI.addDef(PNRReg, RegState::Implicit);
  MI.addMemOperand(MMO);
}
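
// Returns true if any instruction between DefMI and UseMI (exclusive) reads or
// writes NZCV.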
5596 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr
&DefMI
,
5597 const MachineInstr
&UseMI
,
5598 const TargetRegisterInfo
*TRI
) {
5599 return any_of(instructionsWithoutDebug(std::next(DefMI
.getIterator()),
5600 UseMI
.getIterator()),
5601 [TRI
](const MachineInstr
&I
) {
5602 return I
.modifiesRegister(AArch64::NZCV
, TRI
) ||
5603 I
.readsRegister(AArch64::NZCV
, TRI
);
5607 void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5608 const StackOffset
&Offset
, int64_t &ByteSized
, int64_t &VGSized
) {
5609 // The smallest scalable element supported by scaled SVE addressing
5610 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5611 // byte offset must always be a multiple of 2.
5612 assert(Offset
.getScalable() % 2 == 0 && "Invalid frame offset");
5614 // VGSized offsets are divided by '2', because the VG register is the
5615 // the number of 64bit granules as opposed to 128bit vector chunks,
5616 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5617 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5618 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5619 ByteSized
= Offset
.getFixed();
5620 VGSized
= Offset
.getScalable() / 2;
/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
    const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
    int64_t &NumDataVectors) {
  // The smallest scalable elements supported by scaled SVE addressing
  // modes are predicates, which are 2 scalable bytes in size. So the scalable
  // byte offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  NumBytes = Offset.getFixed();
  NumDataVectors = 0;
  NumPredicateVectors = Offset.getScalable() / 2;
  // This method is used to get the offsets to adjust the frame offset.
  // If the function requires ADDPL to be used and needs more than two ADDPL
  // instructions, part of the offset is folded into NumDataVectors so that it
  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}
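// Worked example (illustrative values only): a scalable offset of 150 bytes
// gives NumPredicateVectors = 75, which would need more than two ADDPL
// instructions, so it is folded into NumDataVectors = 9 (one ADDVL) with
// NumPredicateVectors = 3 (one ADDPL) left over.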
// Convenience function to create a DWARF expression for
//   Expr + NumBytes + NumVGScaledBytes * AArch64::VG
static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
                                     int NumVGScaledBytes, unsigned VG,
                                     llvm::raw_string_ostream &Comment) {
  uint8_t buffer[16];

  if (NumBytes) {
    Expr.push_back(dwarf::DW_OP_consts);
    Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
    Expr.push_back((uint8_t)dwarf::DW_OP_plus);

    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
  }

  if (NumVGScaledBytes) {
    Expr.push_back((uint8_t)dwarf::DW_OP_consts);
    Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));

    Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
    Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
    Expr.push_back(0);

    Expr.push_back((uint8_t)dwarf::DW_OP_mul);
    Expr.push_back((uint8_t)dwarf::DW_OP_plus);

    Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
            << std::abs(NumVGScaledBytes) << " * VG";
  }
}
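// For NumBytes = 16 and NumVGScaledBytes = 8 (illustrative values), the
// appended expression is roughly:
//   DW_OP_consts 16, DW_OP_plus,
//   DW_OP_consts 8, DW_OP_bregx VG 0, DW_OP_mul, DW_OP_plus
// which evaluates to "<Expr> + 16 + 8 * VG".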
// Creates an MCCFIInstruction:
//    { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
                                               unsigned Reg,
                                               const StackOffset &Offset) {
  int64_t NumBytes, NumVGScaledBytes;
  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
                                                        NumVGScaledBytes);
  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);

  if (Reg == AArch64::SP)
    Comment << "sp";
  else if (Reg == AArch64::FP)
    Comment << "fp";
  else
    Comment << printReg(Reg, &TRI);

  // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
  SmallString<64> Expr;
  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
  Expr.push_back(0);
  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);

  // Wrap this into DW_CFA_def_cfa.
  SmallString<64> DefCfaExpr;
  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
  uint8_t buffer[16];
  DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
  DefCfaExpr.append(Expr.str());
  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
                                        Comment.str());
}
MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
                                    unsigned FrameReg, unsigned Reg,
                                    const StackOffset &Offset,
                                    bool LastAdjustmentWasScalable) {
  if (Offset.getScalable())
    return createDefCFAExpression(TRI, Reg, Offset);

  if (FrameReg == Reg && !LastAdjustmentWasScalable)
    return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));

  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
  return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
}
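// Rough illustration of the three cases above: a purely fixed adjustment of
// 32 bytes with FrameReg == Reg becomes ".cfi_def_cfa_offset 32"; the same
// fixed offset based on a different register becomes ".cfi_def_cfa <reg>, 32";
// and any scalable component falls back to the DW_CFA_def_cfa_expression
// escape built above, of the form "reg + bytes + vg-scaled-bytes * VG".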
MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
                                       unsigned Reg,
                                       const StackOffset &OffsetFromDefCFA) {
  int64_t NumBytes, NumVGScaledBytes;
  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
      OffsetFromDefCFA, NumBytes, NumVGScaledBytes);

  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);

  // Non-scalable offsets can use DW_CFA_offset directly.
  if (!NumVGScaledBytes)
    return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);

  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);
  Comment << printReg(Reg, &TRI) << " @ cfa";

  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
  SmallString<64> OffsetExpr;
  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);

  // Wrap this into DW_CFA_expression
  SmallString<64> CfaExpr;
  CfaExpr.push_back(dwarf::DW_CFA_expression);
  uint8_t buffer[16];
  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
  CfaExpr.append(OffsetExpr.str());

  return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
                                        Comment.str());
}
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, unsigned DestReg,
                               unsigned SrcReg, int64_t Offset, unsigned Opc,
                               const TargetInstrInfo *TII,
                               MachineInstr::MIFlag Flag, bool NeedsWinCFI,
                               bool *HasWinCFI, bool EmitCFAOffset,
                               StackOffset CFAOffset, unsigned FrameReg) {
  int Sign = 1;
  unsigned MaxEncoding, ShiftSize;
  switch (Opc) {
  case AArch64::ADDXri:
  case AArch64::ADDSXri:
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    MaxEncoding = 0xfff;
    ShiftSize = 12;
    break;
  case AArch64::ADDVL_XXI:
  case AArch64::ADDPL_XXI:
  case AArch64::ADDSVL_XXI:
  case AArch64::ADDSPL_XXI:
    MaxEncoding = 31;
    ShiftSize = 0;
    if (Offset < 0) {
      MaxEncoding = 32;
      Sign = -1;
      Offset = -Offset;
    }
    break;
  default:
    llvm_unreachable("Unsupported opcode");
  }

  // `Offset` can be in bytes or in "scalable bytes".
  int VScale = 1;
  if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
    VScale = 16;
  else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
    VScale = 2;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  Register TmpReg = DestReg;
  if (TmpReg == AArch64::XZR)
    TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
        &AArch64::GPR64RegClass);
  do {
    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
    unsigned LocalShiftSize = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal = ThisVal >> ShiftSize;
      LocalShiftSize = ShiftSize;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");

    Offset -= ThisVal << LocalShiftSize;
    if (Offset == 0)
      TmpReg = DestReg;
    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
                   .addReg(SrcReg)
                   .addImm(Sign * (int)ThisVal);
    if (ShiftSize)
      MBI = MBI.addImm(
          AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
    MBI = MBI.setMIFlag(Flag);

    auto Change =
        VScale == 1
            ? StackOffset::getFixed(ThisVal << LocalShiftSize)
            : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
    if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
      CFAOffset += Change;
    else
      CFAOffset -= Change;
    if (EmitCFAOffset && DestReg == TmpReg) {
      MachineFunction &MF = *MBB.getParent();
      const TargetSubtargetInfo &STI = MF.getSubtarget();
      const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

      unsigned CFIIndex = MF.addFrameInst(
          createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(Flag);
    }

    if (NeedsWinCFI) {
      assert(Sign == 1 && "SEH directives should always have a positive sign");
      int Imm = (int)(ThisVal << LocalShiftSize);
      if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
          (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
        if (HasWinCFI)
          *HasWinCFI = true;
        if (Imm == 0)
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
        else
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
              .addImm(Imm)
              .setMIFlag(Flag);
        assert(Offset == 0 && "Expected remaining offset to be zero to "
                              "emit a single SEH directive");
      } else if (DestReg == AArch64::SP) {
        if (HasWinCFI)
          *HasWinCFI = true;
        assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
            .addImm(Imm)
            .setMIFlag(Flag);
      }
    }

    SrcReg = TmpReg;
  } while (Offset);
}
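// Illustrative example (not emitted verbatim): lowering a SUBXri adjustment
// of 0x12345 bytes is split into encodable chunks, roughly
//   sub sp, sp, #0x12, lsl #12
//   sub sp, sp, #0x345
// because each ADDXri/SUBXri immediate is limited to 12 bits plus an optional
// 12-bit left shift.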
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg,
                           StackOffset Offset, const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV,
                           bool NeedsWinCFI, bool *HasWinCFI,
                           bool EmitCFAOffset, StackOffset CFAOffset,
                           unsigned FrameReg) {
  // If a function is marked as arm_locally_streaming, then the runtime value
  // of vscale in the prologue/epilogue is different from the runtime value of
  // vscale in the function's body. To avoid having to consider multiple
  // vscales, we can use `addsvl` to allocate any scalable stack-slots, which
  // under most circumstances will be only locals, not callee-save slots.
  const Function &F = MBB.getParent()->getFunction();
  bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");

  int64_t Bytes, NumPredicateVectors, NumDataVectors;
  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
      Offset, Bytes, NumPredicateVectors, NumDataVectors);

  // First emit non-scalable frame offsets, or a simple 'mov'.
  if (Bytes || (!Offset && SrcReg != DestReg)) {
    assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
           "SP increment/decrement not 8-byte aligned");
    unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
    if (Bytes < 0) {
      Bytes = -Bytes;
      Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
    }
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
                       NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
                       FrameReg);
    CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
                     ? StackOffset::getFixed(-Bytes)
                     : StackOffset::getFixed(Bytes);
    SrcReg = DestReg;
    FrameReg = DestReg;
  }

  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
         "SetNZCV not supported with SVE vectors");
  assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
         "WinCFI not supported with SVE vectors");

  if (NumDataVectors) {
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
                       UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
                       TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
                       CFAOffset, FrameReg);
    CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
    SrcReg = DestReg;
  }

  if (NumPredicateVectors) {
    assert(DestReg != AArch64::SP && "Unaligned access to SP");
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
                       UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
                       TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
                       CFAOffset, FrameReg);
  }
}
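// Sketch of how a mixed offset is materialized (illustrative values only):
// for Offset = 32 fixed bytes + 32 scalable bytes, the fixed part becomes a
// single ADDXri, the 16 predicate-vector units fold into NumDataVectors = 2
// with NumPredicateVectors = 0, and one ADDVL_XXI (or ADDSVL_XXI in
// locally-streaming functions) adjusts by 2 vector lengths.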
5954 MachineInstr
*AArch64InstrInfo::foldMemoryOperandImpl(
5955 MachineFunction
&MF
, MachineInstr
&MI
, ArrayRef
<unsigned> Ops
,
5956 MachineBasicBlock::iterator InsertPt
, int FrameIndex
,
5957 LiveIntervals
*LIS
, VirtRegMap
*VRM
) const {
5958 // This is a bit of a hack. Consider this instruction:
5960 // %0 = COPY %sp; GPR64all:%0
5962 // We explicitly chose GPR64all for the virtual register so such a copy might
5963 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5964 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5965 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5967 // To prevent that, we are going to constrain the %0 register class here.
5968 if (MI
.isFullCopy()) {
5969 Register DstReg
= MI
.getOperand(0).getReg();
5970 Register SrcReg
= MI
.getOperand(1).getReg();
5971 if (SrcReg
== AArch64::SP
&& DstReg
.isVirtual()) {
5972 MF
.getRegInfo().constrainRegClass(DstReg
, &AArch64::GPR64RegClass
);
5975 if (DstReg
== AArch64::SP
&& SrcReg
.isVirtual()) {
5976 MF
.getRegInfo().constrainRegClass(SrcReg
, &AArch64::GPR64RegClass
);
5979 // Nothing can folded with copy from/to NZCV.
5980 if (SrcReg
== AArch64::NZCV
|| DstReg
== AArch64::NZCV
)
5984 // Handle the case where a copy is being spilled or filled but the source
5985 // and destination register class don't match. For example:
5987 // %0 = COPY %xzr; GPR64common:%0
5989 // In this case we can still safely fold away the COPY and generate the
5990 // following spill code:
5992 // STRXui %xzr, %stack.0
5994 // This also eliminates spilled cross register class COPYs (e.g. between x and
5995 // d regs) of the same size. For example:
5997 // %0 = COPY %1; GPR64:%0, FPR64:%1
5999 // will be filled as
6001 // LDRDui %0, fi<#0>
6005 // LDRXui %Temp, fi<#0>
6008 if (MI
.isCopy() && Ops
.size() == 1 &&
6009 // Make sure we're only folding the explicit COPY defs/uses.
6010 (Ops
[0] == 0 || Ops
[0] == 1)) {
6011 bool IsSpill
= Ops
[0] == 0;
6012 bool IsFill
= !IsSpill
;
6013 const TargetRegisterInfo
&TRI
= *MF
.getSubtarget().getRegisterInfo();
6014 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
6015 MachineBasicBlock
&MBB
= *MI
.getParent();
6016 const MachineOperand
&DstMO
= MI
.getOperand(0);
6017 const MachineOperand
&SrcMO
= MI
.getOperand(1);
6018 Register DstReg
= DstMO
.getReg();
6019 Register SrcReg
= SrcMO
.getReg();
6020 // This is slightly expensive to compute for physical regs since
6021 // getMinimalPhysRegClass is slow.
6022 auto getRegClass
= [&](unsigned Reg
) {
6023 return Register::isVirtualRegister(Reg
) ? MRI
.getRegClass(Reg
)
6024 : TRI
.getMinimalPhysRegClass(Reg
);
6027 if (DstMO
.getSubReg() == 0 && SrcMO
.getSubReg() == 0) {
6028 assert(TRI
.getRegSizeInBits(*getRegClass(DstReg
)) ==
6029 TRI
.getRegSizeInBits(*getRegClass(SrcReg
)) &&
6030 "Mismatched register size in non subreg COPY");
6032 storeRegToStackSlot(MBB
, InsertPt
, SrcReg
, SrcMO
.isKill(), FrameIndex
,
6033 getRegClass(SrcReg
), &TRI
, Register());
6035 loadRegFromStackSlot(MBB
, InsertPt
, DstReg
, FrameIndex
,
6036 getRegClass(DstReg
), &TRI
, Register());
6037 return &*--InsertPt
;
6040 // Handle cases like spilling def of:
6042 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6044 // where the physical register source can be widened and stored to the full
6045 // virtual reg destination stack slot, in this case producing:
6047 // STRXui %xzr, %stack.0
6049 if (IsSpill
&& DstMO
.isUndef() && SrcReg
== AArch64::WZR
&&
6050 TRI
.getRegSizeInBits(*getRegClass(DstReg
)) == 64) {
6051 assert(SrcMO
.getSubReg() == 0 &&
6052 "Unexpected subreg on physical register");
6053 storeRegToStackSlot(MBB
, InsertPt
, AArch64::XZR
, SrcMO
.isKill(),
6054 FrameIndex
, &AArch64::GPR64RegClass
, &TRI
,
6056 return &*--InsertPt
;
6059 // Handle cases like filling use of:
6061 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6063 // where we can load the full virtual reg source stack slot, into the subreg
6064 // destination, in this case producing:
6066 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6068 if (IsFill
&& SrcMO
.getSubReg() == 0 && DstMO
.isUndef()) {
6069 const TargetRegisterClass
*FillRC
;
6070 switch (DstMO
.getSubReg()) {
6074 case AArch64::sub_32
:
6075 FillRC
= &AArch64::GPR32RegClass
;
6078 FillRC
= &AArch64::FPR32RegClass
;
6081 FillRC
= &AArch64::FPR64RegClass
;
6086 assert(TRI
.getRegSizeInBits(*getRegClass(SrcReg
)) ==
6087 TRI
.getRegSizeInBits(*FillRC
) &&
6088 "Mismatched regclass size on folded subreg COPY");
6089 loadRegFromStackSlot(MBB
, InsertPt
, DstReg
, FrameIndex
, FillRC
, &TRI
,
6091 MachineInstr
&LoadMI
= *--InsertPt
;
6092 MachineOperand
&LoadDst
= LoadMI
.getOperand(0);
6093 assert(LoadDst
.getSubReg() == 0 && "unexpected subreg on fill load");
6094 LoadDst
.setSubReg(DstMO
.getSubReg());
6095 LoadDst
.setIsUndef();
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
                                    StackOffset &SOffset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int64_t *EmittableOffset) {
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;

  // Exit early for structured vector spills/fills as they can't take an
  // immediate offset.
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LD1Rv1d:
  case AArch64::LD1Rv2s:
  case AArch64::LD1Rv2d:
  case AArch64::LD1Rv4h:
  case AArch64::LD1Rv4s:
  case AArch64::LD1Rv8b:
  case AArch64::LD1Rv8h:
  case AArch64::LD1Rv16b:
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
  case AArch64::ST1i8:
  case AArch64::ST1i16:
  case AArch64::ST1i32:
  case AArch64::ST1i64:
  case AArch64::IRGstack:
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return AArch64FrameOffsetCannotUpdate;
  }

  // Get the min/max offset and the scale.
  TypeSize ScaleValue(0U, false), Width(0U, false);
  int64_t MinOff, MaxOff;
  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
                                      MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  // Construct the complete offset.
  bool IsMulVL = ScaleValue.isScalable();
  unsigned Scale = ScaleValue.getKnownMinValue();
  int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();

  const MachineOperand &ImmOpnd =
      MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
  Offset += ImmOpnd.getImm() * Scale;

  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset and there is an unscaled op to use.
  std::optional<unsigned> UnscaledOp =
      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
  if (useUnscaledOp &&
      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
                                      MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  Scale = ScaleValue.getKnownMinValue();
  assert(IsMulVL == ScaleValue.isScalable() &&
         "Unscaled opcode has different value for scalable");

  int64_t Remainder = Offset % Scale;
  assert(!(Remainder && useUnscaledOp) &&
         "Cannot have remainder when using unscaled op");

  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
  int64_t NewOffset = Offset / Scale;
  if (MinOff <= NewOffset && NewOffset <= MaxOff)
    Offset = Remainder;
  else {
    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
    Offset = Offset - (NewOffset * Scale);
  }

  if (EmittableOffset)
    *EmittableOffset = NewOffset;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp && UnscaledOp)
    *OutUnscaledOp = *UnscaledOp;

  if (IsMulVL)
    SOffset = StackOffset::get(SOffset.getFixed(), Offset);
  else
    SOffset = StackOffset::get(Offset, SOffset.getScalable());
  return AArch64FrameOffsetCanUpdate |
         (SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
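// Numeric sketch (illustrative values only): for an LDRXui, whose scaled
// immediate covers 0..4095 units of 8 bytes, a byte offset of 20 is not a
// multiple of the scale, so the unscaled LDURXi form is preferred and
// NewOffset = 20; a byte offset of 40 stays scaled with NewOffset = 5 and no
// remainder.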
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, StackOffset &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = StackOffset();
    return true;
  }

  int64_t NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return !Offset;
  }

  return false;
}
void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI) const {
  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
}

MCInst AArch64InstrInfo::getNop() const {
  return MCInstBuilder(AArch64::HINT).addImm(0);
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6261 // True when Opc sets flag
6262 static bool isCombineInstrSettingFlag(unsigned Opc
) {
6264 case AArch64::ADDSWrr
:
6265 case AArch64::ADDSWri
:
6266 case AArch64::ADDSXrr
:
6267 case AArch64::ADDSXri
:
6268 case AArch64::SUBSWrr
:
6269 case AArch64::SUBSXrr
:
6270 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6271 case AArch64::SUBSWri
:
6272 case AArch64::SUBSXri
:
6280 // 32b Opcodes that can be combined with a MUL
6281 static bool isCombineInstrCandidate32(unsigned Opc
) {
6283 case AArch64::ADDWrr
:
6284 case AArch64::ADDWri
:
6285 case AArch64::SUBWrr
:
6286 case AArch64::ADDSWrr
:
6287 case AArch64::ADDSWri
:
6288 case AArch64::SUBSWrr
:
6289 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6290 case AArch64::SUBWri
:
6291 case AArch64::SUBSWri
:
6299 // 64b Opcodes that can be combined with a MUL
6300 static bool isCombineInstrCandidate64(unsigned Opc
) {
6302 case AArch64::ADDXrr
:
6303 case AArch64::ADDXri
:
6304 case AArch64::SUBXrr
:
6305 case AArch64::ADDSXrr
:
6306 case AArch64::ADDSXri
:
6307 case AArch64::SUBSXrr
:
6308 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6309 case AArch64::SUBXri
:
6310 case AArch64::SUBSXri
:
6311 case AArch64::ADDv8i8
:
6312 case AArch64::ADDv16i8
:
6313 case AArch64::ADDv4i16
:
6314 case AArch64::ADDv8i16
:
6315 case AArch64::ADDv2i32
:
6316 case AArch64::ADDv4i32
:
6317 case AArch64::SUBv8i8
:
6318 case AArch64::SUBv16i8
:
6319 case AArch64::SUBv4i16
:
6320 case AArch64::SUBv8i16
:
6321 case AArch64::SUBv2i32
:
6322 case AArch64::SUBv4i32
:
6330 // FP Opcodes that can be combined with a FMUL.
6331 static bool isCombineInstrCandidateFP(const MachineInstr
&Inst
) {
6332 switch (Inst
.getOpcode()) {
6335 case AArch64::FADDHrr
:
6336 case AArch64::FADDSrr
:
6337 case AArch64::FADDDrr
:
6338 case AArch64::FADDv4f16
:
6339 case AArch64::FADDv8f16
:
6340 case AArch64::FADDv2f32
:
6341 case AArch64::FADDv2f64
:
6342 case AArch64::FADDv4f32
:
6343 case AArch64::FSUBHrr
:
6344 case AArch64::FSUBSrr
:
6345 case AArch64::FSUBDrr
:
6346 case AArch64::FSUBv4f16
:
6347 case AArch64::FSUBv8f16
:
6348 case AArch64::FSUBv2f32
:
6349 case AArch64::FSUBv2f64
:
6350 case AArch64::FSUBv4f32
:
6351 TargetOptions Options
= Inst
.getParent()->getParent()->getTarget().Options
;
6352 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6353 // the target options or if FADD/FSUB has the contract fast-math flag.
6354 return Options
.UnsafeFPMath
||
6355 Options
.AllowFPOpFusion
== FPOpFusion::Fast
||
6356 Inst
.getFlag(MachineInstr::FmContract
);
6362 // Opcodes that can be combined with a MUL
6363 static bool isCombineInstrCandidate(unsigned Opc
) {
6364 return (isCombineInstrCandidate32(Opc
) || isCombineInstrCandidate64(Opc
));
6368 // Utility routine that checks if \param MO is defined by an
6369 // \param CombineOpc instruction in the basic block \param MBB
6370 static bool canCombine(MachineBasicBlock
&MBB
, MachineOperand
&MO
,
6371 unsigned CombineOpc
, unsigned ZeroReg
= 0,
6372 bool CheckZeroReg
= false) {
6373 MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
6374 MachineInstr
*MI
= nullptr;
6376 if (MO
.isReg() && MO
.getReg().isVirtual())
6377 MI
= MRI
.getUniqueVRegDef(MO
.getReg());
6378 // And it needs to be in the trace (otherwise, it won't have a depth).
6379 if (!MI
|| MI
->getParent() != &MBB
|| (unsigned)MI
->getOpcode() != CombineOpc
)
6381 // Must only used by the user we combine with.
6382 if (!MRI
.hasOneNonDBGUse(MI
->getOperand(0).getReg()))
6386 assert(MI
->getNumOperands() >= 4 && MI
->getOperand(0).isReg() &&
6387 MI
->getOperand(1).isReg() && MI
->getOperand(2).isReg() &&
6388 MI
->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
6389 // The third input reg must be zero.
6390 if (MI
->getOperand(3).getReg() != ZeroReg
)
6394 if (isCombineInstrSettingFlag(CombineOpc
) &&
6395 MI
->findRegisterDefOperandIdx(AArch64::NZCV
, /*TRI=*/nullptr, true) == -1)
6402 // Is \param MO defined by an integer multiply and can be combined?
6403 static bool canCombineWithMUL(MachineBasicBlock
&MBB
, MachineOperand
&MO
,
6404 unsigned MulOpc
, unsigned ZeroReg
) {
6405 return canCombine(MBB
, MO
, MulOpc
, ZeroReg
, true);
6409 // Is \param MO defined by a floating-point multiply and can be combined?
6410 static bool canCombineWithFMUL(MachineBasicBlock
&MBB
, MachineOperand
&MO
,
6412 return canCombine(MBB
, MO
, MulOpc
);
6415 // TODO: There are many more machine instruction opcodes to match:
6416 // 1. Other data types (integer, vectors)
6417 // 2. Other math / logic operations (xor, or)
6418 // 3. Other forms of the same operation (intrinsics and other variants)
6419 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr
&Inst
,
6420 bool Invert
) const {
6423 switch (Inst
.getOpcode()) {
6424 // == Floating-point types ==
6425 // -- Floating-point instructions --
6426 case AArch64::FADDHrr
:
6427 case AArch64::FADDSrr
:
6428 case AArch64::FADDDrr
:
6429 case AArch64::FMULHrr
:
6430 case AArch64::FMULSrr
:
6431 case AArch64::FMULDrr
:
6432 case AArch64::FMULX16
:
6433 case AArch64::FMULX32
:
6434 case AArch64::FMULX64
:
6435 // -- Advanced SIMD instructions --
6436 case AArch64::FADDv4f16
:
6437 case AArch64::FADDv8f16
:
6438 case AArch64::FADDv2f32
:
6439 case AArch64::FADDv4f32
:
6440 case AArch64::FADDv2f64
:
6441 case AArch64::FMULv4f16
:
6442 case AArch64::FMULv8f16
:
6443 case AArch64::FMULv2f32
:
6444 case AArch64::FMULv4f32
:
6445 case AArch64::FMULv2f64
:
6446 case AArch64::FMULXv4f16
:
6447 case AArch64::FMULXv8f16
:
6448 case AArch64::FMULXv2f32
:
6449 case AArch64::FMULXv4f32
:
6450 case AArch64::FMULXv2f64
:
6451 // -- SVE instructions --
6452 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6453 // in the SVE instruction set (though there are predicated ones).
6454 case AArch64::FADD_ZZZ_H
:
6455 case AArch64::FADD_ZZZ_S
:
6456 case AArch64::FADD_ZZZ_D
:
6457 case AArch64::FMUL_ZZZ_H
:
6458 case AArch64::FMUL_ZZZ_S
:
6459 case AArch64::FMUL_ZZZ_D
:
6460 return Inst
.getParent()->getParent()->getTarget().Options
.UnsafeFPMath
||
6461 (Inst
.getFlag(MachineInstr::MIFlag::FmReassoc
) &&
6462 Inst
.getFlag(MachineInstr::MIFlag::FmNsz
));
6464 // == Integer types ==
6465 // -- Base instructions --
6466 // Opcodes MULWrr and MULXrr don't exist because
6467 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6468 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6469 // The machine-combiner does not support three-source-operands machine
6470 // instruction. So we cannot reassociate MULs.
6471 case AArch64::ADDWrr
:
6472 case AArch64::ADDXrr
:
6473 case AArch64::ANDWrr
:
6474 case AArch64::ANDXrr
:
6475 case AArch64::ORRWrr
:
6476 case AArch64::ORRXrr
:
6477 case AArch64::EORWrr
:
6478 case AArch64::EORXrr
:
6479 case AArch64::EONWrr
:
6480 case AArch64::EONXrr
:
6481 // -- Advanced SIMD instructions --
6482 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6483 // in the Advanced SIMD instruction set.
6484 case AArch64::ADDv8i8
:
6485 case AArch64::ADDv16i8
:
6486 case AArch64::ADDv4i16
:
6487 case AArch64::ADDv8i16
:
6488 case AArch64::ADDv2i32
:
6489 case AArch64::ADDv4i32
:
6490 case AArch64::ADDv1i64
:
6491 case AArch64::ADDv2i64
:
6492 case AArch64::MULv8i8
:
6493 case AArch64::MULv16i8
:
6494 case AArch64::MULv4i16
:
6495 case AArch64::MULv8i16
:
6496 case AArch64::MULv2i32
:
6497 case AArch64::MULv4i32
:
6498 case AArch64::ANDv8i8
:
6499 case AArch64::ANDv16i8
:
6500 case AArch64::ORRv8i8
:
6501 case AArch64::ORRv16i8
:
6502 case AArch64::EORv8i8
:
6503 case AArch64::EORv16i8
:
6504 // -- SVE instructions --
6505 case AArch64::ADD_ZZZ_B
:
6506 case AArch64::ADD_ZZZ_H
:
6507 case AArch64::ADD_ZZZ_S
:
6508 case AArch64::ADD_ZZZ_D
:
6509 case AArch64::MUL_ZZZ_B
:
6510 case AArch64::MUL_ZZZ_H
:
6511 case AArch64::MUL_ZZZ_S
:
6512 case AArch64::MUL_ZZZ_D
:
6513 case AArch64::AND_ZZZ
:
6514 case AArch64::ORR_ZZZ
:
6515 case AArch64::EOR_ZZZ
:
6523 /// Find instructions that can be turned into madd.
6524 static bool getMaddPatterns(MachineInstr
&Root
,
6525 SmallVectorImpl
<unsigned> &Patterns
) {
6526 unsigned Opc
= Root
.getOpcode();
6527 MachineBasicBlock
&MBB
= *Root
.getParent();
6530 if (!isCombineInstrCandidate(Opc
))
6532 if (isCombineInstrSettingFlag(Opc
)) {
6534 Root
.findRegisterDefOperandIdx(AArch64::NZCV
, /*TRI=*/nullptr, true);
6535 // When NZCV is live bail out.
6538 unsigned NewOpc
= convertToNonFlagSettingOpc(Root
);
6539 // When opcode can't change bail out.
6540 // CHECKME: do we miss any cases for opcode conversion?
6546 auto setFound
= [&](int Opcode
, int Operand
, unsigned ZeroReg
,
6548 if (canCombineWithMUL(MBB
, Root
.getOperand(Operand
), Opcode
, ZeroReg
)) {
6549 Patterns
.push_back(Pattern
);
6554 auto setVFound
= [&](int Opcode
, int Operand
, unsigned Pattern
) {
6555 if (canCombine(MBB
, Root
.getOperand(Operand
), Opcode
)) {
6556 Patterns
.push_back(Pattern
);
6561 typedef AArch64MachineCombinerPattern MCP
;
6566 case AArch64::ADDWrr
:
6567 assert(Root
.getOperand(1).isReg() && Root
.getOperand(2).isReg() &&
6568 "ADDWrr does not have register operands");
6569 setFound(AArch64::MADDWrrr
, 1, AArch64::WZR
, MCP::MULADDW_OP1
);
6570 setFound(AArch64::MADDWrrr
, 2, AArch64::WZR
, MCP::MULADDW_OP2
);
6572 case AArch64::ADDXrr
:
6573 setFound(AArch64::MADDXrrr
, 1, AArch64::XZR
, MCP::MULADDX_OP1
);
6574 setFound(AArch64::MADDXrrr
, 2, AArch64::XZR
, MCP::MULADDX_OP2
);
6576 case AArch64::SUBWrr
:
6577 setFound(AArch64::MADDWrrr
, 2, AArch64::WZR
, MCP::MULSUBW_OP2
);
6578 setFound(AArch64::MADDWrrr
, 1, AArch64::WZR
, MCP::MULSUBW_OP1
);
6580 case AArch64::SUBXrr
:
6581 setFound(AArch64::MADDXrrr
, 2, AArch64::XZR
, MCP::MULSUBX_OP2
);
6582 setFound(AArch64::MADDXrrr
, 1, AArch64::XZR
, MCP::MULSUBX_OP1
);
6584 case AArch64::ADDWri
:
6585 setFound(AArch64::MADDWrrr
, 1, AArch64::WZR
, MCP::MULADDWI_OP1
);
6587 case AArch64::ADDXri
:
6588 setFound(AArch64::MADDXrrr
, 1, AArch64::XZR
, MCP::MULADDXI_OP1
);
6590 case AArch64::SUBWri
:
6591 setFound(AArch64::MADDWrrr
, 1, AArch64::WZR
, MCP::MULSUBWI_OP1
);
6593 case AArch64::SUBXri
:
6594 setFound(AArch64::MADDXrrr
, 1, AArch64::XZR
, MCP::MULSUBXI_OP1
);
6596 case AArch64::ADDv8i8
:
6597 setVFound(AArch64::MULv8i8
, 1, MCP::MULADDv8i8_OP1
);
6598 setVFound(AArch64::MULv8i8
, 2, MCP::MULADDv8i8_OP2
);
6600 case AArch64::ADDv16i8
:
6601 setVFound(AArch64::MULv16i8
, 1, MCP::MULADDv16i8_OP1
);
6602 setVFound(AArch64::MULv16i8
, 2, MCP::MULADDv16i8_OP2
);
6604 case AArch64::ADDv4i16
:
6605 setVFound(AArch64::MULv4i16
, 1, MCP::MULADDv4i16_OP1
);
6606 setVFound(AArch64::MULv4i16
, 2, MCP::MULADDv4i16_OP2
);
6607 setVFound(AArch64::MULv4i16_indexed
, 1, MCP::MULADDv4i16_indexed_OP1
);
6608 setVFound(AArch64::MULv4i16_indexed
, 2, MCP::MULADDv4i16_indexed_OP2
);
6610 case AArch64::ADDv8i16
:
6611 setVFound(AArch64::MULv8i16
, 1, MCP::MULADDv8i16_OP1
);
6612 setVFound(AArch64::MULv8i16
, 2, MCP::MULADDv8i16_OP2
);
6613 setVFound(AArch64::MULv8i16_indexed
, 1, MCP::MULADDv8i16_indexed_OP1
);
6614 setVFound(AArch64::MULv8i16_indexed
, 2, MCP::MULADDv8i16_indexed_OP2
);
6616 case AArch64::ADDv2i32
:
6617 setVFound(AArch64::MULv2i32
, 1, MCP::MULADDv2i32_OP1
);
6618 setVFound(AArch64::MULv2i32
, 2, MCP::MULADDv2i32_OP2
);
6619 setVFound(AArch64::MULv2i32_indexed
, 1, MCP::MULADDv2i32_indexed_OP1
);
6620 setVFound(AArch64::MULv2i32_indexed
, 2, MCP::MULADDv2i32_indexed_OP2
);
6622 case AArch64::ADDv4i32
:
6623 setVFound(AArch64::MULv4i32
, 1, MCP::MULADDv4i32_OP1
);
6624 setVFound(AArch64::MULv4i32
, 2, MCP::MULADDv4i32_OP2
);
6625 setVFound(AArch64::MULv4i32_indexed
, 1, MCP::MULADDv4i32_indexed_OP1
);
6626 setVFound(AArch64::MULv4i32_indexed
, 2, MCP::MULADDv4i32_indexed_OP2
);
6628 case AArch64::SUBv8i8
:
6629 setVFound(AArch64::MULv8i8
, 1, MCP::MULSUBv8i8_OP1
);
6630 setVFound(AArch64::MULv8i8
, 2, MCP::MULSUBv8i8_OP2
);
6632 case AArch64::SUBv16i8
:
6633 setVFound(AArch64::MULv16i8
, 1, MCP::MULSUBv16i8_OP1
);
6634 setVFound(AArch64::MULv16i8
, 2, MCP::MULSUBv16i8_OP2
);
6636 case AArch64::SUBv4i16
:
6637 setVFound(AArch64::MULv4i16
, 1, MCP::MULSUBv4i16_OP1
);
6638 setVFound(AArch64::MULv4i16
, 2, MCP::MULSUBv4i16_OP2
);
6639 setVFound(AArch64::MULv4i16_indexed
, 1, MCP::MULSUBv4i16_indexed_OP1
);
6640 setVFound(AArch64::MULv4i16_indexed
, 2, MCP::MULSUBv4i16_indexed_OP2
);
6642 case AArch64::SUBv8i16
:
6643 setVFound(AArch64::MULv8i16
, 1, MCP::MULSUBv8i16_OP1
);
6644 setVFound(AArch64::MULv8i16
, 2, MCP::MULSUBv8i16_OP2
);
6645 setVFound(AArch64::MULv8i16_indexed
, 1, MCP::MULSUBv8i16_indexed_OP1
);
6646 setVFound(AArch64::MULv8i16_indexed
, 2, MCP::MULSUBv8i16_indexed_OP2
);
6648 case AArch64::SUBv2i32
:
6649 setVFound(AArch64::MULv2i32
, 1, MCP::MULSUBv2i32_OP1
);
6650 setVFound(AArch64::MULv2i32
, 2, MCP::MULSUBv2i32_OP2
);
6651 setVFound(AArch64::MULv2i32_indexed
, 1, MCP::MULSUBv2i32_indexed_OP1
);
6652 setVFound(AArch64::MULv2i32_indexed
, 2, MCP::MULSUBv2i32_indexed_OP2
);
6654 case AArch64::SUBv4i32
:
6655 setVFound(AArch64::MULv4i32
, 1, MCP::MULSUBv4i32_OP1
);
6656 setVFound(AArch64::MULv4i32
, 2, MCP::MULSUBv4i32_OP2
);
6657 setVFound(AArch64::MULv4i32_indexed
, 1, MCP::MULSUBv4i32_indexed_OP1
);
6658 setVFound(AArch64::MULv4i32_indexed
, 2, MCP::MULSUBv4i32_indexed_OP2
);
6663 /// Floating-Point Support
6665 /// Find instructions that can be turned into madd.
6666 static bool getFMAPatterns(MachineInstr
&Root
,
6667 SmallVectorImpl
<unsigned> &Patterns
) {
6669 if (!isCombineInstrCandidateFP(Root
))
6672 MachineBasicBlock
&MBB
= *Root
.getParent();
6675 auto Match
= [&](int Opcode
, int Operand
, unsigned Pattern
) -> bool {
6676 if (canCombineWithFMUL(MBB
, Root
.getOperand(Operand
), Opcode
)) {
6677 Patterns
.push_back(Pattern
);
6683 typedef AArch64MachineCombinerPattern MCP
;
6685 switch (Root
.getOpcode()) {
6687 assert(false && "Unsupported FP instruction in combiner\n");
6689 case AArch64::FADDHrr
:
6690 assert(Root
.getOperand(1).isReg() && Root
.getOperand(2).isReg() &&
6691 "FADDHrr does not have register operands");
6693 Found
= Match(AArch64::FMULHrr
, 1, MCP::FMULADDH_OP1
);
6694 Found
|= Match(AArch64::FMULHrr
, 2, MCP::FMULADDH_OP2
);
6696 case AArch64::FADDSrr
:
6697 assert(Root
.getOperand(1).isReg() && Root
.getOperand(2).isReg() &&
6698 "FADDSrr does not have register operands");
6700 Found
|= Match(AArch64::FMULSrr
, 1, MCP::FMULADDS_OP1
) ||
6701 Match(AArch64::FMULv1i32_indexed
, 1, MCP::FMLAv1i32_indexed_OP1
);
6703 Found
|= Match(AArch64::FMULSrr
, 2, MCP::FMULADDS_OP2
) ||
6704 Match(AArch64::FMULv1i32_indexed
, 2, MCP::FMLAv1i32_indexed_OP2
);
6706 case AArch64::FADDDrr
:
6707 Found
|= Match(AArch64::FMULDrr
, 1, MCP::FMULADDD_OP1
) ||
6708 Match(AArch64::FMULv1i64_indexed
, 1, MCP::FMLAv1i64_indexed_OP1
);
6710 Found
|= Match(AArch64::FMULDrr
, 2, MCP::FMULADDD_OP2
) ||
6711 Match(AArch64::FMULv1i64_indexed
, 2, MCP::FMLAv1i64_indexed_OP2
);
6713 case AArch64::FADDv4f16
:
6714 Found
|= Match(AArch64::FMULv4i16_indexed
, 1, MCP::FMLAv4i16_indexed_OP1
) ||
6715 Match(AArch64::FMULv4f16
, 1, MCP::FMLAv4f16_OP1
);
6717 Found
|= Match(AArch64::FMULv4i16_indexed
, 2, MCP::FMLAv4i16_indexed_OP2
) ||
6718 Match(AArch64::FMULv4f16
, 2, MCP::FMLAv4f16_OP2
);
6720 case AArch64::FADDv8f16
:
6721 Found
|= Match(AArch64::FMULv8i16_indexed
, 1, MCP::FMLAv8i16_indexed_OP1
) ||
6722 Match(AArch64::FMULv8f16
, 1, MCP::FMLAv8f16_OP1
);
6724 Found
|= Match(AArch64::FMULv8i16_indexed
, 2, MCP::FMLAv8i16_indexed_OP2
) ||
6725 Match(AArch64::FMULv8f16
, 2, MCP::FMLAv8f16_OP2
);
6727 case AArch64::FADDv2f32
:
6728 Found
|= Match(AArch64::FMULv2i32_indexed
, 1, MCP::FMLAv2i32_indexed_OP1
) ||
6729 Match(AArch64::FMULv2f32
, 1, MCP::FMLAv2f32_OP1
);
6731 Found
|= Match(AArch64::FMULv2i32_indexed
, 2, MCP::FMLAv2i32_indexed_OP2
) ||
6732 Match(AArch64::FMULv2f32
, 2, MCP::FMLAv2f32_OP2
);
6734 case AArch64::FADDv2f64
:
6735 Found
|= Match(AArch64::FMULv2i64_indexed
, 1, MCP::FMLAv2i64_indexed_OP1
) ||
6736 Match(AArch64::FMULv2f64
, 1, MCP::FMLAv2f64_OP1
);
6738 Found
|= Match(AArch64::FMULv2i64_indexed
, 2, MCP::FMLAv2i64_indexed_OP2
) ||
6739 Match(AArch64::FMULv2f64
, 2, MCP::FMLAv2f64_OP2
);
6741 case AArch64::FADDv4f32
:
6742 Found
|= Match(AArch64::FMULv4i32_indexed
, 1, MCP::FMLAv4i32_indexed_OP1
) ||
6743 Match(AArch64::FMULv4f32
, 1, MCP::FMLAv4f32_OP1
);
6745 Found
|= Match(AArch64::FMULv4i32_indexed
, 2, MCP::FMLAv4i32_indexed_OP2
) ||
6746 Match(AArch64::FMULv4f32
, 2, MCP::FMLAv4f32_OP2
);
6748 case AArch64::FSUBHrr
:
6749 Found
= Match(AArch64::FMULHrr
, 1, MCP::FMULSUBH_OP1
);
6750 Found
|= Match(AArch64::FMULHrr
, 2, MCP::FMULSUBH_OP2
);
6751 Found
|= Match(AArch64::FNMULHrr
, 1, MCP::FNMULSUBH_OP1
);
6753 case AArch64::FSUBSrr
:
6754 Found
= Match(AArch64::FMULSrr
, 1, MCP::FMULSUBS_OP1
);
6756 Found
|= Match(AArch64::FMULSrr
, 2, MCP::FMULSUBS_OP2
) ||
6757 Match(AArch64::FMULv1i32_indexed
, 2, MCP::FMLSv1i32_indexed_OP2
);
6759 Found
|= Match(AArch64::FNMULSrr
, 1, MCP::FNMULSUBS_OP1
);
6761 case AArch64::FSUBDrr
:
6762 Found
= Match(AArch64::FMULDrr
, 1, MCP::FMULSUBD_OP1
);
6764 Found
|= Match(AArch64::FMULDrr
, 2, MCP::FMULSUBD_OP2
) ||
6765 Match(AArch64::FMULv1i64_indexed
, 2, MCP::FMLSv1i64_indexed_OP2
);
6767 Found
|= Match(AArch64::FNMULDrr
, 1, MCP::FNMULSUBD_OP1
);
6769 case AArch64::FSUBv4f16
:
6770 Found
|= Match(AArch64::FMULv4i16_indexed
, 2, MCP::FMLSv4i16_indexed_OP2
) ||
6771 Match(AArch64::FMULv4f16
, 2, MCP::FMLSv4f16_OP2
);
6773 Found
|= Match(AArch64::FMULv4i16_indexed
, 1, MCP::FMLSv4i16_indexed_OP1
) ||
6774 Match(AArch64::FMULv4f16
, 1, MCP::FMLSv4f16_OP1
);
6776 case AArch64::FSUBv8f16
:
6777 Found
|= Match(AArch64::FMULv8i16_indexed
, 2, MCP::FMLSv8i16_indexed_OP2
) ||
6778 Match(AArch64::FMULv8f16
, 2, MCP::FMLSv8f16_OP2
);
6780 Found
|= Match(AArch64::FMULv8i16_indexed
, 1, MCP::FMLSv8i16_indexed_OP1
) ||
6781 Match(AArch64::FMULv8f16
, 1, MCP::FMLSv8f16_OP1
);
6783 case AArch64::FSUBv2f32
:
6784 Found
|= Match(AArch64::FMULv2i32_indexed
, 2, MCP::FMLSv2i32_indexed_OP2
) ||
6785 Match(AArch64::FMULv2f32
, 2, MCP::FMLSv2f32_OP2
);
6787 Found
|= Match(AArch64::FMULv2i32_indexed
, 1, MCP::FMLSv2i32_indexed_OP1
) ||
6788 Match(AArch64::FMULv2f32
, 1, MCP::FMLSv2f32_OP1
);
6790 case AArch64::FSUBv2f64
:
6791 Found
|= Match(AArch64::FMULv2i64_indexed
, 2, MCP::FMLSv2i64_indexed_OP2
) ||
6792 Match(AArch64::FMULv2f64
, 2, MCP::FMLSv2f64_OP2
);
6794 Found
|= Match(AArch64::FMULv2i64_indexed
, 1, MCP::FMLSv2i64_indexed_OP1
) ||
6795 Match(AArch64::FMULv2f64
, 1, MCP::FMLSv2f64_OP1
);
6797 case AArch64::FSUBv4f32
:
6798 Found
|= Match(AArch64::FMULv4i32_indexed
, 2, MCP::FMLSv4i32_indexed_OP2
) ||
6799 Match(AArch64::FMULv4f32
, 2, MCP::FMLSv4f32_OP2
);
6801 Found
|= Match(AArch64::FMULv4i32_indexed
, 1, MCP::FMLSv4i32_indexed_OP1
) ||
6802 Match(AArch64::FMULv4f32
, 1, MCP::FMLSv4f32_OP1
);
6808 static bool getFMULPatterns(MachineInstr
&Root
,
6809 SmallVectorImpl
<unsigned> &Patterns
) {
6810 MachineBasicBlock
&MBB
= *Root
.getParent();
6813 auto Match
= [&](unsigned Opcode
, int Operand
, unsigned Pattern
) -> bool {
6814 MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
6815 MachineOperand
&MO
= Root
.getOperand(Operand
);
6816 MachineInstr
*MI
= nullptr;
6817 if (MO
.isReg() && MO
.getReg().isVirtual())
6818 MI
= MRI
.getUniqueVRegDef(MO
.getReg());
6819 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6820 if (MI
&& MI
->getOpcode() == TargetOpcode::COPY
&&
6821 MI
->getOperand(1).getReg().isVirtual())
6822 MI
= MRI
.getUniqueVRegDef(MI
->getOperand(1).getReg());
6823 if (MI
&& MI
->getOpcode() == Opcode
) {
6824 Patterns
.push_back(Pattern
);
6830 typedef AArch64MachineCombinerPattern MCP
;
6832 switch (Root
.getOpcode()) {
6835 case AArch64::FMULv2f32
:
6836 Found
= Match(AArch64::DUPv2i32lane
, 1, MCP::FMULv2i32_indexed_OP1
);
6837 Found
|= Match(AArch64::DUPv2i32lane
, 2, MCP::FMULv2i32_indexed_OP2
);
6839 case AArch64::FMULv2f64
:
6840 Found
= Match(AArch64::DUPv2i64lane
, 1, MCP::FMULv2i64_indexed_OP1
);
6841 Found
|= Match(AArch64::DUPv2i64lane
, 2, MCP::FMULv2i64_indexed_OP2
);
6843 case AArch64::FMULv4f16
:
6844 Found
= Match(AArch64::DUPv4i16lane
, 1, MCP::FMULv4i16_indexed_OP1
);
6845 Found
|= Match(AArch64::DUPv4i16lane
, 2, MCP::FMULv4i16_indexed_OP2
);
6847 case AArch64::FMULv4f32
:
6848 Found
= Match(AArch64::DUPv4i32lane
, 1, MCP::FMULv4i32_indexed_OP1
);
6849 Found
|= Match(AArch64::DUPv4i32lane
, 2, MCP::FMULv4i32_indexed_OP2
);
6851 case AArch64::FMULv8f16
:
6852 Found
= Match(AArch64::DUPv8i16lane
, 1, MCP::FMULv8i16_indexed_OP1
);
6853 Found
|= Match(AArch64::DUPv8i16lane
, 2, MCP::FMULv8i16_indexed_OP2
);
6860 static bool getFNEGPatterns(MachineInstr
&Root
,
6861 SmallVectorImpl
<unsigned> &Patterns
) {
6862 unsigned Opc
= Root
.getOpcode();
6863 MachineBasicBlock
&MBB
= *Root
.getParent();
6864 MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
6866 auto Match
= [&](unsigned Opcode
, unsigned Pattern
) -> bool {
6867 MachineOperand
&MO
= Root
.getOperand(1);
6868 MachineInstr
*MI
= MRI
.getUniqueVRegDef(MO
.getReg());
6869 if (MI
!= nullptr && (MI
->getOpcode() == Opcode
) &&
6870 MRI
.hasOneNonDBGUse(MI
->getOperand(0).getReg()) &&
6871 Root
.getFlag(MachineInstr::MIFlag::FmContract
) &&
6872 Root
.getFlag(MachineInstr::MIFlag::FmNsz
) &&
6873 MI
->getFlag(MachineInstr::MIFlag::FmContract
) &&
6874 MI
->getFlag(MachineInstr::MIFlag::FmNsz
)) {
6875 Patterns
.push_back(Pattern
);
6884 case AArch64::FNEGDr
:
6885 return Match(AArch64::FMADDDrrr
, AArch64MachineCombinerPattern::FNMADD
);
6886 case AArch64::FNEGSr
:
6887 return Match(AArch64::FMADDSrrr
, AArch64MachineCombinerPattern::FNMADD
);
6893 /// Return true when a code sequence can improve throughput. It
6894 /// should be called only for instructions in loops.
6895 /// \param Pattern - combiner pattern
6896 bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern
) const {
6900 case AArch64MachineCombinerPattern::FMULADDH_OP1
:
6901 case AArch64MachineCombinerPattern::FMULADDH_OP2
:
6902 case AArch64MachineCombinerPattern::FMULSUBH_OP1
:
6903 case AArch64MachineCombinerPattern::FMULSUBH_OP2
:
6904 case AArch64MachineCombinerPattern::FMULADDS_OP1
:
6905 case AArch64MachineCombinerPattern::FMULADDS_OP2
:
6906 case AArch64MachineCombinerPattern::FMULSUBS_OP1
:
6907 case AArch64MachineCombinerPattern::FMULSUBS_OP2
:
6908 case AArch64MachineCombinerPattern::FMULADDD_OP1
:
6909 case AArch64MachineCombinerPattern::FMULADDD_OP2
:
6910 case AArch64MachineCombinerPattern::FMULSUBD_OP1
:
6911 case AArch64MachineCombinerPattern::FMULSUBD_OP2
:
6912 case AArch64MachineCombinerPattern::FNMULSUBH_OP1
:
6913 case AArch64MachineCombinerPattern::FNMULSUBS_OP1
:
6914 case AArch64MachineCombinerPattern::FNMULSUBD_OP1
:
6915 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1
:
6916 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2
:
6917 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1
:
6918 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2
:
6919 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1
:
6920 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2
:
6921 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1
:
6922 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2
:
6923 case AArch64MachineCombinerPattern::FMLAv4f16_OP2
:
6924 case AArch64MachineCombinerPattern::FMLAv4f16_OP1
:
6925 case AArch64MachineCombinerPattern::FMLAv8f16_OP1
:
6926 case AArch64MachineCombinerPattern::FMLAv8f16_OP2
:
6927 case AArch64MachineCombinerPattern::FMLAv2f32_OP2
:
6928 case AArch64MachineCombinerPattern::FMLAv2f32_OP1
:
6929 case AArch64MachineCombinerPattern::FMLAv2f64_OP1
:
6930 case AArch64MachineCombinerPattern::FMLAv2f64_OP2
:
6931 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1
:
6932 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2
:
6933 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1
:
6934 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2
:
6935 case AArch64MachineCombinerPattern::FMLAv4f32_OP1
:
6936 case AArch64MachineCombinerPattern::FMLAv4f32_OP2
:
6937 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1
:
6938 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2
:
6939 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1
:
6940 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2
:
6941 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1
:
6942 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2
:
6943 case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2
:
6944 case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2
:
6945 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2
:
6946 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2
:
6947 case AArch64MachineCombinerPattern::FMLSv4f16_OP1
:
6948 case AArch64MachineCombinerPattern::FMLSv4f16_OP2
:
6949 case AArch64MachineCombinerPattern::FMLSv8f16_OP1
:
6950 case AArch64MachineCombinerPattern::FMLSv8f16_OP2
:
6951 case AArch64MachineCombinerPattern::FMLSv2f32_OP2
:
6952 case AArch64MachineCombinerPattern::FMLSv2f64_OP2
:
6953 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2
:
6954 case AArch64MachineCombinerPattern::FMLSv4f32_OP2
:
6955 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1
:
6956 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2
:
6957 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1
:
6958 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2
:
6959 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1
:
6960 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2
:
6961 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1
:
6962 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2
:
6963 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1
:
6964 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2
:
6965 case AArch64MachineCombinerPattern::MULADDv8i8_OP1
:
6966 case AArch64MachineCombinerPattern::MULADDv8i8_OP2
:
6967 case AArch64MachineCombinerPattern::MULADDv16i8_OP1
:
6968 case AArch64MachineCombinerPattern::MULADDv16i8_OP2
:
6969 case AArch64MachineCombinerPattern::MULADDv4i16_OP1
:
6970 case AArch64MachineCombinerPattern::MULADDv4i16_OP2
:
6971 case AArch64MachineCombinerPattern::MULADDv8i16_OP1
:
6972 case AArch64MachineCombinerPattern::MULADDv8i16_OP2
:
6973 case AArch64MachineCombinerPattern::MULADDv2i32_OP1
:
6974 case AArch64MachineCombinerPattern::MULADDv2i32_OP2
:
6975 case AArch64MachineCombinerPattern::MULADDv4i32_OP1
:
6976 case AArch64MachineCombinerPattern::MULADDv4i32_OP2
:
6977 case AArch64MachineCombinerPattern::MULSUBv8i8_OP1
:
6978 case AArch64MachineCombinerPattern::MULSUBv8i8_OP2
:
6979 case AArch64MachineCombinerPattern::MULSUBv16i8_OP1
:
6980 case AArch64MachineCombinerPattern::MULSUBv16i8_OP2
:
6981 case AArch64MachineCombinerPattern::MULSUBv4i16_OP1
:
6982 case AArch64MachineCombinerPattern::MULSUBv4i16_OP2
:
6983 case AArch64MachineCombinerPattern::MULSUBv8i16_OP1
:
6984 case AArch64MachineCombinerPattern::MULSUBv8i16_OP2
:
6985 case AArch64MachineCombinerPattern::MULSUBv2i32_OP1
:
6986 case AArch64MachineCombinerPattern::MULSUBv2i32_OP2
:
6987 case AArch64MachineCombinerPattern::MULSUBv4i32_OP1
:
6988 case AArch64MachineCombinerPattern::MULSUBv4i32_OP2
:
6989 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1
:
6990 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2
:
6991 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1
:
6992 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2
:
6993 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1
:
6994 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2
:
6995 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1
:
6996 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2
:
6997 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1
:
6998 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2
:
6999 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1
:
7000 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2
:
7001 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1
:
7002 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2
:
7003 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1
:
7004 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2
:
7006 } // end switch (Pattern)
7010 /// Find other MI combine patterns.
7011 static bool getMiscPatterns(MachineInstr
&Root
,
7012 SmallVectorImpl
<unsigned> &Patterns
) {
7013 // A - (B + C) ==> (A - B) - C or (A - C) - B
7014 unsigned Opc
= Root
.getOpcode();
7015 MachineBasicBlock
&MBB
= *Root
.getParent();
7018 case AArch64::SUBWrr
:
7019 case AArch64::SUBSWrr
:
7020 case AArch64::SUBXrr
:
7021 case AArch64::SUBSXrr
:
7022 // Found candidate root.
7028 if (isCombineInstrSettingFlag(Opc
) &&
7029 Root
.findRegisterDefOperandIdx(AArch64::NZCV
, /*TRI=*/nullptr, true) ==
7033 if (canCombine(MBB
, Root
.getOperand(2), AArch64::ADDWrr
) ||
7034 canCombine(MBB
, Root
.getOperand(2), AArch64::ADDSWrr
) ||
7035 canCombine(MBB
, Root
.getOperand(2), AArch64::ADDXrr
) ||
7036 canCombine(MBB
, Root
.getOperand(2), AArch64::ADDSXrr
)) {
7037 Patterns
.push_back(AArch64MachineCombinerPattern::SUBADD_OP1
);
7038 Patterns
.push_back(AArch64MachineCombinerPattern::SUBADD_OP2
);
7046 AArch64InstrInfo::getCombinerObjective(unsigned Pattern
) const {
7048 case AArch64MachineCombinerPattern::SUBADD_OP1
:
7049 case AArch64MachineCombinerPattern::SUBADD_OP2
:
7050 return CombinerObjective::MustReduceDepth
;
7052 return TargetInstrInfo::getCombinerObjective(Pattern
);
7056 /// Return true when there is potentially a faster code sequence for an
7057 /// instruction chain ending in \p Root. All potential patterns are listed in
7058 /// the \p Pattern vector. Pattern should be sorted in priority order since the
7059 /// pattern evaluator stops checking as soon as it finds a faster sequence.
7061 bool AArch64InstrInfo::getMachineCombinerPatterns(
7062 MachineInstr
&Root
, SmallVectorImpl
<unsigned> &Patterns
,
7063 bool DoRegPressureReduce
) const {
7065 if (getMaddPatterns(Root
, Patterns
))
7067 // Floating point patterns
7068 if (getFMULPatterns(Root
, Patterns
))
7070 if (getFMAPatterns(Root
, Patterns
))
7072 if (getFNEGPatterns(Root
, Patterns
))
7076 if (getMiscPatterns(Root
, Patterns
))
7079 return TargetInstrInfo::getMachineCombinerPatterns(Root
, Patterns
,
7080 DoRegPressureReduce
);
7083 enum class FMAInstKind
{ Default
, Indexed
, Accumulator
};
7084 /// genFusedMultiply - Generate fused multiply instructions.
7085 /// This function supports both integer and floating point instructions.
7086 /// A typical example:
7089 /// ==> F|MADD R,A,B,C
7090 /// \param MF Containing MachineFunction
7091 /// \param MRI Register information
7092 /// \param TII Target information
7093 /// \param Root is the F|ADD instruction
7094 /// \param [out] InsInstrs is a vector of machine instructions and will
7095 /// contain the generated madd instruction
7096 /// \param IdxMulOpd is index of operand in Root that is the result of
7097 /// the F|MUL. In the example above IdxMulOpd is 1.
7098 /// \param MaddOpc the opcode fo the f|madd instruction
7099 /// \param RC Register class of operands
7100 /// \param kind of fma instruction (addressing mode) to be generated
7101 /// \param ReplacedAddend is the result register from the instruction
7102 /// replacing the non-combined operand, if any.
7103 static MachineInstr
*
7104 genFusedMultiply(MachineFunction
&MF
, MachineRegisterInfo
&MRI
,
7105 const TargetInstrInfo
*TII
, MachineInstr
&Root
,
7106 SmallVectorImpl
<MachineInstr
*> &InsInstrs
, unsigned IdxMulOpd
,
7107 unsigned MaddOpc
, const TargetRegisterClass
*RC
,
7108 FMAInstKind kind
= FMAInstKind::Default
,
7109 const Register
*ReplacedAddend
= nullptr) {
7110 assert(IdxMulOpd
== 1 || IdxMulOpd
== 2);
7112 unsigned IdxOtherOpd
= IdxMulOpd
== 1 ? 2 : 1;
7113 MachineInstr
*MUL
= MRI
.getUniqueVRegDef(Root
.getOperand(IdxMulOpd
).getReg());
7114 Register ResultReg
= Root
.getOperand(0).getReg();
7115 Register SrcReg0
= MUL
->getOperand(1).getReg();
7116 bool Src0IsKill
= MUL
->getOperand(1).isKill();
7117 Register SrcReg1
= MUL
->getOperand(2).getReg();
7118 bool Src1IsKill
= MUL
->getOperand(2).isKill();
7122 if (ReplacedAddend
) {
7123 // If we just generated a new addend, we must be it's only use.
7124 SrcReg2
= *ReplacedAddend
;
7127 SrcReg2
= Root
.getOperand(IdxOtherOpd
).getReg();
7128 Src2IsKill
= Root
.getOperand(IdxOtherOpd
).isKill();
7131 if (ResultReg
.isVirtual())
7132 MRI
.constrainRegClass(ResultReg
, RC
);
7133 if (SrcReg0
.isVirtual())
7134 MRI
.constrainRegClass(SrcReg0
, RC
);
7135 if (SrcReg1
.isVirtual())
7136 MRI
.constrainRegClass(SrcReg1
, RC
);
7137 if (SrcReg2
.isVirtual())
7138 MRI
.constrainRegClass(SrcReg2
, RC
);
7140 MachineInstrBuilder MIB
;
7141 if (kind
== FMAInstKind::Default
)
7142 MIB
= BuildMI(MF
, MIMetadata(Root
), TII
->get(MaddOpc
), ResultReg
)
7143 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
7144 .addReg(SrcReg1
, getKillRegState(Src1IsKill
))
7145 .addReg(SrcReg2
, getKillRegState(Src2IsKill
));
7146 else if (kind
== FMAInstKind::Indexed
)
7147 MIB
= BuildMI(MF
, MIMetadata(Root
), TII
->get(MaddOpc
), ResultReg
)
7148 .addReg(SrcReg2
, getKillRegState(Src2IsKill
))
7149 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
7150 .addReg(SrcReg1
, getKillRegState(Src1IsKill
))
7151 .addImm(MUL
->getOperand(3).getImm());
7152 else if (kind
== FMAInstKind::Accumulator
)
7153 MIB
= BuildMI(MF
, MIMetadata(Root
), TII
->get(MaddOpc
), ResultReg
)
7154 .addReg(SrcReg2
, getKillRegState(Src2IsKill
))
7155 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
7156 .addReg(SrcReg1
, getKillRegState(Src1IsKill
));
7158 assert(false && "Invalid FMA instruction kind \n");
7159 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
7160 InsInstrs
.push_back(MIB
);
7164 static MachineInstr
*
7165 genFNegatedMAD(MachineFunction
&MF
, MachineRegisterInfo
&MRI
,
7166 const TargetInstrInfo
*TII
, MachineInstr
&Root
,
7167 SmallVectorImpl
<MachineInstr
*> &InsInstrs
) {
7168 MachineInstr
*MAD
= MRI
.getUniqueVRegDef(Root
.getOperand(1).getReg());
7171 const TargetRegisterClass
*RC
= MRI
.getRegClass(MAD
->getOperand(0).getReg());
7172 if (AArch64::FPR32RegClass
.hasSubClassEq(RC
))
7173 Opc
= AArch64::FNMADDSrrr
;
7174 else if (AArch64::FPR64RegClass
.hasSubClassEq(RC
))
7175 Opc
= AArch64::FNMADDDrrr
;
7179 Register ResultReg
= Root
.getOperand(0).getReg();
7180 Register SrcReg0
= MAD
->getOperand(1).getReg();
7181 Register SrcReg1
= MAD
->getOperand(2).getReg();
7182 Register SrcReg2
= MAD
->getOperand(3).getReg();
7183 bool Src0IsKill
= MAD
->getOperand(1).isKill();
7184 bool Src1IsKill
= MAD
->getOperand(2).isKill();
7185 bool Src2IsKill
= MAD
->getOperand(3).isKill();
7186 if (ResultReg
.isVirtual())
7187 MRI
.constrainRegClass(ResultReg
, RC
);
7188 if (SrcReg0
.isVirtual())
7189 MRI
.constrainRegClass(SrcReg0
, RC
);
7190 if (SrcReg1
.isVirtual())
7191 MRI
.constrainRegClass(SrcReg1
, RC
);
7192 if (SrcReg2
.isVirtual())
7193 MRI
.constrainRegClass(SrcReg2
, RC
);
7195 MachineInstrBuilder MIB
=
7196 BuildMI(MF
, MIMetadata(Root
), TII
->get(Opc
), ResultReg
)
7197 .addReg(SrcReg0
, getKillRegState(Src0IsKill
))
7198 .addReg(SrcReg1
, getKillRegState(Src1IsKill
))
7199 .addReg(SrcReg2
, getKillRegState(Src2IsKill
));
7200 InsInstrs
.push_back(MIB
);
/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static MachineInstr *
genIndexedMultiply(MachineInstr &Root,
                   SmallVectorImpl<MachineInstr *> &InsInstrs,
                   unsigned IdxDupOp, unsigned MulOpc,
                   const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
  assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
         "Invalid index of FMUL operand");

  MachineFunction &MF = *Root.getMF();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *Dup =
      MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());

  if (Dup->getOpcode() == TargetOpcode::COPY)
    Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());

  Register DupSrcReg = Dup->getOperand(1).getReg();
  MRI.clearKillFlags(DupSrcReg);
  MRI.constrainRegClass(DupSrcReg, RC);

  unsigned DupSrcLane = Dup->getOperand(2).getImm();

  unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
  MachineOperand &MulOp = Root.getOperand(IdxMulOp);

  Register ResultReg = Root.getOperand(0).getReg();

  MachineInstrBuilder MIB;
  MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
            .add(MulOp)
            .addReg(DupSrcReg)
            .addImm(DupSrcLane);

  InsInstrs.push_back(MIB);
  return &Root;
}
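
// Illustrative fold performed by genIndexedMultiply (register numbers and the
// lane index are placeholders):
//   %2:fpr128 = DUPv4i32lane %1, 1
//   %3:fpr128 = FMULv4f32 %0, %2       <-- Root, IdxDupOp == 2
// becomes
//   %3:fpr128 = FMULv4i32_indexed %0, %1, 1
// The by-element FMUL reads the lane directly, so the DUP goes dead once its
// remaining uses are gone.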
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyAcc(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Accumulator);
}
/// genNeg - Helper to generate an intermediate negation of the second operand
/// of Root
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
                       const TargetInstrInfo *TII, MachineInstr &Root,
                       SmallVectorImpl<MachineInstr *> &InsInstrs,
                       DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                       unsigned MnegOpc, const TargetRegisterClass *RC) {
  Register NewVR = MRI.createVirtualRegister(RC);
  MachineInstrBuilder MIB =
      BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
          .add(Root.getOperand(2));
  InsInstrs.push_back(MIB);

  assert(InstrIdxForVirtReg.empty());
  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));

  return NewVR;
}
/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyAccNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Accumulator, &NewVR);
}
/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyIdx(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed);
}
/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyIdxNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed, &NewVR);
}
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
                              const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  Register SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (ResultReg.isVirtual())
    MRI.constrainRegClass(ResultReg, RC);
  if (SrcReg0.isVirtual())
    MRI.constrainRegClass(SrcReg0, RC);
  if (SrcReg1.isVirtual())
    MRI.constrainRegClass(SrcReg1, RC);
  if (Register::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
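
// Illustrative use of genMaddR by the MULADDWI_OP1 handling further down
// (register numbers are placeholders; 0xff is simply an immediate that a
// single ORRWri can materialize):
//   %2:gpr32 = MADDWrrr %0, %1, $wzr     ; the MUL
//   %3:gpr32 = ADDWri %2, 0xff, 0        <-- Root
// becomes
//   %4:gpr32sp = ORRWri $wzr, <logical-imm 0xff>
//   %3:gpr32   = MADDWrrr %0, %1, %4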
/// Do the following transformation
/// A - (B + C)  ==>  (A - B) - C
/// A - (B + C)  ==>  (A - C) - B
static void
genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs,
                 SmallVectorImpl<MachineInstr *> &DelInstrs,
                 unsigned IdxOpd1,
                 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
  assert(IdxOpd1 == 1 || IdxOpd1 == 2);
  unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
  MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());

  Register ResultReg = Root.getOperand(0).getReg();
  Register RegA = Root.getOperand(1).getReg();
  bool RegAIsKill = Root.getOperand(1).isKill();
  Register RegB = AddMI->getOperand(IdxOpd1).getReg();
  bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
  Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
  bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();

  Register NewVR =
      MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));

  unsigned Opcode = Root.getOpcode();
  if (Opcode == AArch64::SUBSWrr)
    Opcode = AArch64::SUBWrr;
  else if (Opcode == AArch64::SUBSXrr)
    Opcode = AArch64::SUBXrr;
  else
    assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
           "Unexpected instruction opcode.");

  uint32_t Flags = Root.mergeFlagsWith(*AddMI);
  Flags &= ~MachineInstr::NoSWrap;
  Flags &= ~MachineInstr::NoUWrap;

  MachineInstrBuilder MIB1 =
      BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
          .addReg(RegA, getKillRegState(RegAIsKill))
          .addReg(RegB, getKillRegState(RegBIsKill))
          .setMIFlags(Flags);
  MachineInstrBuilder MIB2 =
      BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
          .addReg(NewVR, getKillRegState(true))
          .addReg(RegC, getKillRegState(RegCIsKill))
          .setMIFlags(Flags);

  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
  InsInstrs.push_back(MIB1);
  InsInstrs.push_back(MIB2);
  DelInstrs.push_back(AddMI);
  DelInstrs.push_back(&Root);
}
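
// Illustrative rewrite performed by genSubAdd2SubSub with IdxOpd1 == 1
// (virtual register numbers are placeholders):
//   %3:gpr32 = ADDWrr %1, %2
//   %4:gpr32 = SUBWrr %0, %3        <-- Root: A - (B + C)
// becomes
//   %5:gpr32 = SUBWrr %0, %1        ; A - B
//   %4:gpr32 = SUBWrr %5, %2        ; (A - B) - C
// The nsw/nuw flags are cleared above because the intermediate difference can
// wrap even when the original expression does not.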
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, unsigned Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case AArch64MachineCombinerPattern::SUBADD_OP1:
    // A - (B + C)
    // ==> (A - B) - C
    genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
                     InstrIdxForVirtReg);
    return;
  case AArch64MachineCombinerPattern::SUBADD_OP2:
    // A - (B + C)
    // ==> (A - C) - B
    genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
                     InstrIdxForVirtReg);
    return;
  case AArch64MachineCombinerPattern::MULADDW_OP1:
  case AArch64MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDW_OP2:
  case AArch64MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
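  // Illustrative assembly for the MULADD patterns above (register names are
  // placeholders):
  //   mul  w8, w0, w1
  //   add  w0, w2, w8
  // becomes
  //   madd w0, w0, w1, w2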
  case AArch64MachineCombinerPattern::MULADDWI_OP1:
  case AArch64MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> MOV  V, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    // The immediate can be composed via a single instruction.
    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
    AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
    if (Insn.size() != 1)
      return;
    auto MovI = Insn.begin();
    MachineInstrBuilder MIB1;
    // MOV is an alias for one of three instructions: movz, movn, and orr.
    if (MovI->Opcode == OrrOpc)
      MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
                 .addReg(ZeroReg)
                 .addImm(MovI->Op2);
    else {
      if (BitSize == 32)
        assert((MovI->Opcode == AArch64::MOVNWi ||
                MovI->Opcode == AArch64::MOVZWi) &&
               "Expected opcode");
      else
        assert((MovI->Opcode == AArch64::MOVNXi ||
                MovI->Opcode == AArch64::MOVZXi) &&
               "Expected opcode");
      MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
                 .addImm(MovI->Op1)
                 .addImm(MovI->Op2);
    }
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
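  // Illustrative assembly for MULADDWI_OP1 when the immediate fits in one MOV
  // (values are placeholders):
  //   mul  w8, w0, w1
  //   add  w0, w8, #16
  // becomes
  //   mov  w9, #16
  //   madd w0, w0, w1, w9
  // If expandMOVImm needs more than one instruction, the Insn.size() check
  // above bails out and no combine is done.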
  case AArch64MachineCombinerPattern::MULSUBW_OP1:
  case AArch64MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case AArch64MachineCombinerPattern::MULSUBW_OP2:
  case AArch64MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
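  // Illustrative assembly for MULSUBW_OP2 (register names are placeholders):
  //   mul  w8, w0, w1
  //   sub  w0, w2, w8
  // becomes
  //   msub w0, w0, w1, w2             ; w2 - w0 * w1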
  case AArch64MachineCombinerPattern::MULSUBWI_OP1:
  case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> MOV  V, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    // The immediate can be composed via a single instruction.
    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
    AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
    if (Insn.size() != 1)
      return;
    auto MovI = Insn.begin();
    MachineInstrBuilder MIB1;
    // MOV is an alias for one of three instructions: movz, movn, and orr.
    if (MovI->Opcode == OrrOpc)
      MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
                 .addReg(ZeroReg)
                 .addImm(MovI->Op2);
    else {
      if (BitSize == 32)
        assert((MovI->Opcode == AArch64::MOVNWi ||
                MovI->Opcode == AArch64::MOVZWi) &&
               "Expected opcode");
      else
        assert((MovI->Opcode == AArch64::MOVNXi ||
                MovI->Opcode == AArch64::MOVZXi) &&
               "Expected opcode");
      MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
                 .addImm(MovI->Op1)
                 .addImm(MovI->Op2);
    }
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
    Opc = AArch64::MLSv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
    Opc = AArch64::MLSv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
    Opc = AArch64::MLSv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
    Opc = AArch64::MLSv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
    Opc = AArch64::MLSv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
    Opc = AArch64::MLSv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
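  // Illustrative assembly for the vector patterns above, e.g. MULADDv4i32_OP2
  // (register names are placeholders):
  //   mul v2.4s, v0.4s, v1.4s
  //   add v3.4s, v3.4s, v2.4s
  // becomes
  //   mla v3.4s, v0.4s, v1.4s
  // There is no single instruction for the MULSUB*_OP1 forms (mul result minus
  // accumulator), so those negate the accumulator first (NEGv*) and then use
  // MLA via genFusedMultiplyAccNeg.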
  case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
    Opc = AArch64::MLSv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
    Opc = AArch64::MLSv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
    Opc = AArch64::MLSv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
    Opc = AArch64::MLSv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  // Floating Point Support
  case AArch64MachineCombinerPattern::FMULADDH_OP1:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDS_OP1:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDD_OP1:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMULADDH_OP2:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDS_OP2:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDD_OP2:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;

  case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;

  case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case AArch64MachineCombinerPattern::FMULSUBH_OP1:
    Opc = AArch64::FNMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBS_OP1:
    Opc = AArch64::FNMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBD_OP1:
    Opc = AArch64::FNMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
    Opc = AArch64::FNMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
    Opc = AArch64::FNMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
    Opc = AArch64::FNMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMULSUBH_OP2:
    Opc = AArch64::FMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBS_OP2:
    Opc = AArch64::FMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBD_OP2:
    Opc = AArch64::FMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
  case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) {
      Opc = AArch64::FMLAv4f16;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    } else {
      Opc = AArch64::FMLAv4i16_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    }
    break;
  }
  case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLSv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLSv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
  case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
  case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) {
      Opc = AArch64::FMLAv8f16;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    } else {
      Opc = AArch64::FMLAv8i16_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    }
    break;
  }
  case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLSv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLSv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
  case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
  case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case AArch64MachineCombinerPattern::FMLSv2f32_OP1:
  case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case AArch64MachineCombinerPattern::FMLSv4f32_OP1:
  case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case AArch64MachineCombinerPattern::FMLSv2f64_OP1:
  case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FNMADD: {
    MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
    break;
  }

  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);

  // Set the flags on the inserted instructions to be the merged flags of the
  // instructions that we have combined.
  uint32_t Flags = Root.getFlags();
  if (MUL)
    Flags = Root.mergeFlagsWith(*MUL);
  for (auto *MI : InsInstrs)
    MI->setFlags(Flags);
}
/// Replace csincr-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc  w9, wzr, wzr, <condition code>
///   tbnz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<inverted condition code>
///    \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<condition code>
///    \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is power of 2.
///
/// Examples:
///    \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
///    \endcode
/// to
///    \code
///   tbnz w8, #10, L1
///    \endcode
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  Register VReg = MI.getOperand(0).getReg();
  if (!VReg.isVirtual())
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    Register CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    Register NewReg = MO.getReg();
    if (!NewReg.isVirtual())
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the CBZ now.
    MO.setIsKill(false);

    // For immediate smaller than 32, we need to use the 32-bit
    // variant (W) in all cases. Indeed the 64-bit variant does not
    // allow to encode them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
                                         true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return ArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "aarch64-coffstub"},
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_S, "aarch64-s"},
      {MO_TLS, "aarch64-tls"},
      {MO_DLLIMPORT, "aarch64-dllimport"},
      {MO_PREL, "aarch64-prel"},
      {MO_TAGGED, "aarch64-tagged"},
      {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
  };
  return ArrayRef(TargetFlags);
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MOSuppressPair, "aarch64-suppress-pair"},
       {MOStridedAccess, "aarch64-strided-access"}};
  return ArrayRef(TargetFlags);
}
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                             OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION     I1
/// RET                            I2
///                                RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// BL f                              I2
///                                   B f
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
  MachineOutlinerTailCall, /// Only emit a branch.
  MachineOutlinerNoLRSave, /// Emit a call and return.
  MachineOutlinerThunk,    /// Emit a call and tail-call.
  MachineOutlinerRegSave   /// Same as default, but save to a register.
};

enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8
};
Register
AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
  MachineFunction *MF = C.getMF();
  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
  const AArch64RegisterInfo *ARI =
      static_cast<const AArch64RegisterInfo *>(&TRI);
  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) &&
        Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
        Reg != AArch64::X17 && // Ditto for X17.
        C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
        C.isAvailableInsideSeq(Reg, TRI))
      return Reg;
  }
  return Register();
}
static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
                                         const outliner::Candidate &b) {
  const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
  const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();

  return MFIa->shouldSignReturnAddress(false) ==
             MFIb->shouldSignReturnAddress(false) &&
         MFIa->shouldSignReturnAddress(true) ==
             MFIb->shouldSignReturnAddress(true);
}

static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
                                       const outliner::Candidate &b) {
  const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
  const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();

  return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
}

static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
                                                const outliner::Candidate &b) {
  const AArch64Subtarget &SubtargetA =
      a.getMF()->getSubtarget<AArch64Subtarget>();
  const AArch64Subtarget &SubtargetB =
      b.getMF()->getSubtarget<AArch64Subtarget>();
  return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
}
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
AArch64InstrInfo::getOutliningCandidateInfo(
    const MachineModuleInfo &MMI,
    std::vector<outliner::Candidate> &RepeatedSequenceLocs,
    unsigned MinRepeats) const {
  unsigned SequenceSize = 0;
  for (auto &MI : RepeatedSequenceLocs[0])
    SequenceSize += getInstSizeInBytes(MI);

  unsigned NumBytesToCreateFrame = 0;

  // We only allow outlining for functions having exactly matching return
  // address signing attributes, i.e., all share the same value for the
  // attribute "sign-return-address" and all share the same type of key they
  // are signed with.
  // Additionally we require all functions to simultaneously either support
  // v8.3a features or not. Otherwise an outlined function could get signed
  // using dedicated v8.3 instructions and a call from a function that doesn't
  // support v8.3 instructions would therefore be invalid.
  if (std::adjacent_find(
          RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
          [](const outliner::Candidate &a, const outliner::Candidate &b) {
            // Return true if a and b are non-equal w.r.t. return address
            // signing or support of v8.3a features
            if (outliningCandidatesSigningScopeConsensus(a, b) &&
                outliningCandidatesSigningKeyConsensus(a, b) &&
                outliningCandidatesV8_3OpsConsensus(a, b)) {
              return false;
            }
            return true;
          }) != RepeatedSequenceLocs.end()) {
    return std::nullopt;
  }

  // Since at this point all candidates agree on their return address signing
  // picking just one is fine. If the candidate functions potentially sign their
  // return addresses, the outlined function should do the same. Note that in
  // the case of "sign-return-address"="non-leaf" this is an assumption: It is
  // not certainly true that the outlined function will have to sign its return
  // address but this decision is made later, when the decision to outline
  // has already been made.
  // The same holds for the number of additional instructions we need: On
  // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
  // necessary. However, at this point we don't know if the outlined function
  // will have a RET instruction so we assume the worst.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  // Performing a tail call may require extra checks when PAuth is enabled.
  // If PAuth is disabled, set it to zero for uniformity.
  unsigned NumBytesToCheckLRInTCEpilogue = 0;
  if (RepeatedSequenceLocs[0]
          .getMF()
          ->getInfo<AArch64FunctionInfo>()
          ->shouldSignReturnAddress(true)) {
    // One PAC and one AUT instructions
    NumBytesToCreateFrame += 8;

    // PAuth is enabled - set extra tail call cost, if any.
    auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
        *RepeatedSequenceLocs[0].getMF());
    NumBytesToCheckLRInTCEpilogue =
        AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
    // Checking the authenticated LR value may significantly impact
    // SequenceSize, so account for it for more precise results.
    if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
      SequenceSize += NumBytesToCheckLRInTCEpilogue;

    // We have to check if sp modifying instructions would get outlined.
    // If so we only allow outlining if sp is unchanged overall, so matching
    // sub and add instructions are okay to outline, all other sp modifications
    // are not.
    auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
      int SPValue = 0;
      for (auto &MI : C) {
        if (MI.modifiesRegister(AArch64::SP, &TRI)) {
          switch (MI.getOpcode()) {
          case AArch64::ADDXri:
          case AArch64::ADDWri:
            assert(MI.getNumOperands() == 4 && "Wrong number of operands");
            assert(MI.getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MI.getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the add just increments sp. If so, we search for
            // matching sub instructions that decrement sp. If not, the
            // modification is illegal.
            if (MI.getOperand(1).getReg() == AArch64::SP)
              SPValue += MI.getOperand(2).getImm();
            else
              return true;
            break;
          case AArch64::SUBXri:
          case AArch64::SUBWri:
            assert(MI.getNumOperands() == 4 && "Wrong number of operands");
            assert(MI.getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MI.getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the sub just decrements sp. If so, we search for
            // matching add instructions that increment sp. If not, the
            // modification is illegal.
            if (MI.getOperand(1).getReg() == AArch64::SP)
              SPValue -= MI.getOperand(2).getImm();
            else
              return true;
            break;
          default:
            return true;
          }
        }
      }
      if (SPValue)
        return true;
      return false;
    };
    // Remove candidates with illegal stack modifying instructions
    llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8848 // If the sequence doesn't have enough candidates left, then we're done.
8849 if (RepeatedSequenceLocs
.size() < MinRepeats
)
8850 return std::nullopt
;
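  // Illustrative note (not from the original source): with the check above, a
  // balanced pair such as
  //   sub sp, sp, #16
  //   ...
  //   add sp, sp, #16
  // remains outlinable because the net SP change is zero, whereas a lone
  //   sub sp, sp, #16
  // (or any other kind of write to SP) disqualifies the candidate.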
  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  for (outliner::Candidate &C : RepeatedSequenceLocs)
    FlagsSetInAll &= C.Flags;

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  unsigned FrameID = MachineOutlinerDefault;
  NumBytesToCreateFrame += 4;

  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
    return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
  });

  // We check to see if CFI Instructions are present, and if they are
  // we find the number of CFI Instructions in the candidates.
  unsigned CFICount = 0;
  for (auto &I : RepeatedSequenceLocs[0]) {
    if (I.isCFIInstruction())
      CFICount++;
  }

  // We compare the number of found CFI Instructions to the number of CFI
  // instructions in the parent function for each candidate. We must check this
  // since if we outline one of the CFI instructions in a function, we have to
  // outline them all for correctness. If we do not, the address offsets will be
  // incorrect between the two sections of the program.
  for (outliner::Candidate &C : RepeatedSequenceLocs) {
    std::vector<MCCFIInstruction> CFIInstructions =
        C.getMF()->getFrameInstructions();

    if (CFICount > 0 && CFICount != CFIInstructions.size())
      return std::nullopt;
  }
8897 // Returns true if an instructions is safe to fix up, false otherwise.
8898 auto IsSafeToFixup
= [this, &TRI
](MachineInstr
&MI
) {
8902 if (!MI
.modifiesRegister(AArch64::SP
, &TRI
) &&
8903 !MI
.readsRegister(AArch64::SP
, &TRI
))
8906 // Any modification of SP will break our code to save/restore LR.
8907 // FIXME: We could handle some instructions which add a constant
8908 // offset to SP, with a bit more work.
8909 if (MI
.modifiesRegister(AArch64::SP
, &TRI
))
8912 // At this point, we have a stack instruction that we might need to
8913 // fix up. We'll handle it if it's a load or store.
8914 if (MI
.mayLoadOrStore()) {
8915 const MachineOperand
*Base
; // Filled with the base operand of MI.
8916 int64_t Offset
; // Filled with the offset of MI.
8917 bool OffsetIsScalable
;
8919 // Does it allow us to offset the base operand and is the base the
8921 if (!getMemOperandWithOffset(MI
, Base
, Offset
, OffsetIsScalable
, &TRI
) ||
8922 !Base
->isReg() || Base
->getReg() != AArch64::SP
)
8925 // Fix-up code below assumes bytes.
8926 if (OffsetIsScalable
)
8929 // Find the minimum/maximum offset for this instruction and check
8930 // if fixing it up would be in range.
8932 MaxOffset
; // Unscaled offsets for the instruction.
8933 // The scale to multiply the offsets by.
8934 TypeSize
Scale(0U, false), DummyWidth(0U, false);
8935 getMemOpInfo(MI
.getOpcode(), Scale
, DummyWidth
, MinOffset
, MaxOffset
);
8937 Offset
+= 16; // Update the offset to what it would be if we outlined.
8938 if (Offset
< MinOffset
* (int64_t)Scale
.getFixedValue() ||
8939 Offset
> MaxOffset
* (int64_t)Scale
.getFixedValue())
8942 // It's in range, so we can outline it.
8946 // FIXME: Add handling for instructions like "add x0, sp, #8".
8948 // We can't fix it up, so don't outline it.
8952 // True if it's possible to fix up each stack instruction in this sequence.
8953 // Important for frames/call variants that modify the stack.
8954 bool AllStackInstrsSafe
=
8955 llvm::all_of(RepeatedSequenceLocs
[0], IsSafeToFixup
);
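  // Illustrative note (not from the original source): the "+16" inside
  // IsSafeToFixup mirrors the fixup applied later in fixupPostOutline. If the
  // default frame spills LR with "str x30, [sp, #-16]!", every SP-relative
  // load/store inside the outlined body sees the stack 16 bytes lower, e.g. an
  // "ldr x0, [sp, #8]" in the candidate would have to become
  // "ldr x0, [sp, #24]" in the outlined function, which must still fit the
  // instruction's immediate range.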
8957 // If the last instruction in any candidate is a terminator, then we should
8958 // tail call all of the candidates.
8959 if (RepeatedSequenceLocs
[0].back().isTerminator()) {
8960 FrameID
= MachineOutlinerTailCall
;
8961 NumBytesToCreateFrame
= 0;
8962 unsigned NumBytesForCall
= 4 + NumBytesToCheckLRInTCEpilogue
;
8963 SetCandidateCallInfo(MachineOutlinerTailCall
, NumBytesForCall
);
8966 else if (LastInstrOpcode
== AArch64::BL
||
8967 ((LastInstrOpcode
== AArch64::BLR
||
8968 LastInstrOpcode
== AArch64::BLRNoIP
) &&
8970 // FIXME: Do we need to check if the code after this uses the value of LR?
8971 FrameID
= MachineOutlinerThunk
;
8972 NumBytesToCreateFrame
= NumBytesToCheckLRInTCEpilogue
;
8973 SetCandidateCallInfo(MachineOutlinerThunk
, 4);
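  // Illustrative note (not from the original source): the outliner chooses
  // among several call/frame shapes, roughly:
  //   MachineOutlinerTailCall - caller branches to the outlined body, which
  //                             ends in the original tail branch.
  //   MachineOutlinerThunk    - caller uses bl; the trailing bl in the body is
  //                             rewritten into a tail call.
  //   MachineOutlinerNoLRSave - caller uses a plain bl; LR was free anyway.
  //   MachineOutlinerRegSave  - caller copies LR to a spare register around
  //                             the bl.
  //   MachineOutlinerDefault  - caller spills LR to the stack around the bl.
  // The 4- and 12-byte call costs used below correspond to these sequences.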
8977 // We need to decide how to emit calls + frames. We can always emit the same
8978 // frame if we don't need to save to the stack. If we have to save to the
8979 // stack, then we need a different frame.
8980 unsigned NumBytesNoStackCalls
= 0;
8981 std::vector
<outliner::Candidate
> CandidatesWithoutStackFixups
;
8983 // Check if we have to save LR.
8984 for (outliner::Candidate
&C
: RepeatedSequenceLocs
) {
8986 (C
.Flags
& MachineOutlinerMBBFlags::LRUnavailableSomewhere
)
8987 ? C
.isAvailableAcrossAndOutOfSeq(AArch64::LR
, TRI
)
8989 // If we have a noreturn caller, then we're going to be conservative and
8990 // say that we have to save LR. If we don't have a ret at the end of the
8991 // block, then we can't reason about liveness accurately.
8993 // FIXME: We can probably do better than always disabling this in
8994 // noreturn functions by fixing up the liveness info.
8996 C
.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn
);
8998 // Is LR available? If so, we don't need a save.
8999 if (LRAvailable
&& !IsNoReturn
) {
9000 NumBytesNoStackCalls
+= 4;
9001 C
.setCallInfo(MachineOutlinerNoLRSave
, 4);
9002 CandidatesWithoutStackFixups
.push_back(C
);
9005 // Is an unused register available? If so, we won't modify the stack, so
9006 // we can outline with the same frame type as those that don't save LR.
9007 else if (findRegisterToSaveLRTo(C
)) {
9008 NumBytesNoStackCalls
+= 12;
9009 C
.setCallInfo(MachineOutlinerRegSave
, 12);
9010 CandidatesWithoutStackFixups
.push_back(C
);
9013 // Is SP used in the sequence at all? If not, we don't have to modify
9014 // the stack, so we are guaranteed to get the same frame.
9015 else if (C
.isAvailableInsideSeq(AArch64::SP
, TRI
)) {
9016 NumBytesNoStackCalls
+= 12;
9017 C
.setCallInfo(MachineOutlinerDefault
, 12);
9018 CandidatesWithoutStackFixups
.push_back(C
);
9021 // If we outline this, we need to modify the stack. Pretend we don't
9022 // outline this by saving all of its bytes.
9024 NumBytesNoStackCalls
+= SequenceSize
;
9028 // If there are no places where we have to save LR, then note that we
9029 // don't have to update the stack. Otherwise, give every candidate the
9030 // default call type, as long as it's safe to do so.
9031 if (!AllStackInstrsSafe
||
9032 NumBytesNoStackCalls
<= RepeatedSequenceLocs
.size() * 12) {
9033 RepeatedSequenceLocs
= CandidatesWithoutStackFixups
;
9034 FrameID
= MachineOutlinerNoLRSave
;
9035 if (RepeatedSequenceLocs
.size() < MinRepeats
)
9036 return std::nullopt
;
9038 SetCandidateCallInfo(MachineOutlinerDefault
, 12);
9040 // Bugzilla ID: 46767
9041 // TODO: Check if fixing up the stack more than once is safe so we can
9044 // An outline resulting in a caller that requires stack fixups at the
9045 // callsite to a callee that also requires stack fixups can happen when
9046 // there are no available registers at the candidate callsite for a
9047 // candidate that itself also has calls.
9049 // In other words if function_containing_sequence in the following pseudo
9050 // assembly requires that we save LR at the point of the call, but there
9051 // are no available registers: in this case we save using SP and as a
9052 // result the SP offsets requires stack fixups by multiples of 16.
9054 // function_containing_sequence:
9056 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9057 // call OUTLINED_FUNCTION_N
9058 // restore LR from SP
9061 // OUTLINED_FUNCTION_N:
9062 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9065 // restore LR from SP
9068 // Because the code to handle more than one stack fixup does not
9069 // currently have the proper checks for legality, these cases will assert
9070 // in the AArch64 MachineOutliner. This is because the code to do this
9071 // needs more hardening, testing, better checks that generated code is
9072 // legal, etc and because it is only verified to handle a single pass of
9075 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9076 // these cases until they are known to be handled. Bugzilla 46767 is
9077 // referenced in comments at the assert site.
9079 // To avoid asserting (or generating non-legal code on noassert builds)
9080 // we remove all candidates which would need more than one stack fixup by
9081 // pruning the cases where the candidate has calls while also having no
9082 // available LR and having no available general purpose registers to copy
9083 // LR to (ie one extra stack save/restore).
9085 if (FlagsSetInAll
& MachineOutlinerMBBFlags::HasCalls
) {
9086 erase_if(RepeatedSequenceLocs
, [this, &TRI
](outliner::Candidate
&C
) {
9087 auto IsCall
= [](const MachineInstr
&MI
) { return MI
.isCall(); };
9088 return (llvm::any_of(C
, IsCall
)) &&
9089 (!C
.isAvailableAcrossAndOutOfSeq(AArch64::LR
, TRI
) ||
9090 !findRegisterToSaveLRTo(C
));
9095 // If we dropped all of the candidates, bail out here.
9096 if (RepeatedSequenceLocs
.size() < MinRepeats
)
9097 return std::nullopt
;
9100 // Does every candidate's MBB contain a call? If so, then we might have a call
9102 if (FlagsSetInAll
& MachineOutlinerMBBFlags::HasCalls
) {
9103 // Check if the range contains a call. These require a save + restore of the
9105 outliner::Candidate
&FirstCand
= RepeatedSequenceLocs
[0];
9106 bool ModStackToSaveLR
= false;
9107 if (any_of(drop_end(FirstCand
),
9108 [](const MachineInstr
&MI
) { return MI
.isCall(); }))
9109 ModStackToSaveLR
= true;
9111 // Handle the last instruction separately. If this is a tail call, then the
9112 // last instruction is a call. We don't want to save + restore in this case.
9113 // However, it could be possible that the last instruction is a call without
9114 // it being valid to tail call this sequence. We should consider this as
9116 else if (FrameID
!= MachineOutlinerThunk
&&
9117 FrameID
!= MachineOutlinerTailCall
&& FirstCand
.back().isCall())
9118 ModStackToSaveLR
= true;
9120 if (ModStackToSaveLR
) {
9121 // We can't fix up the stack. Bail out.
9122 if (!AllStackInstrsSafe
)
9123 return std::nullopt
;
9125 // Save + restore LR.
9126 NumBytesToCreateFrame
+= 8;
9130 // If we have CFI instructions, we can only outline if the outlined section
9131 // can be a tail call
9132 if (FrameID
!= MachineOutlinerTailCall
&& CFICount
> 0)
9133 return std::nullopt
;
9135 return std::make_unique
<outliner::OutlinedFunction
>(
9136 RepeatedSequenceLocs
, SequenceSize
, NumBytesToCreateFrame
, FrameID
);
void AArch64InstrInfo::mergeOutliningCandidateAttributes(
    Function &F, std::vector<outliner::Candidate> &Candidates) const {
  // If a bunch of candidates reach this point they must agree on their return
  // address signing. It is therefore enough to just consider the signing
  // behaviour of one of them.
  const auto &CFn = Candidates.front().getMF()->getFunction();

  if (CFn.hasFnAttribute("ptrauth-returns"))
    F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
  if (CFn.hasFnAttribute("ptrauth-auth-traps"))
    F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
  // Since all candidates belong to the same module, just copy the
  // function-level attributes of an arbitrary function.
  if (CFn.hasFnAttribute("sign-return-address"))
    F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
  if (CFn.hasFnAttribute("sign-return-address-key"))
    F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));

  AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
}
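// Illustrative note (not from the original source): the copied attributes are
// the IR-level strings such as "sign-return-address"="all" or "non-leaf" and
// "sign-return-address-key"="a_key" or "b_key". Because every candidate
// already agreed on them in getOutliningCandidateInfo, copying them from the
// first candidate gives the outlined function the same signing behaviour as
// all of its callers.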
9160 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9161 MachineFunction
&MF
, bool OutlineFromLinkOnceODRs
) const {
9162 const Function
&F
= MF
.getFunction();
9164 // Can F be deduplicated by the linker? If it can, don't outline from it.
9165 if (!OutlineFromLinkOnceODRs
&& F
.hasLinkOnceODRLinkage())
9168 // Don't outline from functions with section markings; the program could
9169 // expect that all the code is in the named section.
9170 // FIXME: Allow outlining from multiple functions with the same section
9175 // Outlining from functions with redzones is unsafe since the outliner may
9176 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9178 AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
9179 if (!AFI
|| AFI
->hasRedZone().value_or(true))
9182 // FIXME: Determine whether it is safe to outline from functions which contain
9183 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9184 // outlined together and ensure it is safe to outline with async unwind info,
9185 // required for saving & restoring VG around calls.
9186 if (AFI
->hasStreamingModeChanges())
9189 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9190 if (MF
.getTarget().getMCAsmInfo()->usesWindowsCFI())
  // It's safe to outline from MF.
  return true;
}
9197 SmallVector
<std::pair
<MachineBasicBlock::iterator
, MachineBasicBlock::iterator
>>
9198 AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock
&MBB
,
9199 unsigned &Flags
) const {
9200 assert(MBB
.getParent()->getRegInfo().tracksLiveness() &&
9201 "Must track liveness!");
9203 std::pair
<MachineBasicBlock::iterator
, MachineBasicBlock::iterator
>>
9205 // According to the AArch64 Procedure Call Standard, the following are
9206 // undefined on entry/exit from a function call:
9208 // * Registers x16, x17, (and thus w16, w17)
9209 // * Condition codes (and thus the NZCV register)
9211 // If any of these registers are used inside or live across an outlined
9212 // function, then they may be modified later, either by the compiler or
9213 // some other tool (like the linker).
9215 // To avoid outlining in these situations, partition each block into ranges
9216 // where these registers are dead. We will only outline from those ranges.
9217 LiveRegUnits
LRU(getRegisterInfo());
9218 auto AreAllUnsafeRegsDead
= [&LRU
]() {
9219 return LRU
.available(AArch64::W16
) && LRU
.available(AArch64::W17
) &&
9220 LRU
.available(AArch64::NZCV
);
9223 // We need to know if LR is live across an outlining boundary later on in
9224 // order to decide how we'll create the outlined call, frame, etc.
9226 // It's pretty expensive to check this for *every candidate* within a block.
9227 // That's some potentially n^2 behaviour, since in the worst case, we'd need
9228 // to compute liveness from the end of the block for O(n) candidates within
9231 // So, to improve the average case, let's keep track of liveness from the end
9232 // of the block to the beginning of *every outlinable range*. If we know that
9233 // LR is available in every range we could outline from, then we know that
9234 // we don't need to check liveness for any candidate within that range.
9235 bool LRAvailableEverywhere
= true;
9236 // Compute liveness bottom-up.
9237 LRU
.addLiveOuts(MBB
);
9238 // Update flags that require info about the entire MBB.
9239 auto UpdateWholeMBBFlags
= [&Flags
](const MachineInstr
&MI
) {
9240 if (MI
.isCall() && !MI
.isTerminator())
9241 Flags
|= MachineOutlinerMBBFlags::HasCalls
;
9243 // Range: [RangeBegin, RangeEnd)
9244 MachineBasicBlock::instr_iterator RangeBegin
, RangeEnd
;
9246 auto CreateNewRangeStartingAt
=
9247 [&RangeBegin
, &RangeEnd
,
9248 &RangeLen
](MachineBasicBlock::instr_iterator NewBegin
) {
9249 RangeBegin
= NewBegin
;
9250 RangeEnd
= std::next(RangeBegin
);
9253 auto SaveRangeIfNonEmpty
= [&RangeLen
, &Ranges
, &RangeBegin
, &RangeEnd
]() {
9254 // At least one unsafe register is not dead. We do not want to outline at
9255 // this point. If it is long enough to outline from, save the range
9256 // [RangeBegin, RangeEnd).
9258 Ranges
.push_back(std::make_pair(RangeBegin
, RangeEnd
));
9260 // Find the first point where all unsafe registers are dead.
9261 // FIND: <safe instr> <-- end of first potential range
9262 // SKIP: <unsafe def>
9263 // SKIP: ... everything between ...
9264 // SKIP: <unsafe use>
9265 auto FirstPossibleEndPt
= MBB
.instr_rbegin();
9266 for (; FirstPossibleEndPt
!= MBB
.instr_rend(); ++FirstPossibleEndPt
) {
9267 LRU
.stepBackward(*FirstPossibleEndPt
);
9268 // Update flags that impact how we outline across the entire block,
9269 // regardless of safety.
9270 UpdateWholeMBBFlags(*FirstPossibleEndPt
);
9271 if (AreAllUnsafeRegsDead())
9274 // If we exhausted the entire block, we have no safe ranges to outline.
9275 if (FirstPossibleEndPt
== MBB
.instr_rend())
9278 CreateNewRangeStartingAt(FirstPossibleEndPt
->getIterator());
9279 // StartPt points to the first place where all unsafe registers
9280 // are dead (if there is any such point). Begin partitioning the MBB into
9282 for (auto &MI
: make_range(FirstPossibleEndPt
, MBB
.instr_rend())) {
9283 LRU
.stepBackward(MI
);
9284 UpdateWholeMBBFlags(MI
);
9285 if (!AreAllUnsafeRegsDead()) {
9286 SaveRangeIfNonEmpty();
9287 CreateNewRangeStartingAt(MI
.getIterator());
9290 LRAvailableEverywhere
&= LRU
.available(AArch64::LR
);
9291 RangeBegin
= MI
.getIterator();
9294 // Above loop misses the last (or only) range. If we are still safe, then
9295 // let's save the range.
9296 if (AreAllUnsafeRegsDead())
9297 SaveRangeIfNonEmpty();
  // We found the ranges bottom-up. The mapping expects them top-down, so
  // reverse the ranges here.
  std::reverse(Ranges.begin(), Ranges.end());

  // If there is at least one outlinable range where LR is unavailable
  // somewhere, remember that.
  if (!LRAvailableEverywhere)
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return Ranges;
}
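// Illustrative note (not from the original source): the partitioning above
// exists because a call to the outlined function may go through a
// linker-inserted veneer that clobbers x16/x17, and the call itself does not
// preserve NZCV. For example, in
//   cmp  x0, #0          ; defines NZCV
//   csel x1, x2, x3, eq  ; reads NZCV
// no outlinable range may cover the point between the two instructions, since
// NZCV is live there.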
9311 AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo
&MMI
,
9312 MachineBasicBlock::iterator
&MIT
,
9313 unsigned Flags
) const {
9314 MachineInstr
&MI
= *MIT
;
9315 MachineBasicBlock
*MBB
= MI
.getParent();
9316 MachineFunction
*MF
= MBB
->getParent();
9317 AArch64FunctionInfo
*FuncInfo
= MF
->getInfo
<AArch64FunctionInfo
>();
9319 // Don't outline anything used for return address signing. The outlined
9320 // function will get signed later if needed
9321 switch (MI
.getOpcode()) {
9323 case AArch64::PACIASP
:
9324 case AArch64::PACIBSP
:
9325 case AArch64::PACIASPPC
:
9326 case AArch64::PACIBSPPC
:
9327 case AArch64::AUTIASP
:
9328 case AArch64::AUTIBSP
:
9329 case AArch64::AUTIASPPCi
:
9330 case AArch64::AUTIASPPCr
:
9331 case AArch64::AUTIBSPPCi
:
9332 case AArch64::AUTIBSPPCr
:
9333 case AArch64::RETAA
:
9334 case AArch64::RETAB
:
9335 case AArch64::RETAASPPCi
:
9336 case AArch64::RETAASPPCr
:
9337 case AArch64::RETABSPPCi
:
9338 case AArch64::RETABSPPCr
:
9339 case AArch64::EMITBKEY
:
9340 case AArch64::PAUTH_PROLOGUE
:
9341 case AArch64::PAUTH_EPILOGUE
:
9342 return outliner::InstrType::Illegal
;
9345 // Don't outline LOHs.
9346 if (FuncInfo
->getLOHRelated().count(&MI
))
9347 return outliner::InstrType::Illegal
;
9349 // We can only outline these if we will tail call the outlined function, or
9350 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
9353 // FIXME: If the proper fixups for the offset are implemented, this should be
9355 if (MI
.isCFIInstruction())
9356 return outliner::InstrType::Legal
;
9358 // Is this a terminator for a basic block?
9359 if (MI
.isTerminator())
9360 // TargetInstrInfo::getOutliningType has already filtered out anything
9361 // that would break this, so we can allow it here.
9362 return outliner::InstrType::Legal
;
9364 // Make sure none of the operands are un-outlinable.
9365 for (const MachineOperand
&MOP
: MI
.operands()) {
9366 // A check preventing CFI indices was here before, but only CFI
9367 // instructions should have those.
9368 assert(!MOP
.isCFIIndex());
9370 // If it uses LR or W30 explicitly, then don't touch it.
9371 if (MOP
.isReg() && !MOP
.isImplicit() &&
9372 (MOP
.getReg() == AArch64::LR
|| MOP
.getReg() == AArch64::W30
))
9373 return outliner::InstrType::Illegal
;
9376 // Special cases for instructions that can always be outlined, but will fail
9377 // the later tests. e.g., ADRPs, which are PC-relative, use LR, but can always
9378 // be outlined because they don't require a *specific* value to be in LR.
9379 if (MI
.getOpcode() == AArch64::ADRP
)
9380 return outliner::InstrType::Legal
;
9382 // If MI is a call we might be able to outline it. We don't want to outline
9383 // any calls that rely on the position of items on the stack. When we outline
9384 // something containing a call, we have to emit a save and restore of LR in
9385 // the outlined function. Currently, this always happens by saving LR to the
9386 // stack. Thus, if we outline, say, half the parameters for a function call
9387 // plus the call, then we'll break the callee's expectations for the layout
9390 // FIXME: Allow calls to functions which construct a stack frame, as long
9391 // as they don't access arguments on the stack.
9392 // FIXME: Figure out some way to analyze functions defined in other modules.
9393 // We should be able to compute the memory usage based on the IR calling
9394 // convention, even if we can't see the definition.
9396 // Get the function associated with the call. Look at each operand and find
9397 // the one that represents the callee and get its name.
9398 const Function
*Callee
= nullptr;
9399 for (const MachineOperand
&MOP
: MI
.operands()) {
9400 if (MOP
.isGlobal()) {
9401 Callee
= dyn_cast
<Function
>(MOP
.getGlobal());
9406 // Never outline calls to mcount. There isn't any rule that would require
9407 // this, but the Linux kernel's "ftrace" feature depends on it.
9408 if (Callee
&& Callee
->getName() == "\01_mcount")
9409 return outliner::InstrType::Illegal
;
9411 // If we don't know anything about the callee, assume it depends on the
9412 // stack layout of the caller. In that case, it's only legal to outline
9413 // as a tail-call. Explicitly list the call instructions we know about so we
9414 // don't get unexpected results with call pseudo-instructions.
9415 auto UnknownCallOutlineType
= outliner::InstrType::Illegal
;
9416 if (MI
.getOpcode() == AArch64::BLR
||
9417 MI
.getOpcode() == AArch64::BLRNoIP
|| MI
.getOpcode() == AArch64::BL
)
9418 UnknownCallOutlineType
= outliner::InstrType::LegalTerminator
;
9421 return UnknownCallOutlineType
;
9423 // We have a function we have information about. Check if it's something we
9424 // can safely outline.
9425 MachineFunction
*CalleeMF
= MMI
.getMachineFunction(*Callee
);
9427 // We don't know what's going on with the callee at all. Don't touch it.
9429 return UnknownCallOutlineType
;
9431 // Check if we know anything about the callee saves on the function. If we
9432 // don't, then don't touch it, since that implies that we haven't
9433 // computed anything about its stack frame yet.
9434 MachineFrameInfo
&MFI
= CalleeMF
->getFrameInfo();
9435 if (!MFI
.isCalleeSavedInfoValid() || MFI
.getStackSize() > 0 ||
9436 MFI
.getNumObjects() > 0)
9437 return UnknownCallOutlineType
;
9439 // At this point, we can say that CalleeMF ought to not pass anything on the
9440 // stack. Therefore, we can outline it.
9441 return outliner::InstrType::Legal
;
9444 // Don't touch the link register or W30.
9445 if (MI
.readsRegister(AArch64::W30
, &getRegisterInfo()) ||
9446 MI
.modifiesRegister(AArch64::W30
, &getRegisterInfo()))
9447 return outliner::InstrType::Illegal
;
9449 // Don't outline BTI instructions, because that will prevent the outlining
9450 // site from being indirectly callable.
9451 if (hasBTISemantics(MI
))
9452 return outliner::InstrType::Illegal
;
9454 return outliner::InstrType::Legal
;
9457 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock
&MBB
) const {
9458 for (MachineInstr
&MI
: MBB
) {
9459 const MachineOperand
*Base
;
9460 TypeSize
Width(0, false);
9462 bool OffsetIsScalable
;
9464 // Is this a load or store with an immediate offset with SP as the base?
9465 if (!MI
.mayLoadOrStore() ||
9466 !getMemOperandWithOffsetWidth(MI
, Base
, Offset
, OffsetIsScalable
, Width
,
9468 (Base
->isReg() && Base
->getReg() != AArch64::SP
))
9471 // It is, so we have to fix it up.
9472 TypeSize
Scale(0U, false);
9473 int64_t Dummy1
, Dummy2
;
9475 MachineOperand
&StackOffsetOperand
= getMemOpBaseRegImmOfsOffsetOperand(MI
);
9476 assert(StackOffsetOperand
.isImm() && "Stack offset wasn't immediate!");
9477 getMemOpInfo(MI
.getOpcode(), Scale
, Width
, Dummy1
, Dummy2
);
9478 assert(Scale
!= 0 && "Unexpected opcode!");
9479 assert(!OffsetIsScalable
&& "Expected offset to be a byte offset");
9481 // We've pushed the return address to the stack, so add 16 to the offset.
9482 // This is safe, since we already checked if it would overflow when we
9483 // checked if this instruction was legal to outline.
9484 int64_t NewImm
= (Offset
+ 16) / (int64_t)Scale
.getFixedValue();
9485 StackOffsetOperand
.setImm(NewImm
);
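  // Illustrative note (not from the original source): for a 64-bit load such
  // as "ldr x0, [sp, #8]" the scaled immediate is 1 (Scale == 8). After the
  // outlined frame pushes LR with a 16-byte pre-decrement, the byte offset
  // becomes 8 + 16 = 24, so the new scaled immediate is 24 / 8 = 3, i.e.
  // "ldr x0, [sp, #24]".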
9489 static void signOutlinedFunction(MachineFunction
&MF
, MachineBasicBlock
&MBB
,
9490 const AArch64InstrInfo
*TII
,
9491 bool ShouldSignReturnAddr
) {
9492 if (!ShouldSignReturnAddr
)
9495 BuildMI(MBB
, MBB
.begin(), DebugLoc(), TII
->get(AArch64::PAUTH_PROLOGUE
))
9496 .setMIFlag(MachineInstr::FrameSetup
);
9497 BuildMI(MBB
, MBB
.getFirstInstrTerminator(), DebugLoc(),
9498 TII
->get(AArch64::PAUTH_EPILOGUE
))
9499 .setMIFlag(MachineInstr::FrameDestroy
);
9502 void AArch64InstrInfo::buildOutlinedFrame(
9503 MachineBasicBlock
&MBB
, MachineFunction
&MF
,
9504 const outliner::OutlinedFunction
&OF
) const {
9506 AArch64FunctionInfo
*FI
= MF
.getInfo
<AArch64FunctionInfo
>();
9508 if (OF
.FrameConstructionID
== MachineOutlinerTailCall
)
9509 FI
->setOutliningStyle("Tail Call");
9510 else if (OF
.FrameConstructionID
== MachineOutlinerThunk
) {
9511 // For thunk outlining, rewrite the last instruction from a call to a
9513 MachineInstr
*Call
= &*--MBB
.instr_end();
9514 unsigned TailOpcode
;
9515 if (Call
->getOpcode() == AArch64::BL
) {
9516 TailOpcode
= AArch64::TCRETURNdi
;
9518 assert(Call
->getOpcode() == AArch64::BLR
||
9519 Call
->getOpcode() == AArch64::BLRNoIP
);
9520 TailOpcode
= AArch64::TCRETURNriALL
;
9522 MachineInstr
*TC
= BuildMI(MF
, DebugLoc(), get(TailOpcode
))
9523 .add(Call
->getOperand(0))
9525 MBB
.insert(MBB
.end(), TC
);
9526 Call
->eraseFromParent();
9528 FI
->setOutliningStyle("Thunk");
9531 bool IsLeafFunction
= true;
9533 // Is there a call in the outlined range?
9534 auto IsNonTailCall
= [](const MachineInstr
&MI
) {
9535 return MI
.isCall() && !MI
.isReturn();
9538 if (llvm::any_of(MBB
.instrs(), IsNonTailCall
)) {
9539 // Fix up the instructions in the range, since we're going to modify the
9542 // Bugzilla ID: 46767
9543 // TODO: Check if fixing up twice is safe so we can outline these.
9544 assert(OF
.FrameConstructionID
!= MachineOutlinerDefault
&&
9545 "Can only fix up stack references once");
9546 fixupPostOutline(MBB
);
9548 IsLeafFunction
= false;
9550 // LR has to be a live in so that we can save it.
9551 if (!MBB
.isLiveIn(AArch64::LR
))
9552 MBB
.addLiveIn(AArch64::LR
);
9554 MachineBasicBlock::iterator It
= MBB
.begin();
9555 MachineBasicBlock::iterator Et
= MBB
.end();
9557 if (OF
.FrameConstructionID
== MachineOutlinerTailCall
||
9558 OF
.FrameConstructionID
== MachineOutlinerThunk
)
9559 Et
= std::prev(MBB
.end());
9561 // Insert a save before the outlined region
9562 MachineInstr
*STRXpre
= BuildMI(MF
, DebugLoc(), get(AArch64::STRXpre
))
9563 .addReg(AArch64::SP
, RegState::Define
)
9564 .addReg(AArch64::LR
)
9565 .addReg(AArch64::SP
)
9567 It
= MBB
.insert(It
, STRXpre
);
9569 if (MF
.getInfo
<AArch64FunctionInfo
>()->needsDwarfUnwindInfo(MF
)) {
9570 const TargetSubtargetInfo
&STI
= MF
.getSubtarget();
9571 const MCRegisterInfo
*MRI
= STI
.getRegisterInfo();
9572 unsigned DwarfReg
= MRI
->getDwarfRegNum(AArch64::LR
, true);
9574 // Add a CFI saying the stack was moved 16 B down.
9575 int64_t StackPosEntry
=
9576 MF
.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
9577 BuildMI(MBB
, It
, DebugLoc(), get(AArch64::CFI_INSTRUCTION
))
9578 .addCFIIndex(StackPosEntry
)
9579 .setMIFlags(MachineInstr::FrameSetup
);
9581 // Add a CFI saying that the LR that we want to find is now 16 B higher
9583 int64_t LRPosEntry
= MF
.addFrameInst(
9584 MCCFIInstruction::createOffset(nullptr, DwarfReg
, -16));
9585 BuildMI(MBB
, It
, DebugLoc(), get(AArch64::CFI_INSTRUCTION
))
9586 .addCFIIndex(LRPosEntry
)
9587 .setMIFlags(MachineInstr::FrameSetup
);
9590 // Insert a restore before the terminator for the function.
9591 MachineInstr
*LDRXpost
= BuildMI(MF
, DebugLoc(), get(AArch64::LDRXpost
))
9592 .addReg(AArch64::SP
, RegState::Define
)
9593 .addReg(AArch64::LR
, RegState::Define
)
9594 .addReg(AArch64::SP
)
9596 Et
= MBB
.insert(Et
, LDRXpost
);
9599 bool ShouldSignReturnAddr
= FI
->shouldSignReturnAddress(!IsLeafFunction
);
9601 // If this is a tail call outlined function, then there's already a return.
9602 if (OF
.FrameConstructionID
== MachineOutlinerTailCall
||
9603 OF
.FrameConstructionID
== MachineOutlinerThunk
) {
9604 signOutlinedFunction(MF
, MBB
, this, ShouldSignReturnAddr
);
9608 // It's not a tail call, so we have to insert the return ourselves.
9610 // LR has to be a live in so that we can return to it.
9611 if (!MBB
.isLiveIn(AArch64::LR
))
9612 MBB
.addLiveIn(AArch64::LR
);
9614 MachineInstr
*ret
= BuildMI(MF
, DebugLoc(), get(AArch64::RET
))
9615 .addReg(AArch64::LR
);
9616 MBB
.insert(MBB
.end(), ret
);
9618 signOutlinedFunction(MF
, MBB
, this, ShouldSignReturnAddr
);
9620 FI
->setOutliningStyle("Function");
9622 // Did we have to modify the stack by saving the link register?
9623 if (OF
.FrameConstructionID
!= MachineOutlinerDefault
)
9626 // We modified the stack.
9627 // Walk over the basic block and fix up all the stack accesses.
9628 fixupPostOutline(MBB
);
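  // Illustrative note (not from the original source): for the default frame
  // the outlined function built above looks roughly like
  //   str x30, [sp, #-16]!   ; save LR
  //   ...outlined body...
  //   ldr x30, [sp], #16     ; restore LR
  //   ret
  // with PAUTH_PROLOGUE/PAUTH_EPILOGUE wrapped around it when return-address
  // signing is required, and CFI emitted only when DWARF unwind info is
  // needed.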
9631 MachineBasicBlock::iterator
AArch64InstrInfo::insertOutlinedCall(
9632 Module
&M
, MachineBasicBlock
&MBB
, MachineBasicBlock::iterator
&It
,
9633 MachineFunction
&MF
, outliner::Candidate
&C
) const {
9635 // Are we tail calling?
9636 if (C
.CallConstructionID
== MachineOutlinerTailCall
) {
9637 // If yes, then we can just branch to the label.
9638 It
= MBB
.insert(It
, BuildMI(MF
, DebugLoc(), get(AArch64::TCRETURNdi
))
9639 .addGlobalAddress(M
.getNamedValue(MF
.getName()))
9644 // Are we saving the link register?
9645 if (C
.CallConstructionID
== MachineOutlinerNoLRSave
||
9646 C
.CallConstructionID
== MachineOutlinerThunk
) {
9647 // No, so just insert the call.
9648 It
= MBB
.insert(It
, BuildMI(MF
, DebugLoc(), get(AArch64::BL
))
9649 .addGlobalAddress(M
.getNamedValue(MF
.getName())));
9653 // We want to return the spot where we inserted the call.
9654 MachineBasicBlock::iterator CallPt
;
9656 // Instructions for saving and restoring LR around the call instruction we're
9659 MachineInstr
*Restore
;
9660 // Can we save to a register?
9661 if (C
.CallConstructionID
== MachineOutlinerRegSave
) {
9662 // FIXME: This logic should be sunk into a target-specific interface so that
9663 // we don't have to recompute the register.
9664 Register Reg
= findRegisterToSaveLRTo(C
);
9665 assert(Reg
&& "No callee-saved register available?");
9667 // LR has to be a live in so that we can save it.
9668 if (!MBB
.isLiveIn(AArch64::LR
))
9669 MBB
.addLiveIn(AArch64::LR
);
9671 // Save and restore LR from Reg.
9672 Save
= BuildMI(MF
, DebugLoc(), get(AArch64::ORRXrs
), Reg
)
9673 .addReg(AArch64::XZR
)
9674 .addReg(AArch64::LR
)
9676 Restore
= BuildMI(MF
, DebugLoc(), get(AArch64::ORRXrs
), AArch64::LR
)
9677 .addReg(AArch64::XZR
)
9681 // We have the default case. Save and restore from SP.
9682 Save
= BuildMI(MF
, DebugLoc(), get(AArch64::STRXpre
))
9683 .addReg(AArch64::SP
, RegState::Define
)
9684 .addReg(AArch64::LR
)
9685 .addReg(AArch64::SP
)
9687 Restore
= BuildMI(MF
, DebugLoc(), get(AArch64::LDRXpost
))
9688 .addReg(AArch64::SP
, RegState::Define
)
9689 .addReg(AArch64::LR
, RegState::Define
)
9690 .addReg(AArch64::SP
)
9694 It
= MBB
.insert(It
, Save
);
9698 It
= MBB
.insert(It
, BuildMI(MF
, DebugLoc(), get(AArch64::BL
))
9699 .addGlobalAddress(M
.getNamedValue(MF
.getName())));
9703 It
= MBB
.insert(It
, Restore
);
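  // Illustrative note (not from the original source): for the RegSave variant
  // the call site produced above is roughly
  //   mov xN, x30        ; ORRXrs copy of LR into the scratch register
  //   bl  OUTLINED_FUNCTION
  //   mov x30, xN
  // while the default variant wraps the bl in
  //   "str x30, [sp, #-16]!" / "ldr x30, [sp], #16" instead.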
bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return MF.getFunction().hasMinSize();
}
9712 void AArch64InstrInfo::buildClearRegister(Register Reg
, MachineBasicBlock
&MBB
,
9713 MachineBasicBlock::iterator Iter
,
9715 bool AllowSideEffects
) const {
9716 const MachineFunction
&MF
= *MBB
.getParent();
9717 const AArch64Subtarget
&STI
= MF
.getSubtarget
<AArch64Subtarget
>();
9718 const AArch64RegisterInfo
&TRI
= *STI
.getRegisterInfo();
9720 if (TRI
.isGeneralPurposeRegister(MF
, Reg
)) {
9721 BuildMI(MBB
, Iter
, DL
, get(AArch64::MOVZXi
), Reg
).addImm(0).addImm(0);
9722 } else if (STI
.isSVEorStreamingSVEAvailable()) {
9723 BuildMI(MBB
, Iter
, DL
, get(AArch64::DUP_ZI_D
), Reg
)
9726 } else if (STI
.isNeonAvailable()) {
9727 BuildMI(MBB
, Iter
, DL
, get(AArch64::MOVIv2d_ns
), Reg
)
9730 // This is a streaming-compatible function without SVE. We don't have full
9731 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
9732 // Since `movi v..` would be illegal here, use `fmov d..` instead.
9733 assert(STI
.hasNEON() && "Expected to have NEON.");
9734 Register Reg64
= TRI
.getSubReg(Reg
, AArch64::dsub
);
9735 BuildMI(MBB
, Iter
, DL
, get(AArch64::FMOVD0
), Reg64
);
9739 std::optional
<DestSourcePair
>
9740 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr
&MI
) const {
9742 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
9743 // and zero immediate operands used as an alias for mov instruction.
9744 if (((MI
.getOpcode() == AArch64::ORRWrs
&&
9745 MI
.getOperand(1).getReg() == AArch64::WZR
&&
9746 MI
.getOperand(3).getImm() == 0x0) ||
9747 (MI
.getOpcode() == AArch64::ORRWrr
&&
9748 MI
.getOperand(1).getReg() == AArch64::WZR
)) &&
9749 // Check that the w->w move is not a zero-extending w->x mov.
9750 (!MI
.getOperand(0).getReg().isVirtual() ||
9751 MI
.getOperand(0).getSubReg() == 0) &&
9752 (!MI
.getOperand(0).getReg().isPhysical() ||
9753 MI
.findRegisterDefOperandIdx(MI
.getOperand(0).getReg() - AArch64::W0
+
9755 /*TRI=*/nullptr) == -1))
9756 return DestSourcePair
{MI
.getOperand(0), MI
.getOperand(2)};
9758 if (MI
.getOpcode() == AArch64::ORRXrs
&&
9759 MI
.getOperand(1).getReg() == AArch64::XZR
&&
9760 MI
.getOperand(3).getImm() == 0x0)
9761 return DestSourcePair
{MI
.getOperand(0), MI
.getOperand(2)};
9763 return std::nullopt
;
std::optional<DestSourcePair>
AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
  if ((MI.getOpcode() == AArch64::ORRWrs &&
       MI.getOperand(1).getReg() == AArch64::WZR &&
       MI.getOperand(3).getImm() == 0x0) ||
      (MI.getOpcode() == AArch64::ORRWrr &&
       MI.getOperand(1).getReg() == AArch64::WZR))
    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
  return std::nullopt;
}
9777 std::optional
<RegImmPair
>
9778 AArch64InstrInfo::isAddImmediate(const MachineInstr
&MI
, Register Reg
) const {
9782 // TODO: Handle cases where Reg is a super- or sub-register of the
9783 // destination register.
9784 const MachineOperand
&Op0
= MI
.getOperand(0);
9785 if (!Op0
.isReg() || Reg
!= Op0
.getReg())
9786 return std::nullopt
;
9788 switch (MI
.getOpcode()) {
9790 return std::nullopt
;
9791 case AArch64::SUBWri
:
9792 case AArch64::SUBXri
:
9793 case AArch64::SUBSWri
:
9794 case AArch64::SUBSXri
:
9797 case AArch64::ADDSWri
:
9798 case AArch64::ADDSXri
:
9799 case AArch64::ADDWri
:
9800 case AArch64::ADDXri
: {
9801 // TODO: Third operand can be global address (usually some string).
9802 if (!MI
.getOperand(0).isReg() || !MI
.getOperand(1).isReg() ||
9803 !MI
.getOperand(2).isImm())
9804 return std::nullopt
;
9805 int Shift
= MI
.getOperand(3).getImm();
9806 assert((Shift
== 0 || Shift
== 12) && "Shift can be either 0 or 12");
9807 Offset
= Sign
* (MI
.getOperand(2).getImm() << Shift
);
9810 return RegImmPair
{MI
.getOperand(1).getReg(), Offset
};
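// Illustrative note (not from the original source): e.g. for
//   sub x1, x2, #4, lsl #12
// isAddImmediate returns {x2, -16384}, since the immediate 4 is shifted left
// by 12 and the SUB forms contribute a negative sign.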
/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
/// the destination register then, if possible, describe the value in terms of
/// the source register.
static std::optional<ParamLoadedValue>
describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
                       const TargetInstrInfo *TII,
                       const TargetRegisterInfo *TRI) {
  auto DestSrc = TII->isCopyLikeInstr(MI);
  if (!DestSrc)
    return std::nullopt;

  Register DestReg = DestSrc->Destination->getReg();
  Register SrcReg = DestSrc->Source->getReg();

  auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});

  // If the described register is the destination, just return the source.
  if (DestReg == DescribedReg)
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
  if (MI.getOpcode() == AArch64::ORRWrs &&
      TRI->isSuperRegister(DestReg, DescribedReg))
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // We may need to describe the lower part of a ORRXrs move.
  if (MI.getOpcode() == AArch64::ORRXrs &&
      TRI->isSubRegister(DestReg, DescribedReg)) {
    Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
    return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
  }

  assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
         "Unhandled ORR[XW]rs copy case");

  return std::nullopt;
}
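// Illustrative note (not from the original source): given "mov w0, w1"
// (the ORRWrs-with-WZR form), a query about x0 is answered with w1, relying on
// the implicit zero-extension of 32-bit moves; given "mov x0, x1" (ORRXrs), a
// query about w0 is answered with the sub_32 register w1.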
bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
  // Functions cannot be split to different sections on AArch64 if they have
  // a red zone. This is because relaxing a cross-section branch may require
  // incrementing the stack pointer to spill a register, which would overwrite
  // the red zone.
  if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
    return false;

  return TargetInstrInfo::isFunctionSafeToSplit(MF);
}
9862 bool AArch64InstrInfo::isMBBSafeToSplitToCold(
9863 const MachineBasicBlock
&MBB
) const {
9864 // Asm Goto blocks can contain conditional branches to goto labels, which can
9865 // get moved out of range of the branch instruction.
9866 auto isAsmGoto
= [](const MachineInstr
&MI
) {
9867 return MI
.getOpcode() == AArch64::INLINEASM_BR
;
9869 if (llvm::any_of(MBB
, isAsmGoto
) || MBB
.isInlineAsmBrIndirectTarget())
9872 // Because jump tables are label-relative instead of table-relative, they all
9873 // must be in the same section or relocation fixup handling will fail.
9875 // Check if MBB is a jump table target
9876 const MachineJumpTableInfo
*MJTI
= MBB
.getParent()->getJumpTableInfo();
9877 auto containsMBB
= [&MBB
](const MachineJumpTableEntry
&JTE
) {
9878 return llvm::is_contained(JTE
.MBBs
, &MBB
);
9880 if (MJTI
!= nullptr && llvm::any_of(MJTI
->getJumpTables(), containsMBB
))
9883 // Check if MBB contains a jump table lookup
9884 for (const MachineInstr
&MI
: MBB
) {
9885 switch (MI
.getOpcode()) {
9886 case TargetOpcode::G_BRJT
:
9887 case AArch64::JumpTableDest32
:
9888 case AArch64::JumpTableDest16
:
9889 case AArch64::JumpTableDest8
:
9896 // MBB isn't a special case, so it's safe to be split to the cold section.
9900 std::optional
<ParamLoadedValue
>
9901 AArch64InstrInfo::describeLoadedValue(const MachineInstr
&MI
,
9902 Register Reg
) const {
9903 const MachineFunction
*MF
= MI
.getMF();
9904 const TargetRegisterInfo
*TRI
= MF
->getSubtarget().getRegisterInfo();
9905 switch (MI
.getOpcode()) {
9906 case AArch64::MOVZWi
:
9907 case AArch64::MOVZXi
: {
9908 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9909 // 64-bit parameters, so we need to consider super-registers.
9910 if (!TRI
->isSuperRegisterEq(MI
.getOperand(0).getReg(), Reg
))
9911 return std::nullopt
;
9913 if (!MI
.getOperand(1).isImm())
9914 return std::nullopt
;
9915 int64_t Immediate
= MI
.getOperand(1).getImm();
9916 int Shift
= MI
.getOperand(2).getImm();
9917 return ParamLoadedValue(MachineOperand::CreateImm(Immediate
<< Shift
),
9920 case AArch64::ORRWrs
:
9921 case AArch64::ORRXrs
:
9922 return describeORRLoadedValue(MI
, Reg
, this, TRI
);
9925 return TargetInstrInfo::describeLoadedValue(MI
, Reg
);
9928 bool AArch64InstrInfo::isExtendLikelyToBeFolded(
9929 MachineInstr
&ExtMI
, MachineRegisterInfo
&MRI
) const {
9930 assert(ExtMI
.getOpcode() == TargetOpcode::G_SEXT
||
9931 ExtMI
.getOpcode() == TargetOpcode::G_ZEXT
||
9932 ExtMI
.getOpcode() == TargetOpcode::G_ANYEXT
);
9934 // Anyexts are nops.
9935 if (ExtMI
.getOpcode() == TargetOpcode::G_ANYEXT
)
9938 Register DefReg
= ExtMI
.getOperand(0).getReg();
9939 if (!MRI
.hasOneNonDBGUse(DefReg
))
9942 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9944 auto *UserMI
= &*MRI
.use_instr_nodbg_begin(DefReg
);
9945 return UserMI
->getOpcode() == TargetOpcode::G_PTR_ADD
;
uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::ElementSizeMask;
}

bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
}

bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
}

unsigned int
AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
  return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
}
bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                                             unsigned Scale) const {
  if (Offset && Scale)
    return false;

  if (!Scale) {
    // 9-bit signed offset
    if (isInt<9>(Offset))
      return true;

    // 12-bit unsigned offset
    unsigned Shift = Log2_64(NumBytes);
    if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
        // Must be a multiple of NumBytes (NumBytes is a power of 2)
        (Offset >> Shift) << Shift == Offset)
      return true;

    return false;
  }

  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
  return Scale == 1 || (Scale > 0 && Scale == NumBytes);
}
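// Illustrative note (not from the original source): for an 8-byte access,
// offsets in the range -256..255 are accepted via the 9-bit signed (unscaled)
// form, positive multiples of 8 up to 8 * 4095 via the 12-bit unsigned scaled
// form, and register-offset modes require the scale to be either 1 or exactly
// the access size (8).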
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
    return AArch64::BLRNoIP;

  return AArch64::BLR;
}
9996 MachineBasicBlock::iterator
9997 AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI
,
9998 Register TargetReg
, bool FrameSetup
) const {
  assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10001 MachineBasicBlock
&MBB
= *MBBI
->getParent();
10002 MachineFunction
&MF
= *MBB
.getParent();
10003 const AArch64InstrInfo
*TII
=
10004 MF
.getSubtarget
<AArch64Subtarget
>().getInstrInfo();
10005 int64_t ProbeSize
= MF
.getInfo
<AArch64FunctionInfo
>()->getStackProbeSize();
10006 DebugLoc DL
= MBB
.findDebugLoc(MBBI
);
10008 MachineFunction::iterator MBBInsertPoint
= std::next(MBB
.getIterator());
10009 MachineBasicBlock
*LoopTestMBB
=
10010 MF
.CreateMachineBasicBlock(MBB
.getBasicBlock());
10011 MF
.insert(MBBInsertPoint
, LoopTestMBB
);
10012 MachineBasicBlock
*LoopBodyMBB
=
10013 MF
.CreateMachineBasicBlock(MBB
.getBasicBlock());
10014 MF
.insert(MBBInsertPoint
, LoopBodyMBB
);
10015 MachineBasicBlock
*ExitMBB
= MF
.CreateMachineBasicBlock(MBB
.getBasicBlock());
10016 MF
.insert(MBBInsertPoint
, ExitMBB
);
10017 MachineInstr::MIFlag Flags
=
10018 FrameSetup
? MachineInstr::FrameSetup
: MachineInstr::NoFlags
;
10021 // SUB SP, SP, #ProbeSize
10022 emitFrameOffset(*LoopTestMBB
, LoopTestMBB
->end(), DL
, AArch64::SP
,
10023 AArch64::SP
, StackOffset::getFixed(-ProbeSize
), TII
, Flags
);
10025 // CMP SP, TargetReg
10026 BuildMI(*LoopTestMBB
, LoopTestMBB
->end(), DL
, TII
->get(AArch64::SUBSXrx64
),
10028 .addReg(AArch64::SP
)
10030 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX
, 0))
10031 .setMIFlags(Flags
);
10033 // B.<Cond> LoopExit
10034 BuildMI(*LoopTestMBB
, LoopTestMBB
->end(), DL
, TII
->get(AArch64::Bcc
))
10035 .addImm(AArch64CC::LE
)
10037 .setMIFlags(Flags
);
10040 BuildMI(*LoopBodyMBB
, LoopBodyMBB
->end(), DL
, TII
->get(AArch64::STRXui
))
10041 .addReg(AArch64::XZR
)
10042 .addReg(AArch64::SP
)
10044 .setMIFlags(Flags
);
10047 BuildMI(*LoopBodyMBB
, LoopBodyMBB
->end(), DL
, TII
->get(AArch64::B
))
10048 .addMBB(LoopTestMBB
)
10049 .setMIFlags(Flags
);
10052 // MOV SP, TargetReg
10053 BuildMI(*ExitMBB
, ExitMBB
->end(), DL
, TII
->get(AArch64::ADDXri
), AArch64::SP
)
10056 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0))
10057 .setMIFlags(Flags
);
10060 BuildMI(*ExitMBB
, ExitMBB
->end(), DL
, TII
->get(AArch64::LDRXui
))
10061 .addReg(AArch64::XZR
, RegState::Define
)
10062 .addReg(AArch64::SP
)
10064 .setMIFlags(Flags
);
10066 ExitMBB
->splice(ExitMBB
->end(), &MBB
, std::next(MBBI
), MBB
.end());
10067 ExitMBB
->transferSuccessorsAndUpdatePHIs(&MBB
);
10069 LoopTestMBB
->addSuccessor(ExitMBB
);
10070 LoopTestMBB
->addSuccessor(LoopBodyMBB
);
10071 LoopBodyMBB
->addSuccessor(LoopTestMBB
);
10072 MBB
.addSuccessor(LoopTestMBB
);
10075 if (MF
.getRegInfo().reservedRegsFrozen())
10076 fullyRecomputeLiveIns({ExitMBB
, LoopBodyMBB
, LoopTestMBB
});
10078 return ExitMBB
->begin();
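// Illustrative note (not from the original source): the emitted probe loop is
// roughly
//   LoopTest: sub  sp, sp, #ProbeSize
//             cmp  sp, <TargetReg>     ; subs xzr, sp, xN, uxtx
//             b.le Exit
//   LoopBody: str  xzr, [sp]           ; touch the newly allocated page
//             b    LoopTest
//   Exit:     mov  sp, <TargetReg>     ; add sp, xN, #0
//             ldr  xzr, [sp]           ; probe the final page
// so SP never moves more than ProbeSize bytes below the last probed address.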
10082 class AArch64PipelinerLoopInfo
: public TargetInstrInfo::PipelinerLoopInfo
{
10083 MachineFunction
*MF
;
10084 const TargetInstrInfo
*TII
;
10085 const TargetRegisterInfo
*TRI
;
10086 MachineRegisterInfo
&MRI
;
10088 /// The block of the loop
10089 MachineBasicBlock
*LoopBB
;
10090 /// The conditional branch of the loop
10091 MachineInstr
*CondBranch
;
10092 /// The compare instruction for loop control
10093 MachineInstr
*Comp
;
10094 /// The number of the operand of the loop counter value in Comp
10095 unsigned CompCounterOprNum
;
10096 /// The instruction that updates the loop counter value
10097 MachineInstr
*Update
;
10098 /// The number of the operand of the loop counter value in Update
10099 unsigned UpdateCounterOprNum
;
10100 /// The initial value of the loop counter
10102 /// True iff Update is a predecessor of Comp
10103 bool IsUpdatePriorComp
;
10105 /// The normalized condition used by createTripCountGreaterCondition()
10106 SmallVector
<MachineOperand
, 4> Cond
;
10109 AArch64PipelinerLoopInfo(MachineBasicBlock
*LoopBB
, MachineInstr
*CondBranch
,
10110 MachineInstr
*Comp
, unsigned CompCounterOprNum
,
10111 MachineInstr
*Update
, unsigned UpdateCounterOprNum
,
10112 Register Init
, bool IsUpdatePriorComp
,
10113 const SmallVectorImpl
<MachineOperand
> &Cond
)
10114 : MF(Comp
->getParent()->getParent()),
10115 TII(MF
->getSubtarget().getInstrInfo()),
10116 TRI(MF
->getSubtarget().getRegisterInfo()), MRI(MF
->getRegInfo()),
10117 LoopBB(LoopBB
), CondBranch(CondBranch
), Comp(Comp
),
10118 CompCounterOprNum(CompCounterOprNum
), Update(Update
),
10119 UpdateCounterOprNum(UpdateCounterOprNum
), Init(Init
),
10120 IsUpdatePriorComp(IsUpdatePriorComp
), Cond(Cond
.begin(), Cond
.end()) {}
10122 bool shouldIgnoreForPipelining(const MachineInstr
*MI
) const override
{
10123 // Make the instructions for loop control be placed in stage 0.
10124 // The predecessors of Comp are considered by the caller.
10128 std::optional
<bool> createTripCountGreaterCondition(
10129 int TC
, MachineBasicBlock
&MBB
,
10130 SmallVectorImpl
<MachineOperand
> &CondParam
) override
{
10131 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10132 // Cond is normalized for such use.
10133 // The predecessors of the branch are assumed to have already been inserted.
10138 void createRemainingIterationsGreaterCondition(
10139 int TC
, MachineBasicBlock
&MBB
, SmallVectorImpl
<MachineOperand
> &Cond
,
10140 DenseMap
<MachineInstr
*, MachineInstr
*> &LastStage0Insts
) override
;
10142 void setPreheader(MachineBasicBlock
*NewPreheader
) override
{}
10144 void adjustTripCount(int TripCountAdjust
) override
{}
10146 bool isMVEExpanderSupported() override
{ return true; }
10150 /// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10151 /// is replaced by ReplaceReg. The output register is newly created.
10152 /// The other operands are unchanged from MI.
10153 static Register
cloneInstr(const MachineInstr
*MI
, unsigned ReplaceOprNum
,
10154 Register ReplaceReg
, MachineBasicBlock
&MBB
,
10155 MachineBasicBlock::iterator InsertTo
) {
10156 MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
10157 const TargetInstrInfo
*TII
= MBB
.getParent()->getSubtarget().getInstrInfo();
10158 const TargetRegisterInfo
*TRI
=
10159 MBB
.getParent()->getSubtarget().getRegisterInfo();
10160 MachineInstr
*NewMI
= MBB
.getParent()->CloneMachineInstr(MI
);
10161 Register Result
= 0;
10162 for (unsigned I
= 0; I
< NewMI
->getNumOperands(); ++I
) {
10163 if (I
== 0 && NewMI
->getOperand(0).getReg().isVirtual()) {
10164 Result
= MRI
.createVirtualRegister(
10165 MRI
.getRegClass(NewMI
->getOperand(0).getReg()));
10166 NewMI
->getOperand(I
).setReg(Result
);
10167 } else if (I
== ReplaceOprNum
) {
10168 MRI
.constrainRegClass(
10170 TII
->getRegClass(NewMI
->getDesc(), I
, TRI
, *MBB
.getParent()));
10171 NewMI
->getOperand(I
).setReg(ReplaceReg
);
10174 MBB
.insert(InsertTo
, NewMI
);
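// Illustrative note (not from the original source): cloneInstr is used below to
// re-materialize the loop-control update and compare instructions in the check
// blocks. For example, cloning an "ADDXri %counter, 1, 0" with
// ReplaceOprNum == 1 and a fresh counter register yields a new ADDXri whose
// result register is also freshly created and returned.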
10178 void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10179 int TC
, MachineBasicBlock
&MBB
, SmallVectorImpl
<MachineOperand
> &Cond
,
10180 DenseMap
<MachineInstr
*, MachineInstr
*> &LastStage0Insts
) {
10181 // Create and accumulate conditions for next TC iterations.
10183 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10184 // # iteration of the kernel
10186 // # insert the following instructions
10187 // cond = CSINCXr 0, 0, C, implicit $nzcv
10188 // counter = ADDXri counter, 1 # clone from this->Update
10189 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10190 // cond = CSINCXr cond, cond, C, implicit $nzcv
10191 // ... (repeat TC times)
10192 // SUBSXri cond, 0, implicit-def $nzcv
10194 assert(CondBranch
->getOpcode() == AArch64::Bcc
);
10195 // CondCode to exit the loop
10196 AArch64CC::CondCode CC
=
10197 (AArch64CC::CondCode
)CondBranch
->getOperand(0).getImm();
10198 if (CondBranch
->getOperand(1).getMBB() == LoopBB
)
10199 CC
= AArch64CC::getInvertedCondCode(CC
);
10201 // Accumulate conditions to exit the loop
10202 Register AccCond
= AArch64::XZR
;
10204 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10205 auto AccumulateCond
= [&](Register CurCond
,
10206 AArch64CC::CondCode CC
) -> Register
{
10207 Register NewCond
= MRI
.createVirtualRegister(&AArch64::GPR64commonRegClass
);
10208 BuildMI(MBB
, MBB
.end(), Comp
->getDebugLoc(), TII
->get(AArch64::CSINCXr
))
10209 .addReg(NewCond
, RegState::Define
)
10212 .addImm(AArch64CC::getInvertedCondCode(CC
));
10216 if (!LastStage0Insts
.empty() && LastStage0Insts
[Comp
]->getParent() == &MBB
) {
10217 // Update and Comp for I==0 already exist in MBB
10218 // (MBB is an unrolled kernel)
10220 for (int I
= 0; I
<= TC
; ++I
) {
10221 Register NextCounter
;
10224 cloneInstr(Comp
, CompCounterOprNum
, Counter
, MBB
, MBB
.end());
10226 AccCond
= AccumulateCond(AccCond
, CC
);
10230 if (Update
!= Comp
&& IsUpdatePriorComp
) {
10232 LastStage0Insts
[Comp
]->getOperand(CompCounterOprNum
).getReg();
10233 NextCounter
= cloneInstr(Update
, UpdateCounterOprNum
, Counter
, MBB
,
10236 // can use already calculated value
10237 NextCounter
= LastStage0Insts
[Update
]->getOperand(0).getReg();
10239 } else if (Update
!= Comp
) {
10241 cloneInstr(Update
, UpdateCounterOprNum
, Counter
, MBB
, MBB
.end());
10244 Counter
= NextCounter
;
10248 if (LastStage0Insts
.empty()) {
10249 // use initial counter value (testing if the trip count is sufficient to
10250 // be executed by pipelined code)
10252 if (IsUpdatePriorComp
)
10254 cloneInstr(Update
, UpdateCounterOprNum
, Counter
, MBB
, MBB
.end());
10256 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
10257 Counter
= LastStage0Insts
[Comp
]->getOperand(CompCounterOprNum
).getReg();
10260 for (int I
= 0; I
<= TC
; ++I
) {
10261 Register NextCounter
;
10263 cloneInstr(Comp
, CompCounterOprNum
, Counter
, MBB
, MBB
.end());
10264 AccCond
= AccumulateCond(AccCond
, CC
);
10265 if (I
!= TC
&& Update
!= Comp
)
10267 cloneInstr(Update
, UpdateCounterOprNum
, Counter
, MBB
, MBB
.end());
10268 Counter
= NextCounter
;
10272 // If AccCond == 0, the remainder is greater than TC.
10273 BuildMI(MBB
, MBB
.end(), Comp
->getDebugLoc(), TII
->get(AArch64::SUBSXri
))
10274 .addReg(AArch64::XZR
, RegState::Define
| RegState::Dead
)
10279 Cond
.push_back(MachineOperand::CreateImm(AArch64CC::EQ
));
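// Illustrative note (not from the original source): the helper accumulates the
// exit condition with CSINC starting from xzr, e.g. for each checked iteration
//   condN = csinc condN-1, condN-1, <inverted exit cc>  ; +1 iff the loop exits
// interleaved with cloned counter-update/compare pairs, and finally
//   subs xzr, condTC, #0
// so the EQ condition pushed into Cond means "no checked iteration exits",
// i.e. the remaining trip count is greater than TC.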
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
                          Register &RegMBB, Register &RegOther) {
  assert(Phi.getNumOperands() == 5);
  if (Phi.getOperand(2).getMBB() == MBB) {
    RegMBB = Phi.getOperand(1).getReg();
    RegOther = Phi.getOperand(3).getReg();
  } else {
    assert(Phi.getOperand(4).getMBB() == MBB);
    RegMBB = Phi.getOperand(3).getReg();
    RegOther = Phi.getOperand(1).getReg();
  }
}

static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
  if (!Reg.isVirtual())
    return false;
  const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  return MRI.getVRegDef(Reg)->getParent() != BB;
}
10302 /// If Reg is an induction variable, return true and set some parameters
10303 static bool getIndVarInfo(Register Reg
, const MachineBasicBlock
*LoopBB
,
10304 MachineInstr
*&UpdateInst
,
10305 unsigned &UpdateCounterOprNum
, Register
&InitReg
,
10306 bool &IsUpdatePriorComp
) {
10312 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
10313 // Reg = COPY Reg0 ; COPY is ignored.
10314 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
10315 // ; Reg is the value calculated in the previous
10316 // ; iteration, so IsUpdatePriorComp == false.
10318 if (LoopBB
->pred_size() != 2)
10320 if (!Reg
.isVirtual())
10322 const MachineRegisterInfo
&MRI
= LoopBB
->getParent()->getRegInfo();
10323 UpdateInst
= nullptr;
10324 UpdateCounterOprNum
= 0;
10326 IsUpdatePriorComp
= true;
10327 Register CurReg
= Reg
;
10329 MachineInstr
*Def
= MRI
.getVRegDef(CurReg
);
10330 if (Def
->getParent() != LoopBB
)
10332 if (Def
->isCopy()) {
10333 // Ignore copy instructions unless they contain subregisters
10334 if (Def
->getOperand(0).getSubReg() || Def
->getOperand(1).getSubReg())
10336 CurReg
= Def
->getOperand(1).getReg();
10337 } else if (Def
->isPHI()) {
10341 IsUpdatePriorComp
= false;
10342 extractPhiReg(*Def
, LoopBB
, CurReg
, InitReg
);
    } else {
      if (UpdateInst)
        return false;
      switch (Def->getOpcode()) {
      case AArch64::ADDSXri:
      case AArch64::ADDSWri:
      case AArch64::SUBSXri:
      case AArch64::SUBSWri:
      case AArch64::ADDXri:
      case AArch64::ADDWri:
      case AArch64::SUBXri:
      case AArch64::SUBWri:
        UpdateInst = Def;
        UpdateCounterOprNum = 1;
        break;
      case AArch64::ADDSXrr:
      case AArch64::ADDSWrr:
      case AArch64::SUBSXrr:
      case AArch64::SUBSWrr:
      case AArch64::ADDXrr:
      case AArch64::ADDWrr:
      case AArch64::SUBXrr:
      case AArch64::SUBWrr:
        UpdateInst = Def;
        if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
          UpdateCounterOprNum = 1;
        else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
          UpdateCounterOprNum = 2;
        else
          return false;
        break;
      default:
        return false;
      }
      CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
    }
    if (!CurReg.isVirtual())
      return false;
    if (Reg == CurReg)
      break;
  }

  if (!UpdateInst)
    return false;
  return true;
}

std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  // Accept loops that meet the following conditions
  // * The conditional branch is BCC
  // * The compare instruction is ADDS/SUBS/WHILEXX
  // * One operand of the compare is an induction variable and the other is a
  //   loop invariant value
  // * The induction variable is incremented/decremented by a single instruction
  // * Does not contain CALL or instructions which have unmodeled side effects
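  //
  // For instance, a counted loop such as
  //   for (int i = 0; i < n; ++i)
  //     a[i] += b[i];
  // typically becomes a single-block loop whose back edge is controlled by a
  // SUBS of the induction variable against the limit followed by a Bcc, which
  // satisfies the conditions above as long as the loop body contains no calls.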

  for (MachineInstr &MI : *LoopBB)
    if (MI.isCall() || MI.hasUnmodeledSideEffects())
      // This instruction may use NZCV, which interferes with the instruction to
      // be inserted for loop control.
      return nullptr;

  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
    return nullptr;

  // Infinite loops are not supported
  if (TBB == LoopBB && FBB == LoopBB)
    return nullptr;

  // Must be conditional branch
  if (TBB != LoopBB && FBB == nullptr)
    return nullptr;

  assert((TBB == LoopBB || FBB == LoopBB) &&
         "The Loop must be a single-basic-block loop");

  MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
  const TargetRegisterInfo &TRI = getRegisterInfo();

  if (CondBranch->getOpcode() != AArch64::Bcc)
    return nullptr;
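
  // analyzeBranch() fills Cond with the condition for taking the branch to
  // TBB, which may describe staying in the loop rather than leaving it; the
  // normalization below makes Cond consistently encode the loop-exit test.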
  // Normalization for createTripCountGreaterCondition()
  if (TBB == LoopBB)
    reverseBranchCondition(Cond);

  MachineInstr *Comp = nullptr;
  unsigned CompCounterOprNum = 0;
  for (MachineInstr &MI : reverse(*LoopBB)) {
    if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
      // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
      // operands is a loop invariant value
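      // (e.g. a SUBSWri of the counter against an immediate, or a SUBSXrr of
      // the counter against a limit register defined outside the loop).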
      switch (MI.getOpcode()) {
      case AArch64::SUBSXri:
      case AArch64::SUBSWri:
      case AArch64::ADDSXri:
      case AArch64::ADDSWri:
        Comp = &MI;
        CompCounterOprNum = 1;
        break;
      case AArch64::ADDSWrr:
      case AArch64::ADDSXrr:
      case AArch64::SUBSWrr:
      case AArch64::SUBSXrr:
        Comp = &MI;
        break;
      default:
        if (isWhileOpcode(MI.getOpcode())) {
          Comp = &MI;
          break;
        }
        return nullptr;
      }

      if (CompCounterOprNum == 0) {
        if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
          CompCounterOprNum = 2;
        else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
          CompCounterOprNum = 1;
        else
          return nullptr;
      }
      break;
    }
  }
  if (!Comp)
    return nullptr;

  MachineInstr *Update = nullptr;
  Register Init;
  bool IsUpdatePriorComp;
  unsigned UpdateCounterOprNum;
  if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
                     Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
    return nullptr;

  return std::make_unique<AArch64PipelinerLoopInfo>(
      LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
      Init, IsUpdatePriorComp, Cond);
}

/// verifyInstruction - Perform target specific instruction verification.
bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
                                         StringRef &ErrInfo) const {
  // Verify that immediate offsets on load/store instructions are within range.
  // Stack objects with an FI operand are excluded as they can be fixed up.
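  // For example, LDRXui encodes an unsigned 12-bit offset scaled by 8 bytes,
  // so getMemOpInfo() reports a legal immediate range of [0, 4095] for it; any
  // encoded immediate outside that range is flagged as an error below.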
  TypeSize Scale(0U, false), Width(0U, false);
  int64_t MinOffset, MaxOffset;
  if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
    unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
    if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
      int64_t Imm = MI.getOperand(ImmIdx).getImm();
      if (Imm < MinOffset || Imm > MaxOffset) {
        ErrInfo = "Unexpected immediate on load/store instruction";
        return false;
      }
    }
  }

  return true;
}

#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"