1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64RegisterBankInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/GlobalISel/Utils.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineConstantPool.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/raw_ostream.h"
38 #define DEBUG_TYPE "aarch64-isel"
40 using namespace llvm;
42 namespace {
44 #define GET_GLOBALISEL_PREDICATE_BITSET
45 #include "AArch64GenGlobalISel.inc"
46 #undef GET_GLOBALISEL_PREDICATE_BITSET
48 class AArch64InstructionSelector : public InstructionSelector {
49 public:
50 AArch64InstructionSelector(const AArch64TargetMachine &TM,
51 const AArch64Subtarget &STI,
52 const AArch64RegisterBankInfo &RBI);
54 bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
55 static const char *getName() { return DEBUG_TYPE; }
57 private:
58 /// tblgen-erated 'select' implementation, used as the initial selector for
59 /// the patterns that don't require complex C++.
60 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
62 // A lowering phase that runs before any selection attempts.
64 void preISelLower(MachineInstr &I) const;
66 // An early selection function that runs before the selectImpl() call.
67 bool earlySelect(MachineInstr &I) const;
69 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
70 bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;
72 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
73 void contractCrossBankCopyIntoStore(MachineInstr &I,
74 MachineRegisterInfo &MRI) const;
76 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
77 MachineRegisterInfo &MRI) const;
78 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
79 MachineRegisterInfo &MRI) const;
81 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
82 MachineRegisterInfo &MRI) const;
84 bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
85 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
87 // Helper to generate an equivalent of scalar_to_vector into a new register,
88 // returned via 'Dst'.
89 MachineInstr *emitScalarToVector(unsigned EltSize,
90 const TargetRegisterClass *DstRC,
91 Register Scalar,
92 MachineIRBuilder &MIRBuilder) const;
94 /// Emit a lane insert into \p DstReg, or a new vector register if None is
95 /// provided.
96 ///
97 /// The lane inserted into is defined by \p LaneIdx. The vector source
98 /// register is given by \p SrcReg. The register containing the element is
99 /// given by \p EltReg.
100 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
101 Register EltReg, unsigned LaneIdx,
102 const RegisterBank &RB,
103 MachineIRBuilder &MIRBuilder) const;
104 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
105 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
106 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
107 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
109 void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
110 SmallVectorImpl<Optional<int>> &Idxs) const;
111 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
112 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
113 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
114 bool selectSplitVectorUnmerge(MachineInstr &I,
115 MachineRegisterInfo &MRI) const;
116 bool selectIntrinsicWithSideEffects(MachineInstr &I,
117 MachineRegisterInfo &MRI) const;
118 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
119 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
120 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
121 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
122 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
123 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
125 unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
126 MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
127 MachineIRBuilder &MIRBuilder) const;
129 // Emit a vector concat operation.
130 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
131 Register Op2,
132 MachineIRBuilder &MIRBuilder) const;
133 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
134 MachineOperand &Predicate,
135 MachineIRBuilder &MIRBuilder) const;
136 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
137 MachineIRBuilder &MIRBuilder) const;
138 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
139 MachineIRBuilder &MIRBuilder) const;
140 MachineInstr *emitTST(const Register &LHS, const Register &RHS,
141 MachineIRBuilder &MIRBuilder) const;
142 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
143 const RegisterBank &DstRB, LLT ScalarTy,
144 Register VecReg, unsigned LaneIdx,
145 MachineIRBuilder &MIRBuilder) const;
147 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
148 /// materialized using an FMOV instruction, then update MI and return it.
149 /// Otherwise, do nothing and return nullptr.
150 MachineInstr *emitFMovForFConstant(MachineInstr &MI,
151 MachineRegisterInfo &MRI) const;
153 /// Emit a CSet for a compare.
154 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
155 MachineIRBuilder &MIRBuilder) const;
157 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
158 // We use these manually instead of using the importer since it doesn't
159 // support SDNodeXForm.
160 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
161 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
162 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
163 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
165 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
167 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
168 unsigned Size) const;
170 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
171 return selectAddrModeUnscaled(Root, 1);
173 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
174 return selectAddrModeUnscaled(Root, 2);
176 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
177 return selectAddrModeUnscaled(Root, 4);
179 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
180 return selectAddrModeUnscaled(Root, 8);
182 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
183 return selectAddrModeUnscaled(Root, 16);
186 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
187 unsigned Size) const;
188 template <int Width>
189 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
190 return selectAddrModeIndexed(Root, Width / 8);
193 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
194 const MachineRegisterInfo &MRI) const;
195 ComplexRendererFns
196 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
197 unsigned SizeInBytes) const;
198 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
199 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
200 unsigned SizeInBytes) const;
202 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
204 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
205 void materializeLargeCMVal(MachineInstr &I, const Value *V,
206 unsigned char OpFlags) const;
208 // Optimization methods.
209 bool tryOptVectorShuffle(MachineInstr &I) const;
210 bool tryOptVectorDup(MachineInstr &MI) const;
211 bool tryOptSelect(MachineInstr &MI) const;
212 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
213 MachineOperand &Predicate,
214 MachineIRBuilder &MIRBuilder) const;
216 const AArch64TargetMachine &TM;
217 const AArch64Subtarget &STI;
218 const AArch64InstrInfo &TII;
219 const AArch64RegisterInfo &TRI;
220 const AArch64RegisterBankInfo &RBI;
222 #define GET_GLOBALISEL_PREDICATES_DECL
223 #include "AArch64GenGlobalISel.inc"
224 #undef GET_GLOBALISEL_PREDICATES_DECL
226 // We declare the temporaries used by selectImpl() in the class to minimize the
227 // cost of constructing placeholder values.
228 #define GET_GLOBALISEL_TEMPORARIES_DECL
229 #include "AArch64GenGlobalISel.inc"
230 #undef GET_GLOBALISEL_TEMPORARIES_DECL
233 } // end anonymous namespace
235 #define GET_GLOBALISEL_IMPL
236 #include "AArch64GenGlobalISel.inc"
237 #undef GET_GLOBALISEL_IMPL
239 AArch64InstructionSelector::AArch64InstructionSelector(
240 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
241 const AArch64RegisterBankInfo &RBI)
242 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
243 TRI(*STI.getRegisterInfo()), RBI(RBI),
244 #define GET_GLOBALISEL_PREDICATES_INIT
245 #include "AArch64GenGlobalISel.inc"
246 #undef GET_GLOBALISEL_PREDICATES_INIT
247 #define GET_GLOBALISEL_TEMPORARIES_INIT
248 #include "AArch64GenGlobalISel.inc"
249 #undef GET_GLOBALISEL_TEMPORARIES_INIT
253 // FIXME: This should be target-independent, inferred from the types declared
254 // for each class in the bank.
255 static const TargetRegisterClass *
256 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
257 const RegisterBankInfo &RBI,
258 bool GetAllRegSet = false) {
259 if (RB.getID() == AArch64::GPRRegBankID) {
260 if (Ty.getSizeInBits() <= 32)
261 return GetAllRegSet ? &AArch64::GPR32allRegClass
262 : &AArch64::GPR32RegClass;
263 if (Ty.getSizeInBits() == 64)
264 return GetAllRegSet ? &AArch64::GPR64allRegClass
265 : &AArch64::GPR64RegClass;
266 return nullptr;
269 if (RB.getID() == AArch64::FPRRegBankID) {
270 if (Ty.getSizeInBits() <= 16)
271 return &AArch64::FPR16RegClass;
272 if (Ty.getSizeInBits() == 32)
273 return &AArch64::FPR32RegClass;
274 if (Ty.getSizeInBits() == 64)
275 return &AArch64::FPR64RegClass;
276 if (Ty.getSizeInBits() == 128)
277 return &AArch64::FPR128RegClass;
278 return nullptr;
281 return nullptr;
284 /// Given a register bank and a size in bits, return the smallest register class
285 /// that can represent that combination.
286 static const TargetRegisterClass *
287 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
288 bool GetAllRegSet = false) {
289 unsigned RegBankID = RB.getID();
291 if (RegBankID == AArch64::GPRRegBankID) {
292 if (SizeInBits <= 32)
293 return GetAllRegSet ? &AArch64::GPR32allRegClass
294 : &AArch64::GPR32RegClass;
295 if (SizeInBits == 64)
296 return GetAllRegSet ? &AArch64::GPR64allRegClass
297 : &AArch64::GPR64RegClass;
300 if (RegBankID == AArch64::FPRRegBankID) {
301 switch (SizeInBits) {
302 default:
303 return nullptr;
304 case 8:
305 return &AArch64::FPR8RegClass;
306 case 16:
307 return &AArch64::FPR16RegClass;
308 case 32:
309 return &AArch64::FPR32RegClass;
310 case 64:
311 return &AArch64::FPR64RegClass;
312 case 128:
313 return &AArch64::FPR128RegClass;
317 return nullptr;
320 /// Returns the correct subregister to use for a given register class.
321 static bool getSubRegForClass(const TargetRegisterClass *RC,
322 const TargetRegisterInfo &TRI, unsigned &SubReg) {
323 switch (TRI.getRegSizeInBits(*RC)) {
324 case 8:
325 SubReg = AArch64::bsub;
326 break;
327 case 16:
328 SubReg = AArch64::hsub;
329 break;
330 case 32:
331 if (RC == &AArch64::GPR32RegClass)
332 SubReg = AArch64::sub_32;
333 else
334 SubReg = AArch64::ssub;
335 break;
336 case 64:
337 SubReg = AArch64::dsub;
338 break;
339 default:
340 LLVM_DEBUG(
341 dbgs() << "Couldn't find appropriate subregister for register class.");
342 return false;
345 return true;
348 /// Check whether \p I is a currently unsupported binary operation:
349 /// - it has an unsized type
350 /// - an operand is not a vreg
351 /// - its operands are not all on the same register bank
352 /// These are checks that should someday live in the verifier, but right now,
353 /// these are mostly limitations of the AArch64 selector.
354 static bool unsupportedBinOp(const MachineInstr &I,
355 const AArch64RegisterBankInfo &RBI,
356 const MachineRegisterInfo &MRI,
357 const AArch64RegisterInfo &TRI) {
358 LLT Ty = MRI.getType(I.getOperand(0).getReg());
359 if (!Ty.isValid()) {
360 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
361 return true;
364 const RegisterBank *PrevOpBank = nullptr;
365 for (auto &MO : I.operands()) {
366 // FIXME: Support non-register operands.
367 if (!MO.isReg()) {
368 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
369 return true;
372 // FIXME: Can generic operations have physical registers operands? If
373 // so, this will need to be taught about that, and we'll need to get the
374 // bank out of the minimal class for the register.
375 // Either way, this needs to be documented (and possibly verified).
376 if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
377 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
378 return true;
381 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
382 if (!OpBank) {
383 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
384 return true;
387 if (PrevOpBank && OpBank != PrevOpBank) {
388 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
389 return true;
391 PrevOpBank = OpBank;
393 return false;
396 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
397 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
398 /// and of size \p OpSize.
399 /// \returns \p GenericOpc if the combination is unsupported.
400 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
401 unsigned OpSize) {
402 switch (RegBankID) {
403 case AArch64::GPRRegBankID:
404 if (OpSize == 32) {
405 switch (GenericOpc) {
406 case TargetOpcode::G_SHL:
407 return AArch64::LSLVWr;
408 case TargetOpcode::G_LSHR:
409 return AArch64::LSRVWr;
410 case TargetOpcode::G_ASHR:
411 return AArch64::ASRVWr;
412 default:
413 return GenericOpc;
415 } else if (OpSize == 64) {
416 switch (GenericOpc) {
417 case TargetOpcode::G_GEP:
418 return AArch64::ADDXrr;
419 case TargetOpcode::G_SHL:
420 return AArch64::LSLVXr;
421 case TargetOpcode::G_LSHR:
422 return AArch64::LSRVXr;
423 case TargetOpcode::G_ASHR:
424 return AArch64::ASRVXr;
425 default:
426 return GenericOpc;
429 break;
430 case AArch64::FPRRegBankID:
431 switch (OpSize) {
432 case 32:
433 switch (GenericOpc) {
434 case TargetOpcode::G_FADD:
435 return AArch64::FADDSrr;
436 case TargetOpcode::G_FSUB:
437 return AArch64::FSUBSrr;
438 case TargetOpcode::G_FMUL:
439 return AArch64::FMULSrr;
440 case TargetOpcode::G_FDIV:
441 return AArch64::FDIVSrr;
442 default:
443 return GenericOpc;
445 case 64:
446 switch (GenericOpc) {
447 case TargetOpcode::G_FADD:
448 return AArch64::FADDDrr;
449 case TargetOpcode::G_FSUB:
450 return AArch64::FSUBDrr;
451 case TargetOpcode::G_FMUL:
452 return AArch64::FMULDrr;
453 case TargetOpcode::G_FDIV:
454 return AArch64::FDIVDrr;
455 case TargetOpcode::G_OR:
456 return AArch64::ORRv8i8;
457 default:
458 return GenericOpc;
461 break;
463 return GenericOpc;
466 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
467 /// appropriate for the (value) register bank \p RegBankID and of memory access
468 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
469 /// addressing mode (e.g., LDRXui).
470 /// \returns \p GenericOpc if the combination is unsupported.
471 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
472 unsigned OpSize) {
473 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
474 switch (RegBankID) {
475 case AArch64::GPRRegBankID:
476 switch (OpSize) {
477 case 8:
478 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
479 case 16:
480 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
481 case 32:
482 return isStore ? AArch64::STRWui : AArch64::LDRWui;
483 case 64:
484 return isStore ? AArch64::STRXui : AArch64::LDRXui;
486 break;
487 case AArch64::FPRRegBankID:
488 switch (OpSize) {
489 case 8:
490 return isStore ? AArch64::STRBui : AArch64::LDRBui;
491 case 16:
492 return isStore ? AArch64::STRHui : AArch64::LDRHui;
493 case 32:
494 return isStore ? AArch64::STRSui : AArch64::LDRSui;
495 case 64:
496 return isStore ? AArch64::STRDui : AArch64::LDRDui;
498 break;
500 return GenericOpc;
503 #ifndef NDEBUG
504 /// Helper function that verifies that we have a valid copy at the end of
505 /// selectCopy. Verifies that the source and dest have the expected sizes and
506 /// then returns true.
507 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
508 const MachineRegisterInfo &MRI,
509 const TargetRegisterInfo &TRI,
510 const RegisterBankInfo &RBI) {
511 const unsigned DstReg = I.getOperand(0).getReg();
512 const unsigned SrcReg = I.getOperand(1).getReg();
513 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
514 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
516 // Make sure the size of the source and dest line up.
517 assert(
518 (DstSize == SrcSize ||
519 // Copies are a means to set up initial types; the number of
520 // bits may not exactly match.
521 (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
522 // Copies are a means to copy bits around, as long as we are
523 // on the same register class, that's fine. Otherwise, that
524 // means we need some SUBREG_TO_REG or AND & co.
525 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
526 "Copy with different width?!");
528 // Check the size of the destination.
529 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
530 "GPRs cannot get more than 64-bit width values");
532 return true;
534 #endif
536 /// Helper function for selectCopy. Inserts a subregister copy from
537 /// \p *From to \p *To, linking it up to \p I.
539 /// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
541 /// CopyReg (From class) = COPY SrcReg
542 /// SubRegCopy (To class) = COPY CopyReg:SubReg
543 /// Dst = COPY SubRegCopy
544 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
545 const RegisterBankInfo &RBI, unsigned SrcReg,
546 const TargetRegisterClass *From,
547 const TargetRegisterClass *To,
548 unsigned SubReg) {
549 MachineIRBuilder MIB(I);
550 auto Copy = MIB.buildCopy({From}, {SrcReg});
551 auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
552 .addReg(Copy.getReg(0), 0, SubReg);
553 MachineOperand &RegOp = I.getOperand(1);
554 RegOp.setReg(SubRegCopy.getReg(0));
556 // It's possible that the destination register won't be constrained. Make
557 // sure that happens.
558 if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
559 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
561 return true;
564 /// Helper function to get the source and destination register classes for a
565 /// copy. Returns a std::pair containing the source register class for the
566 /// copy, and the destination register class for the copy. If a register class
567 /// cannot be determined, then it will be nullptr.
568 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
569 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
570 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
571 const RegisterBankInfo &RBI) {
572 unsigned DstReg = I.getOperand(0).getReg();
573 unsigned SrcReg = I.getOperand(1).getReg();
574 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
575 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
576 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
577 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
579 // Special casing for cross-bank copies of s1s. We can technically represent
580 // a 1-bit value with any size of register. The minimum size for a GPR is 32
581 // bits. So, we need to put the FPR on 32 bits as well.
583 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
584 // then we can pull it into the helpers that get the appropriate class for a
585 // register bank. Or make a new helper that carries along some constraint
586 // information.
587 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
588 SrcSize = DstSize = 32;
590 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
591 getMinClassForRegBank(DstRegBank, DstSize, true)};
594 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
595 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
596 const RegisterBankInfo &RBI) {
598 unsigned DstReg = I.getOperand(0).getReg();
599 unsigned SrcReg = I.getOperand(1).getReg();
600 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
601 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
603 // Find the correct register classes for the source and destination registers.
604 const TargetRegisterClass *SrcRC;
605 const TargetRegisterClass *DstRC;
606 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
608 if (!DstRC) {
609 LLVM_DEBUG(dbgs() << "Unexpected dest size "
610 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
611 return false;
614 // A couple helpers below, for making sure that the copy we produce is valid.
616 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
617 // to verify that the src and dst are the same size, since that's handled by
618 // the SUBREG_TO_REG.
619 bool KnownValid = false;
621 // Returns true, or asserts if something we don't expect happens. Instead of
622 // returning true, we return isValidCopy() to ensure that we verify the
623 // result.
624 auto CheckCopy = [&]() {
625 // If we have a bitcast or something, we can't have physical registers.
626 assert(
627 (I.isCopy() ||
628 (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
629 !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
630 "No phys reg on generic operator!");
631 assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
632 (void)KnownValid;
633 return true;
636 // Is this a copy? If so, then we may need to insert a subregister copy, or
637 // a SUBREG_TO_REG.
638 if (I.isCopy()) {
639 // Yes. Check if there's anything to fix up.
640 if (!SrcRC) {
641 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
642 return false;
645 // Is this a cross-bank copy?
646 if (DstRegBank.getID() != SrcRegBank.getID()) {
647 // If we're doing a cross-bank copy on different-sized registers, we need
648 // to do a bit more work.
649 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
650 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
652 if (SrcSize > DstSize) {
653 // We're doing a cross-bank copy into a smaller register. We need a
654 // subregister copy. First, get a register class that's on the same bank
655 // as the destination, but the same size as the source.
656 const TargetRegisterClass *SubregRC =
657 getMinClassForRegBank(DstRegBank, SrcSize, true);
658 assert(SubregRC && "Didn't get a register class for subreg?");
660 // Get the appropriate subregister for the destination.
661 unsigned SubReg = 0;
662 if (!getSubRegForClass(DstRC, TRI, SubReg)) {
663 LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
664 return false;
667 // Now, insert a subregister copy using the new register class.
668 selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
669 return CheckCopy();
672 else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
673 SrcSize == 16) {
674 // Special case for FPR16 to GPR32.
675 // FIXME: This can probably be generalized like the above case.
676 unsigned PromoteReg =
677 MRI.createVirtualRegister(&AArch64::FPR32RegClass);
678 BuildMI(*I.getParent(), I, I.getDebugLoc(),
679 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
680 .addImm(0)
681 .addUse(SrcReg)
682 .addImm(AArch64::hsub);
683 MachineOperand &RegOp = I.getOperand(1);
684 RegOp.setReg(PromoteReg);
686 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
687 KnownValid = true;
691 // If the destination is a physical register, then there's nothing to
692 // change, so we're done.
693 if (TargetRegisterInfo::isPhysicalRegister(DstReg))
694 return CheckCopy();
697 // No need to constrain SrcReg. It will get constrained when we hit another
698 // of its uses or defs. Copies do not have constraints.
699 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
700 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
701 << " operand\n");
702 return false;
704 I.setDesc(TII.get(AArch64::COPY));
705 return CheckCopy();
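// Illustrative sketch (not from a specific test): a same-sized cross-bank copy
// such as
//
//   %dst:gpr(s32) = COPY %src:fpr(s32)
//
// takes none of the special paths above; its operands just get constrained to
// GPR32/FPR32 here, and later copy lowering turns it into something like
// "fmov w0, s0". Only size-mismatched cross-bank copies need the subregister
// copy or SUBREG_TO_REG handling.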
708 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
709 if (!DstTy.isScalar() || !SrcTy.isScalar())
710 return GenericOpc;
712 const unsigned DstSize = DstTy.getSizeInBits();
713 const unsigned SrcSize = SrcTy.getSizeInBits();
715 switch (DstSize) {
716 case 32:
717 switch (SrcSize) {
718 case 32:
719 switch (GenericOpc) {
720 case TargetOpcode::G_SITOFP:
721 return AArch64::SCVTFUWSri;
722 case TargetOpcode::G_UITOFP:
723 return AArch64::UCVTFUWSri;
724 case TargetOpcode::G_FPTOSI:
725 return AArch64::FCVTZSUWSr;
726 case TargetOpcode::G_FPTOUI:
727 return AArch64::FCVTZUUWSr;
728 default:
729 return GenericOpc;
731 case 64:
732 switch (GenericOpc) {
733 case TargetOpcode::G_SITOFP:
734 return AArch64::SCVTFUXSri;
735 case TargetOpcode::G_UITOFP:
736 return AArch64::UCVTFUXSri;
737 case TargetOpcode::G_FPTOSI:
738 return AArch64::FCVTZSUWDr;
739 case TargetOpcode::G_FPTOUI:
740 return AArch64::FCVTZUUWDr;
741 default:
742 return GenericOpc;
744 default:
745 return GenericOpc;
747 case 64:
748 switch (SrcSize) {
749 case 32:
750 switch (GenericOpc) {
751 case TargetOpcode::G_SITOFP:
752 return AArch64::SCVTFUWDri;
753 case TargetOpcode::G_UITOFP:
754 return AArch64::UCVTFUWDri;
755 case TargetOpcode::G_FPTOSI:
756 return AArch64::FCVTZSUXSr;
757 case TargetOpcode::G_FPTOUI:
758 return AArch64::FCVTZUUXSr;
759 default:
760 return GenericOpc;
762 case 64:
763 switch (GenericOpc) {
764 case TargetOpcode::G_SITOFP:
765 return AArch64::SCVTFUXDri;
766 case TargetOpcode::G_UITOFP:
767 return AArch64::UCVTFUXDri;
768 case TargetOpcode::G_FPTOSI:
769 return AArch64::FCVTZSUXDr;
770 case TargetOpcode::G_FPTOUI:
771 return AArch64::FCVTZUUXDr;
772 default:
773 return GenericOpc;
775 default:
776 return GenericOpc;
778 default:
779 return GenericOpc;
781 return GenericOpc;
784 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
785 const RegisterBankInfo &RBI) {
786 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
787 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
788 AArch64::GPRRegBankID);
789 LLT Ty = MRI.getType(I.getOperand(0).getReg());
790 if (Ty == LLT::scalar(32))
791 return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
792 else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
793 return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
794 return 0;
797 /// Helper function to select the opcode for a G_FCMP.
798 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
799 // If this is a compare against +0.0, then we don't have to explicitly
800 // materialize a constant.
801 const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
802 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
803 unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
804 if (OpSize != 32 && OpSize != 64)
805 return 0;
806 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
807 {AArch64::FCMPSri, AArch64::FCMPDri}};
808 return CmpOpcTbl[ShouldUseImm][OpSize == 64];
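// Illustrative example: for a compare against +0.0, e.g.
//
//   %c:gpr(s1) = G_FCMP floatpred(oge), %x:fpr(s32), %zero
//
// where %zero is "G_FCONSTANT float 0.0", this returns FCMPSri, the
// "fcmp s0, #0.0" form, and avoids materializing the zero in a register. Any
// other right-hand side uses the register-register FCMPSrr/FCMPDrr variants.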
811 /// Returns true if \p P is an unsigned integer comparison predicate.
812 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
813 switch (P) {
814 default:
815 return false;
816 case CmpInst::ICMP_UGT:
817 case CmpInst::ICMP_UGE:
818 case CmpInst::ICMP_ULT:
819 case CmpInst::ICMP_ULE:
820 return true;
824 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
825 switch (P) {
826 default:
827 llvm_unreachable("Unknown condition code!");
828 case CmpInst::ICMP_NE:
829 return AArch64CC::NE;
830 case CmpInst::ICMP_EQ:
831 return AArch64CC::EQ;
832 case CmpInst::ICMP_SGT:
833 return AArch64CC::GT;
834 case CmpInst::ICMP_SGE:
835 return AArch64CC::GE;
836 case CmpInst::ICMP_SLT:
837 return AArch64CC::LT;
838 case CmpInst::ICMP_SLE:
839 return AArch64CC::LE;
840 case CmpInst::ICMP_UGT:
841 return AArch64CC::HI;
842 case CmpInst::ICMP_UGE:
843 return AArch64CC::HS;
844 case CmpInst::ICMP_ULT:
845 return AArch64CC::LO;
846 case CmpInst::ICMP_ULE:
847 return AArch64CC::LS;
851 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
852 AArch64CC::CondCode &CondCode,
853 AArch64CC::CondCode &CondCode2) {
854 CondCode2 = AArch64CC::AL;
855 switch (P) {
856 default:
857 llvm_unreachable("Unknown FP condition!");
858 case CmpInst::FCMP_OEQ:
859 CondCode = AArch64CC::EQ;
860 break;
861 case CmpInst::FCMP_OGT:
862 CondCode = AArch64CC::GT;
863 break;
864 case CmpInst::FCMP_OGE:
865 CondCode = AArch64CC::GE;
866 break;
867 case CmpInst::FCMP_OLT:
868 CondCode = AArch64CC::MI;
869 break;
870 case CmpInst::FCMP_OLE:
871 CondCode = AArch64CC::LS;
872 break;
873 case CmpInst::FCMP_ONE:
874 CondCode = AArch64CC::MI;
875 CondCode2 = AArch64CC::GT;
876 break;
877 case CmpInst::FCMP_ORD:
878 CondCode = AArch64CC::VC;
879 break;
880 case CmpInst::FCMP_UNO:
881 CondCode = AArch64CC::VS;
882 break;
883 case CmpInst::FCMP_UEQ:
884 CondCode = AArch64CC::EQ;
885 CondCode2 = AArch64CC::VS;
886 break;
887 case CmpInst::FCMP_UGT:
888 CondCode = AArch64CC::HI;
889 break;
890 case CmpInst::FCMP_UGE:
891 CondCode = AArch64CC::PL;
892 break;
893 case CmpInst::FCMP_ULT:
894 CondCode = AArch64CC::LT;
895 break;
896 case CmpInst::FCMP_ULE:
897 CondCode = AArch64CC::LE;
898 break;
899 case CmpInst::FCMP_UNE:
900 CondCode = AArch64CC::NE;
901 break;
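// Note: predicates such as FCMP_ONE and FCMP_UEQ need two condition codes.
// Callers must check for CondCode2 != AArch64CC::AL and emit two conditional
// instructions in that case; e.g. FCMP_UEQ ("unordered or equal") is satisfied
// when either EQ or VS holds, so it lowers to two Bcc's (or two selects).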
905 bool AArch64InstructionSelector::selectCompareBranch(
906 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
908 const Register CondReg = I.getOperand(0).getReg();
909 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
910 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
911 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
912 CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
913 if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
914 return false;
916 Register LHS = CCMI->getOperand(2).getReg();
917 Register RHS = CCMI->getOperand(3).getReg();
918 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
919 if (!VRegAndVal)
920 std::swap(RHS, LHS);
922 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
923 if (!VRegAndVal || VRegAndVal->Value != 0) {
924 MachineIRBuilder MIB(I);
925 // If we can't select a CBZ then emit a cmp + Bcc.
926 if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
927 CCMI->getOperand(1), MIB))
928 return false;
929 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
930 (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
931 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
932 I.eraseFromParent();
933 return true;
936 const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
937 if (RB.getID() != AArch64::GPRRegBankID)
938 return false;
940 const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
941 if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
942 return false;
944 const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
945 unsigned CBOpc = 0;
946 if (CmpWidth <= 32)
947 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
948 else if (CmpWidth == 64)
949 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
950 else
951 return false;
953 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
954 .addUse(LHS)
955 .addMBB(DestMBB)
956 .constrainAllUses(TII, TRI, RBI);
958 I.eraseFromParent();
959 return true;
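// Illustrative sketch (hypothetical vregs): the pair
//
//   %c:gpr(s1) = G_ICMP intpred(eq), %x:gpr(s64), %zero   ; %zero = G_CONSTANT i64 0
//   G_BRCOND %c(s1), %bb.dest
//
// is selected to a single "CBZX %x, %bb.dest" (CBNZ for the ne predicate).
// A compare whose RHS is not the constant zero instead goes down the
// emitIntegerCompare + Bcc path above.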
962 bool AArch64InstructionSelector::selectVectorSHL(
963 MachineInstr &I, MachineRegisterInfo &MRI) const {
964 assert(I.getOpcode() == TargetOpcode::G_SHL);
965 Register DstReg = I.getOperand(0).getReg();
966 const LLT Ty = MRI.getType(DstReg);
967 Register Src1Reg = I.getOperand(1).getReg();
968 Register Src2Reg = I.getOperand(2).getReg();
970 if (!Ty.isVector())
971 return false;
973 unsigned Opc = 0;
974 if (Ty == LLT::vector(4, 32)) {
975 Opc = AArch64::USHLv4i32;
976 } else if (Ty == LLT::vector(2, 32)) {
977 Opc = AArch64::USHLv2i32;
978 } else {
979 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
980 return false;
983 MachineIRBuilder MIB(I);
984 auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
985 constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
986 I.eraseFromParent();
987 return true;
990 bool AArch64InstructionSelector::selectVectorASHR(
991 MachineInstr &I, MachineRegisterInfo &MRI) const {
992 assert(I.getOpcode() == TargetOpcode::G_ASHR);
993 Register DstReg = I.getOperand(0).getReg();
994 const LLT Ty = MRI.getType(DstReg);
995 Register Src1Reg = I.getOperand(1).getReg();
996 Register Src2Reg = I.getOperand(2).getReg();
998 if (!Ty.isVector())
999 return false;
1001 // There is no vector shift-right-by-register instruction, but the
1002 // shift-left-by-register instruction takes a signed shift amount, where a
1003 // negative amount specifies a right shift.
1005 unsigned Opc = 0;
1006 unsigned NegOpc = 0;
1007 const TargetRegisterClass *RC = nullptr;
1008 if (Ty == LLT::vector(4, 32)) {
1009 Opc = AArch64::SSHLv4i32;
1010 NegOpc = AArch64::NEGv4i32;
1011 RC = &AArch64::FPR128RegClass;
1012 } else if (Ty == LLT::vector(2, 32)) {
1013 Opc = AArch64::SSHLv2i32;
1014 NegOpc = AArch64::NEGv2i32;
1015 RC = &AArch64::FPR64RegClass;
1016 } else {
1017 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1018 return false;
1021 MachineIRBuilder MIB(I);
1022 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1023 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1024 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1025 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1026 I.eraseFromParent();
1027 return true;
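// Illustrative sketch: %d:fpr(<4 x s32>) = G_ASHR %v, %amt is emitted as
//
//   %neg:fpr128 = NEGv4i32 %amt
//   %d = SSHLv4i32 %v, %neg
//
// which is roughly "neg v1.4s, v1.4s; sshl v0.4s, v0.4s, v1.4s" in assembly.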
1030 bool AArch64InstructionSelector::selectVaStartAAPCS(
1031 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1032 return false;
1035 bool AArch64InstructionSelector::selectVaStartDarwin(
1036 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1037 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1038 Register ListReg = I.getOperand(0).getReg();
1040 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1042 auto MIB =
1043 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1044 .addDef(ArgsAddrReg)
1045 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1046 .addImm(0)
1047 .addImm(0);
1049 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1051 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1052 .addUse(ArgsAddrReg)
1053 .addUse(ListReg)
1054 .addImm(0)
1055 .addMemOperand(*I.memoperands_begin());
1057 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1058 I.eraseFromParent();
1059 return true;
1062 void AArch64InstructionSelector::materializeLargeCMVal(
1063 MachineInstr &I, const Value *V, unsigned char OpFlags) const {
1064 MachineBasicBlock &MBB = *I.getParent();
1065 MachineFunction &MF = *MBB.getParent();
1066 MachineRegisterInfo &MRI = MF.getRegInfo();
1067 MachineIRBuilder MIB(I);
1069 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1070 MovZ->addOperand(MF, I.getOperand(1));
1071 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1072 AArch64II::MO_NC);
1073 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1074 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1076 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1077 Register ForceDstReg) {
1078 Register DstReg = ForceDstReg
1079 ? ForceDstReg
1080 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1081 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1082 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1083 MovI->addOperand(MF, MachineOperand::CreateGA(
1084 GV, MovZ->getOperand(1).getOffset(), Flags));
1085 } else {
1086 MovI->addOperand(
1087 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1088 MovZ->getOperand(1).getOffset(), Flags));
1090 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1091 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1092 return DstReg;
1094 Register DstReg = BuildMovK(MovZ.getReg(0),
1095 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1096 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1097 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1098 return;
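// Illustrative sketch of the emitted MIR for a global @sym (names hypothetical):
//
//   %t0:gpr64 = MOVZXi @sym (G0|NC), 0
//   %t1:gpr64 = MOVKXi %t0, @sym (G1|NC), 16
//   %t2:gpr64 = MOVKXi %t1, @sym (G2|NC), 32
//   %dst      = MOVKXi %t2, @sym (G3), 48
//
// i.e. a movz of the low 16 bits followed by three movk's for the remaining
// 16-bit chunks of the 64-bit address.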
1101 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1102 MachineBasicBlock &MBB = *I.getParent();
1103 MachineFunction &MF = *MBB.getParent();
1104 MachineRegisterInfo &MRI = MF.getRegInfo();
1106 switch (I.getOpcode()) {
1107 case TargetOpcode::G_SHL:
1108 case TargetOpcode::G_ASHR:
1109 case TargetOpcode::G_LSHR: {
1110 // These shifts are legalized to have 64 bit shift amounts because we want
1111 // to take advantage of the existing imported selection patterns that assume
1112 // the immediates are s64s. However, if the shifted type is 32 bits and for
1113 // some reason we receive input GMIR that has an s64 shift amount that's not
1114 // a G_CONSTANT, insert a truncate so that we can still select the s32
1115 // register-register variant.
1116 unsigned SrcReg = I.getOperand(1).getReg();
1117 unsigned ShiftReg = I.getOperand(2).getReg();
1118 const LLT ShiftTy = MRI.getType(ShiftReg);
1119 const LLT SrcTy = MRI.getType(SrcReg);
1120 if (SrcTy.isVector())
1121 return;
1122 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1123 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1124 return;
1125 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1126 assert(AmtMI && "could not find a vreg definition for shift amount");
1127 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1128 // Insert a subregister copy to implement a 64->32 trunc
1129 MachineIRBuilder MIB(I);
1130 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1131 .addReg(ShiftReg, 0, AArch64::sub_32);
1132 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1133 I.getOperand(2).setReg(Trunc.getReg(0));
1135 return;
1137 case TargetOpcode::G_STORE:
1138 contractCrossBankCopyIntoStore(I, MRI);
1139 return;
1140 default:
1141 return;
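// Illustrative sketch for the shift case above (hypothetical vregs): given
//
//   %amt:gpr(s64) = ... not a G_CONSTANT ...
//   %d:gpr(s32) = G_LSHR %x:gpr(s32), %amt(s64)
//
// we replace the shift-amount operand with "%t:gpr(s32) = COPY %amt.sub_32"
// so that the imported 32-bit register-register pattern (e.g. LSRVWr) can
// still match.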
1145 bool AArch64InstructionSelector::earlySelectSHL(
1146 MachineInstr &I, MachineRegisterInfo &MRI) const {
1147 // We try to match the immediate variant of LSL, which is actually an alias
1148 // for a special case of UBFM. Otherwise, we fall back to the imported
1149 // selector which will match the register variant.
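// For reference (a sketch): "LSL Wd, Wn, #sh" is an alias of
// "UBFM Wd, Wn, #((32 - sh) % 32), #(31 - sh)". So, e.g., with %four being
// G_CONSTANT i32 4,
//
//   %d:gpr(s32) = G_SHL %x, %four
//
// can be selected to "UBFMWri %d, %x, 28, 27", i.e. "lsl w0, w1, #4". The
// selectShiftA_32/selectShiftB_32 renderers compute those two immediates.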
1150 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1151 const auto &MO = I.getOperand(2);
1152 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1153 if (!VRegAndVal)
1154 return false;
1156 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1157 if (DstTy.isVector())
1158 return false;
1159 bool Is64Bit = DstTy.getSizeInBits() == 64;
1160 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1161 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1162 MachineIRBuilder MIB(I);
1164 if (!Imm1Fn || !Imm2Fn)
1165 return false;
1167 auto NewI =
1168 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1169 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1171 for (auto &RenderFn : *Imm1Fn)
1172 RenderFn(NewI);
1173 for (auto &RenderFn : *Imm2Fn)
1174 RenderFn(NewI);
1176 I.eraseFromParent();
1177 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1180 void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1181 MachineInstr &I, MachineRegisterInfo &MRI) const {
1182 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1183 // If we're storing a scalar, it doesn't matter what register bank that
1184 // scalar is on. All that matters is the size.
1186 // So, if we see something like this (with a 32-bit scalar as an example):
1188 // %x:gpr(s32) = ... something ...
1189 // %y:fpr(s32) = COPY %x:gpr(s32)
1190 // G_STORE %y:fpr(s32)
1192 // We can fix this up into something like this:
1194 // G_STORE %x:gpr(s32)
1196 // And then continue the selection process normally.
1197 MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1198 if (!Def)
1199 return;
1200 Register DefDstReg = Def->getOperand(0).getReg();
1201 LLT DefDstTy = MRI.getType(DefDstReg);
1202 Register StoreSrcReg = I.getOperand(0).getReg();
1203 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1205 // If we get something strange like a physical register, then we shouldn't
1206 // go any further.
1207 if (!DefDstTy.isValid())
1208 return;
1210 // Are the source and dst types the same size?
1211 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1212 return;
1214 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1215 RBI.getRegBank(DefDstReg, MRI, TRI))
1216 return;
1218 // We have a cross-bank copy, which is entering a store. Let's fold it.
1219 I.getOperand(0).setReg(DefDstReg);
1222 bool AArch64InstructionSelector::earlySelectLoad(
1223 MachineInstr &I, MachineRegisterInfo &MRI) const {
1224 // Try to fold in shifts, etc. into the addressing mode of a load.
1225 assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");
1227 // Don't handle atomic loads/stores yet.
1228 auto &MemOp = **I.memoperands_begin();
1229 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
1230 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1231 return false;
1234 unsigned MemBytes = MemOp.getSize();
1236 // Only support 64-bit loads for now.
1237 if (MemBytes != 8)
1238 return false;
1240 Register DstReg = I.getOperand(0).getReg();
1241 const LLT DstTy = MRI.getType(DstReg);
1242 // Don't handle vectors.
1243 if (DstTy.isVector())
1244 return false;
1246 unsigned DstSize = DstTy.getSizeInBits();
1247 // TODO: 32-bit destinations.
1248 if (DstSize != 64)
1249 return false;
1251 // Check if we can do any folding from GEPs/shifts etc. into the load.
1252 auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
1253 if (!ImmFn)
1254 return false;
1256 // We can fold something. Emit the load here.
1257 MachineIRBuilder MIB(I);
1259 // Choose the instruction based on the size of the element being loaded, and
1260 // whether or not we're loading into an FPR.
1261 const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
1262 unsigned Opc =
1263 RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
1264 // Construct the load.
1265 auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
1266 for (auto &RenderFn : *ImmFn)
1267 RenderFn(LoadMI);
1268 LoadMI.addMemOperand(*I.memoperands_begin());
1269 I.eraseFromParent();
1270 return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
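// Illustrative sketch (hypothetical MIR): a 64-bit load whose address is a
// G_GEP of a base pointer and an offset produced by "G_SHL %idx, 3", i.e.
//
//   %addr:gpr(p0) = G_GEP %base, %scaled
//   %val:gpr(s64) = G_LOAD %addr
//
// can have the add and shift folded by selectAddrModeXRO into a single
// LDRXroX, roughly "ldr x0, [x1, x2, lsl #3]".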
1273 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1274 assert(I.getParent() && "Instruction should be in a basic block!");
1275 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1277 MachineBasicBlock &MBB = *I.getParent();
1278 MachineFunction &MF = *MBB.getParent();
1279 MachineRegisterInfo &MRI = MF.getRegInfo();
1281 switch (I.getOpcode()) {
1282 case TargetOpcode::G_SHL:
1283 return earlySelectSHL(I, MRI);
1284 case TargetOpcode::G_LOAD:
1285 return earlySelectLoad(I, MRI);
1286 default:
1287 return false;
1291 bool AArch64InstructionSelector::select(MachineInstr &I,
1292 CodeGenCoverage &CoverageInfo) const {
1293 assert(I.getParent() && "Instruction should be in a basic block!");
1294 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1296 MachineBasicBlock &MBB = *I.getParent();
1297 MachineFunction &MF = *MBB.getParent();
1298 MachineRegisterInfo &MRI = MF.getRegInfo();
1300 unsigned Opcode = I.getOpcode();
1301 // G_PHI requires the same handling as PHI.
1302 if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1303 // Certain non-generic instructions also need some special handling.
1305 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1306 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1308 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1309 const Register DefReg = I.getOperand(0).getReg();
1310 const LLT DefTy = MRI.getType(DefReg);
1312 const RegClassOrRegBank &RegClassOrBank =
1313 MRI.getRegClassOrRegBank(DefReg);
1315 const TargetRegisterClass *DefRC
1316 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1317 if (!DefRC) {
1318 if (!DefTy.isValid()) {
1319 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1320 return false;
1322 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1323 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1324 if (!DefRC) {
1325 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1326 return false;
1330 I.setDesc(TII.get(TargetOpcode::PHI));
1332 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1335 if (I.isCopy())
1336 return selectCopy(I, TII, MRI, TRI, RBI);
1338 return true;
1342 if (I.getNumOperands() != I.getNumExplicitOperands()) {
1343 LLVM_DEBUG(
1344 dbgs() << "Generic instruction has unexpected implicit operands\n");
1345 return false;
1348 // Try to do some lowering before we start instruction selecting. These
1349 // lowerings are purely transformations on the input G_MIR and so selection
1350 // must continue after any modification of the instruction.
1351 preISelLower(I);
1353 // There may be patterns that the importer can't handle optimally, but that
1354 // it still selects to a suboptimal sequence, so our custom C++ selection
1355 // code later never has a chance to work on them. Therefore, we have an early
1356 // selection attempt here to give priority to certain selection routines
1357 // over the imported ones.
1358 if (earlySelect(I))
1359 return true;
1361 if (selectImpl(I, CoverageInfo))
1362 return true;
1364 LLT Ty =
1365 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1367 MachineIRBuilder MIB(I);
1369 switch (Opcode) {
1370 case TargetOpcode::G_BRCOND: {
1371 if (Ty.getSizeInBits() > 32) {
1372 // We shouldn't need this on AArch64, but it would be implemented as an
1373 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1374 // bit being tested is < 32.
1375 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1376 << ", expected at most 32-bits");
1377 return false;
1380 const Register CondReg = I.getOperand(0).getReg();
1381 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1383 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1384 // instructions will not be produced, as they are conditional branch
1385 // instructions that do not set flags.
1386 bool ProduceNonFlagSettingCondBr =
1387 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1388 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1389 return true;
1391 if (ProduceNonFlagSettingCondBr) {
1392 auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1393 .addUse(CondReg)
1394 .addImm(/*bit offset=*/0)
1395 .addMBB(DestMBB);
1397 I.eraseFromParent();
1398 return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1399 } else {
1400 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1401 .addDef(AArch64::WZR)
1402 .addUse(CondReg)
1403 .addImm(1);
1404 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1405 auto Bcc =
1406 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1407 .addImm(AArch64CC::EQ)
1408 .addMBB(DestMBB);
1410 I.eraseFromParent();
1411 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1415 case TargetOpcode::G_BRINDIRECT: {
1416 I.setDesc(TII.get(AArch64::BR));
1417 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1420 case TargetOpcode::G_BRJT:
1421 return selectBrJT(I, MRI);
1423 case TargetOpcode::G_BSWAP: {
1424 // Handle vector types for G_BSWAP directly.
1425 Register DstReg = I.getOperand(0).getReg();
1426 LLT DstTy = MRI.getType(DstReg);
1428 // We should only get vector types here; everything else is handled by the
1429 // importer right now.
1430 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1431 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1432 return false;
1435 // Only handle 4 and 2 element vectors for now.
1436 // TODO: 16-bit elements.
1437 unsigned NumElts = DstTy.getNumElements();
1438 if (NumElts != 4 && NumElts != 2) {
1439 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1440 return false;
1443 // Choose the correct opcode for the supported types. Right now, that's
1444 // v2s32, v4s32, and v2s64.
1445 unsigned Opc = 0;
1446 unsigned EltSize = DstTy.getElementType().getSizeInBits();
1447 if (EltSize == 32)
1448 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1449 : AArch64::REV32v16i8;
1450 else if (EltSize == 64)
1451 Opc = AArch64::REV64v16i8;
1453 // We should always get something by the time we get here...
1454 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1456 I.setDesc(TII.get(Opc));
1457 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
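// Illustrative sketch: %d:fpr(<4 x s32>) = G_BSWAP %v selects to
// "REV32v16i8 %d, %v" (roughly "rev32 v0.16b, v0.16b"), which byte-swaps each
// 32-bit element of the 128-bit vector; v2s32 uses the 64-bit REV32v8i8 and
// v2s64 uses REV64v16i8.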
1460 case TargetOpcode::G_FCONSTANT:
1461 case TargetOpcode::G_CONSTANT: {
1462 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1464 const LLT s8 = LLT::scalar(8);
1465 const LLT s16 = LLT::scalar(16);
1466 const LLT s32 = LLT::scalar(32);
1467 const LLT s64 = LLT::scalar(64);
1468 const LLT p0 = LLT::pointer(0, 64);
1470 const Register DefReg = I.getOperand(0).getReg();
1471 const LLT DefTy = MRI.getType(DefReg);
1472 const unsigned DefSize = DefTy.getSizeInBits();
1473 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1475 // FIXME: Redundant check, but even less readable when factored out.
1476 if (isFP) {
1477 if (Ty != s32 && Ty != s64) {
1478 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1479 << " constant, expected: " << s32 << " or " << s64
1480 << '\n');
1481 return false;
1484 if (RB.getID() != AArch64::FPRRegBankID) {
1485 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1486 << " constant on bank: " << RB
1487 << ", expected: FPR\n");
1488 return false;
1491 // The case when we have 0.0 is covered by tablegen. Reject it here so we
1492 // can be sure tablegen works correctly and isn't rescued by this code.
1493 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1494 return false;
1495 } else {
1496 // s32 and s64 are covered by tablegen.
1497 if (Ty != p0 && Ty != s8 && Ty != s16) {
1498 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1499 << " constant, expected: " << s32 << ", " << s64
1500 << ", or " << p0 << '\n');
1501 return false;
1504 if (RB.getID() != AArch64::GPRRegBankID) {
1505 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1506 << " constant on bank: " << RB
1507 << ", expected: GPR\n");
1508 return false;
1512 // We allow G_CONSTANT of types < 32b.
1513 const unsigned MovOpc =
1514 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1516 if (isFP) {
1517 // Either emit a FMOV, or emit a copy to emit a normal mov.
1518 const TargetRegisterClass &GPRRC =
1519 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1520 const TargetRegisterClass &FPRRC =
1521 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1523 // Can we use a FMOV instruction to represent the immediate?
1524 if (emitFMovForFConstant(I, MRI))
1525 return true;
1527 // Nope. Emit a copy and use a normal mov instead.
1528 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1529 MachineOperand &RegOp = I.getOperand(0);
1530 RegOp.setReg(DefGPRReg);
1531 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1532 MIB.buildCopy({DefReg}, {DefGPRReg});
1534 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1535 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1536 return false;
1539 MachineOperand &ImmOp = I.getOperand(1);
1540 // FIXME: Is going through int64_t always correct?
1541 ImmOp.ChangeToImmediate(
1542 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1543 } else if (I.getOperand(1).isCImm()) {
1544 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1545 I.getOperand(1).ChangeToImmediate(Val);
1546 } else if (I.getOperand(1).isImm()) {
1547 uint64_t Val = I.getOperand(1).getImm();
1548 I.getOperand(1).ChangeToImmediate(Val);
1551 I.setDesc(TII.get(MovOpc));
1552 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1553 return true;
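// Illustrative sketch for the FP path (hypothetical values): a G_FCONSTANT
// that fits the FMOV immediate encoding, e.g. float 1.0, is handled by
// emitFMovForFConstant and ends up as "fmov s0, #1.0". A value with no FMOV
// encoding, e.g. float 0.1 (bit pattern 0x3dcccccd), instead becomes roughly
//
//   %g:gpr32 = MOVi32imm 0x3dcccccd
//   %d:fpr32 = COPY %g
//
// i.e. materialize the bits in a GPR and copy them across to the FPR.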
1555 case TargetOpcode::G_EXTRACT: {
1556 Register DstReg = I.getOperand(0).getReg();
1557 Register SrcReg = I.getOperand(1).getReg();
1558 LLT SrcTy = MRI.getType(SrcReg);
1559 LLT DstTy = MRI.getType(DstReg);
1560 (void)DstTy;
1561 unsigned SrcSize = SrcTy.getSizeInBits();
1563 if (SrcTy.getSizeInBits() > 64) {
1564 // This should be an extract of an s128, which is like a vector extract.
1565 if (SrcTy.getSizeInBits() != 128)
1566 return false;
1567 // Only support extracting 64 bits from an s128 at the moment.
1568 if (DstTy.getSizeInBits() != 64)
1569 return false;
1571 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1572 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1573 // Check we have the right regbank always.
1574 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1575 DstRB.getID() == AArch64::FPRRegBankID &&
1576 "Wrong extract regbank!");
1577 (void)SrcRB;
1579 // Emit the same code as a vector extract.
1580 // Offset must be a multiple of 64.
1581 unsigned Offset = I.getOperand(2).getImm();
1582 if (Offset % 64 != 0)
1583 return false;
1584 unsigned LaneIdx = Offset / 64;
1585 MachineIRBuilder MIB(I);
1586 MachineInstr *Extract = emitExtractVectorElt(
1587 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1588 if (!Extract)
1589 return false;
1590 I.eraseFromParent();
1591 return true;
1594 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1595 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1596 Ty.getSizeInBits() - 1);
1598 if (SrcSize < 64) {
1599 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1600 "unexpected G_EXTRACT types");
1601 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1604 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1605 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1606 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1607 .addReg(DstReg, 0, AArch64::sub_32);
1608 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1609 AArch64::GPR32RegClass, MRI);
1610 I.getOperand(0).setReg(DstReg);
1612 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1615 case TargetOpcode::G_INSERT: {
1616 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1617 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1618 unsigned DstSize = DstTy.getSizeInBits();
1619 // Larger inserts are vectors, same-size ones should be something else by
1620 // now (split up or turned into COPYs).
1621 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1622 return false;
1624 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1625 unsigned LSB = I.getOperand(3).getImm();
1626 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1627 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1628 MachineInstrBuilder(MF, I).addImm(Width - 1);
1630 if (DstSize < 64) {
1631 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1632 "unexpected G_INSERT types");
1633 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1636 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1637 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1638 TII.get(AArch64::SUBREG_TO_REG))
1639 .addDef(SrcReg)
1640 .addImm(0)
1641 .addUse(I.getOperand(2).getReg())
1642 .addImm(AArch64::sub_32);
1643 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1644 AArch64::GPR32RegClass, MRI);
1645 I.getOperand(2).setReg(SrcReg);
1647 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1649 case TargetOpcode::G_FRAME_INDEX: {
1650 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1651 if (Ty != LLT::pointer(0, 64)) {
1652 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1653 << ", expected: " << LLT::pointer(0, 64) << '\n');
1654 return false;
1656 I.setDesc(TII.get(AArch64::ADDXri));
1658 // MOs for a #0 shifted immediate.
1659 I.addOperand(MachineOperand::CreateImm(0));
1660 I.addOperand(MachineOperand::CreateImm(0));
1662 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1665 case TargetOpcode::G_GLOBAL_VALUE: {
1666 auto GV = I.getOperand(1).getGlobal();
1667 if (GV->isThreadLocal()) {
1668 // FIXME: we don't support TLS yet.
1669 return false;
1671 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
1672 if (OpFlags & AArch64II::MO_GOT) {
1673 I.setDesc(TII.get(AArch64::LOADgot));
1674 I.getOperand(1).setTargetFlags(OpFlags);
1675 } else if (TM.getCodeModel() == CodeModel::Large) {
1676 // Materialize the global using movz/movk instructions.
1677 materializeLargeCMVal(I, GV, OpFlags);
1678 I.eraseFromParent();
1679 return true;
1680 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1681 I.setDesc(TII.get(AArch64::ADR));
1682 I.getOperand(1).setTargetFlags(OpFlags);
1683 } else {
1684 I.setDesc(TII.get(AArch64::MOVaddr));
1685 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1686 MachineInstrBuilder MIB(MF, I);
1687 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1688 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1690 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1693 case TargetOpcode::G_ZEXTLOAD:
1694 case TargetOpcode::G_LOAD:
1695 case TargetOpcode::G_STORE: {
1696 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1697 MachineIRBuilder MIB(I);
1699 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1701 if (PtrTy != LLT::pointer(0, 64)) {
1702 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1703 << ", expected: " << LLT::pointer(0, 64) << '\n');
1704 return false;
1707 auto &MemOp = **I.memoperands_begin();
1708 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
1709 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1710 return false;
1712 unsigned MemSizeInBits = MemOp.getSize() * 8;
1714 const Register PtrReg = I.getOperand(1).getReg();
1715 #ifndef NDEBUG
1716 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1717 // Sanity-check the pointer register.
1718 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1719 "Load/Store pointer operand isn't a GPR");
1720 assert(MRI.getType(PtrReg).isPointer() &&
1721 "Load/Store pointer operand isn't a pointer");
1722 #endif
1724 const Register ValReg = I.getOperand(0).getReg();
1725 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1727 const unsigned NewOpc =
1728 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1729 if (NewOpc == I.getOpcode())
1730 return false;
1732 I.setDesc(TII.get(NewOpc));
1734 uint64_t Offset = 0;
1735 auto *PtrMI = MRI.getVRegDef(PtrReg);
1737 // Try to fold a GEP into our unsigned immediate addressing mode.
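// The unsigned-offset load/store forms take a 12-bit immediate scaled by the
// access size, so e.g. an 8-byte access can fold byte offsets that are
// multiples of 8 in the range [0, 32760], encoded below as Imm / Size.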
1738 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1739 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1740 int64_t Imm = *COff;
1741 const unsigned Size = MemSizeInBits / 8;
1742 const unsigned Scale = Log2_32(Size);
1743 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1744 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1745 I.getOperand(1).setReg(Ptr2Reg);
1746 PtrMI = MRI.getVRegDef(Ptr2Reg);
1747 Offset = Imm / Size;
1752 // If we haven't folded anything into our addressing mode yet, try to fold
1753 // a frame index into the base+offset.
1754 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1755 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1757 I.addOperand(MachineOperand::CreateImm(Offset));
1759 // If we're storing a 0, use WZR/XZR.
1760 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1761 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1762 if (I.getOpcode() == AArch64::STRWui)
1763 I.getOperand(0).setReg(AArch64::WZR);
1764 else if (I.getOpcode() == AArch64::STRXui)
1765 I.getOperand(0).setReg(AArch64::XZR);
1769 if (IsZExtLoad) {
1770 // The zextload from a smaller type to i32 should be handled by the importer.
1771 if (MRI.getType(ValReg).getSizeInBits() != 64)
1772 return false;
1773 // If we have a ZEXTLOAD then change the load to define a narrower 32-bit
1774 // register, and zero-extend the result with SUBREG_TO_REG.
1775 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1776 Register DstReg = I.getOperand(0).getReg();
1777 I.getOperand(0).setReg(LdReg);
1779 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1780 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1781 .addImm(0)
1782 .addUse(LdReg)
1783 .addImm(AArch64::sub_32);
1784 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1785 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1786 MRI);
1788 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1791 case TargetOpcode::G_SMULH:
1792 case TargetOpcode::G_UMULH: {
1793 // Reject the various things we don't support yet.
1794 if (unsupportedBinOp(I, RBI, MRI, TRI))
1795 return false;
1797 const Register DefReg = I.getOperand(0).getReg();
1798 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1800 if (RB.getID() != AArch64::GPRRegBankID) {
1801 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1802 return false;
1805 if (Ty != LLT::scalar(64)) {
1806 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1807 << ", expected: " << LLT::scalar(64) << '\n');
1808 return false;
1811 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1812 : AArch64::UMULHrr;
1813 I.setDesc(TII.get(NewOpc));
1815 // Now that we selected an opcode, we need to constrain the register
1816 // operands to use appropriate classes.
1817 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1819 case TargetOpcode::G_FADD:
1820 case TargetOpcode::G_FSUB:
1821 case TargetOpcode::G_FMUL:
1822 case TargetOpcode::G_FDIV:
1824 case TargetOpcode::G_ASHR:
1825 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1826 return selectVectorASHR(I, MRI);
1827 LLVM_FALLTHROUGH;
1828 case TargetOpcode::G_SHL:
1829 if (Opcode == TargetOpcode::G_SHL &&
1830 MRI.getType(I.getOperand(0).getReg()).isVector())
1831 return selectVectorSHL(I, MRI);
1832 LLVM_FALLTHROUGH;
1833 case TargetOpcode::G_OR:
1834 case TargetOpcode::G_LSHR: {
1835 // Reject the various things we don't support yet.
1836 if (unsupportedBinOp(I, RBI, MRI, TRI))
1837 return false;
1839 const unsigned OpSize = Ty.getSizeInBits();
1841 const Register DefReg = I.getOperand(0).getReg();
1842 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1844 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1845 if (NewOpc == I.getOpcode())
1846 return false;
1848 I.setDesc(TII.get(NewOpc));
1849 // FIXME: Should the type be always reset in setDesc?
1851 // Now that we selected an opcode, we need to constrain the register
1852 // operands to use appropriate classes.
1853 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1856 case TargetOpcode::G_GEP: {
1857 MachineIRBuilder MIRBuilder(I);
1858 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1859 MIRBuilder);
1860 I.eraseFromParent();
1861 return true;
1863 case TargetOpcode::G_UADDO: {
1864 // TODO: Support other types.
1865 unsigned OpSize = Ty.getSizeInBits();
1866 if (OpSize != 32 && OpSize != 64) {
1867 LLVM_DEBUG(
1868 dbgs()
1869 << "G_UADDO currently only supported for 32 and 64 b types.\n");
1870 return false;
1873 // TODO: Support vectors.
1874 if (Ty.isVector()) {
1875 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1876 return false;
1880 // Add and set the condition flags.
1880 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1881 MachineIRBuilder MIRBuilder(I);
1882 auto AddsMI = MIRBuilder.buildInstr(
1883 AddsOpc, {I.getOperand(0).getReg()},
1884 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1885 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1887 // Now, put the overflow result in the carry-out register (the second def
1888 // of the G_UADDO). CSINC increments the result when the predicate is false,
1889 // so to get the increment when it's true, we need to use the inverse. In
1890 // this case, we want to increment when carry is set.
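// I.e. we materialize "cset w<carry>, hs" as "csinc w<carry>, wzr, wzr, lo".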
1891 auto CsetMI = MIRBuilder
1892 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1893 {Register(AArch64::WZR), Register(AArch64::WZR)})
1894 .addImm(getInvertedCondCode(AArch64CC::HS));
1895 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1896 I.eraseFromParent();
1897 return true;
1900 case TargetOpcode::G_PTR_MASK: {
1901 uint64_t Align = I.getOperand(2).getImm();
1902 if (Align >= 64 || Align == 0)
1903 return false;
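// The immediate is the number of low bits to clear, so build the matching
// AND mask; e.g. an operand of 3 produces the mask ~0x7.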
1905 uint64_t Mask = ~((1ULL << Align) - 1);
1906 I.setDesc(TII.get(AArch64::ANDXri));
1907 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1909 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1911 case TargetOpcode::G_PTRTOINT:
1912 case TargetOpcode::G_TRUNC: {
1913 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1914 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1916 const Register DstReg = I.getOperand(0).getReg();
1917 const Register SrcReg = I.getOperand(1).getReg();
1919 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1920 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1922 if (DstRB.getID() != SrcRB.getID()) {
1923 LLVM_DEBUG(
1924 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1925 return false;
1928 if (DstRB.getID() == AArch64::GPRRegBankID) {
1929 const TargetRegisterClass *DstRC =
1930 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1931 if (!DstRC)
1932 return false;
1934 const TargetRegisterClass *SrcRC =
1935 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1936 if (!SrcRC)
1937 return false;
1939 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1940 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1941 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1942 return false;
1945 if (DstRC == SrcRC) {
1946 // Nothing to be done
1947 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1948 SrcTy == LLT::scalar(64)) {
1949 llvm_unreachable("TableGen can import this case");
1950 return false;
1951 } else if (DstRC == &AArch64::GPR32RegClass &&
1952 SrcRC == &AArch64::GPR64RegClass) {
1953 I.getOperand(1).setSubReg(AArch64::sub_32);
1954 } else {
1955 LLVM_DEBUG(
1956 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1957 return false;
1960 I.setDesc(TII.get(TargetOpcode::COPY));
1961 return true;
1962 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1963 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1964 I.setDesc(TII.get(AArch64::XTNv4i16));
1965 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1966 return true;
1969 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
1970 MachineIRBuilder MIB(I);
1971 MachineInstr *Extract = emitExtractVectorElt(
1972 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
1973 if (!Extract)
1974 return false;
1975 I.eraseFromParent();
1976 return true;
1980 return false;
1983 case TargetOpcode::G_ANYEXT: {
1984 const Register DstReg = I.getOperand(0).getReg();
1985 const Register SrcReg = I.getOperand(1).getReg();
1987 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1988 if (RBDst.getID() != AArch64::GPRRegBankID) {
1989 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1990 << ", expected: GPR\n");
1991 return false;
1994 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1995 if (RBSrc.getID() != AArch64::GPRRegBankID) {
1996 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1997 << ", expected: GPR\n");
1998 return false;
2001 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2003 if (DstSize == 0) {
2004 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2005 return false;
2008 if (DstSize != 64 && DstSize > 32) {
2009 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2010 << ", expected: 32 or 64\n");
2011 return false;
2013 // At this point G_ANYEXT is just like a plain COPY, but we need
2014 // to explicitly form the 64-bit value if needed.
2015 if (DstSize > 32) {
2016 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2017 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2018 .addDef(ExtSrc)
2019 .addImm(0)
2020 .addUse(SrcReg)
2021 .addImm(AArch64::sub_32);
2022 I.getOperand(1).setReg(ExtSrc);
2024 return selectCopy(I, TII, MRI, TRI, RBI);
2027 case TargetOpcode::G_ZEXT:
2028 case TargetOpcode::G_SEXT: {
2029 unsigned Opcode = I.getOpcode();
2030 const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2031 const Register DefReg = I.getOperand(0).getReg();
2032 const Register SrcReg = I.getOperand(1).getReg();
2033 const LLT DstTy = MRI.getType(DefReg);
2034 const LLT SrcTy = MRI.getType(SrcReg);
2035 unsigned DstSize = DstTy.getSizeInBits();
2036 unsigned SrcSize = SrcTy.getSizeInBits();
2038 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2039 AArch64::GPRRegBankID &&
2040 "Unexpected ext regbank");
2042 MachineIRBuilder MIB(I);
2043 MachineInstr *ExtI;
2044 if (DstTy.isVector())
2045 return false; // Should be handled by imported patterns.
2047 if (DstSize == 64) {
2048 // FIXME: Can we avoid manually doing this?
2049 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2050 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2051 << " operand\n");
2052 return false;
2055 auto SubregToReg =
2056 MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2057 .addImm(0)
2058 .addUse(SrcReg)
2059 .addImm(AArch64::sub_32);
2061 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2062 {DefReg}, {SubregToReg})
2063 .addImm(0)
2064 .addImm(SrcSize - 1);
2065 } else if (DstSize <= 32) {
2066 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2067 {DefReg}, {SrcReg})
2068 .addImm(0)
2069 .addImm(SrcSize - 1);
2070 } else {
2071 return false;
2074 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2075 I.eraseFromParent();
2076 return true;
2079 case TargetOpcode::G_SITOFP:
2080 case TargetOpcode::G_UITOFP:
2081 case TargetOpcode::G_FPTOSI:
2082 case TargetOpcode::G_FPTOUI: {
2083 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2084 SrcTy = MRI.getType(I.getOperand(1).getReg());
2085 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2086 if (NewOpc == Opcode)
2087 return false;
2089 I.setDesc(TII.get(NewOpc));
2090 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2092 return true;
2096 case TargetOpcode::G_INTTOPTR:
2097 // The importer is currently unable to import pointer types since they
2098 // didn't exist in SelectionDAG.
2099 return selectCopy(I, TII, MRI, TRI, RBI);
2101 case TargetOpcode::G_BITCAST:
2102 // Imported SelectionDAG rules can handle every bitcast except those that
2103 // bitcast from a type to the same type. Ideally, these shouldn't occur
2104 // but we might not run an optimizer that deletes them. The other exception
2105 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2106 // of them.
2107 return selectCopy(I, TII, MRI, TRI, RBI);
2109 case TargetOpcode::G_SELECT: {
2110 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2111 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2112 << ", expected: " << LLT::scalar(1) << '\n');
2113 return false;
2116 const Register CondReg = I.getOperand(1).getReg();
2117 const Register TReg = I.getOperand(2).getReg();
2118 const Register FReg = I.getOperand(3).getReg();
2120 if (tryOptSelect(I))
2121 return true;
2123 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2124 MachineInstr &TstMI =
2125 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2126 .addDef(AArch64::WZR)
2127 .addUse(CondReg)
2128 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2130 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2131 .addDef(I.getOperand(0).getReg())
2132 .addUse(TReg)
2133 .addUse(FReg)
2134 .addImm(AArch64CC::NE);
2136 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2137 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2139 I.eraseFromParent();
2140 return true;
2142 case TargetOpcode::G_ICMP: {
2143 if (Ty.isVector())
2144 return selectVectorICmp(I, MRI);
2146 if (Ty != LLT::scalar(32)) {
2147 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2148 << ", expected: " << LLT::scalar(32) << '\n');
2149 return false;
2152 MachineIRBuilder MIRBuilder(I);
2153 if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2154 MIRBuilder))
2155 return false;
2156 emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2157 MIRBuilder);
2158 I.eraseFromParent();
2159 return true;
2162 case TargetOpcode::G_FCMP: {
2163 if (Ty != LLT::scalar(32)) {
2164 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2165 << ", expected: " << LLT::scalar(32) << '\n');
2166 return false;
2169 unsigned CmpOpc = selectFCMPOpc(I, MRI);
2170 if (!CmpOpc)
2171 return false;
2173 // FIXME: regbank
2175 AArch64CC::CondCode CC1, CC2;
2176 changeFCMPPredToAArch64CC(
2177 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2179 // Partially build the compare. Decide if we need to add a use for the
2180 // third operand based on whether we're comparing against 0.0.
2181 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2182 .addUse(I.getOperand(2).getReg());
2184 // If we don't have an immediate compare, then we need to add a use of the
2185 // register which wasn't used for the immediate.
2186 // Note that the immediate will always be the last operand.
2187 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2188 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2190 const Register DefReg = I.getOperand(0).getReg();
2191 Register Def1Reg = DefReg;
2192 if (CC2 != AArch64CC::AL)
2193 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2195 MachineInstr &CSetMI =
2196 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2197 .addDef(Def1Reg)
2198 .addUse(AArch64::WZR)
2199 .addUse(AArch64::WZR)
2200 .addImm(getInvertedCondCode(CC1));
2202 if (CC2 != AArch64CC::AL) {
2203 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2204 MachineInstr &CSet2MI =
2205 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2206 .addDef(Def2Reg)
2207 .addUse(AArch64::WZR)
2208 .addUse(AArch64::WZR)
2209 .addImm(getInvertedCondCode(CC2));
2210 MachineInstr &OrMI =
2211 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2212 .addDef(DefReg)
2213 .addUse(Def1Reg)
2214 .addUse(Def2Reg);
2215 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2216 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2218 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2219 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2221 I.eraseFromParent();
2222 return true;
2224 case TargetOpcode::G_VASTART:
2225 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2226 : selectVaStartAAPCS(I, MF, MRI);
2227 case TargetOpcode::G_INTRINSIC:
2228 return selectIntrinsic(I, MRI);
2229 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2230 return selectIntrinsicWithSideEffects(I, MRI);
2231 case TargetOpcode::G_IMPLICIT_DEF: {
2232 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2233 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2234 const Register DstReg = I.getOperand(0).getReg();
2235 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2236 const TargetRegisterClass *DstRC =
2237 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2238 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2239 return true;
2241 case TargetOpcode::G_BLOCK_ADDR: {
2242 if (TM.getCodeModel() == CodeModel::Large) {
2243 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2244 I.eraseFromParent();
2245 return true;
2246 } else {
2247 I.setDesc(TII.get(AArch64::MOVaddrBA));
2248 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2249 I.getOperand(0).getReg())
2250 .addBlockAddress(I.getOperand(1).getBlockAddress(),
2251 /* Offset */ 0, AArch64II::MO_PAGE)
2252 .addBlockAddress(
2253 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2254 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2255 I.eraseFromParent();
2256 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2259 case TargetOpcode::G_INTRINSIC_TRUNC:
2260 return selectIntrinsicTrunc(I, MRI);
2261 case TargetOpcode::G_INTRINSIC_ROUND:
2262 return selectIntrinsicRound(I, MRI);
2263 case TargetOpcode::G_BUILD_VECTOR:
2264 return selectBuildVector(I, MRI);
2265 case TargetOpcode::G_MERGE_VALUES:
2266 return selectMergeValues(I, MRI);
2267 case TargetOpcode::G_UNMERGE_VALUES:
2268 return selectUnmergeValues(I, MRI);
2269 case TargetOpcode::G_SHUFFLE_VECTOR:
2270 return selectShuffleVector(I, MRI);
2271 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2272 return selectExtractElt(I, MRI);
2273 case TargetOpcode::G_INSERT_VECTOR_ELT:
2274 return selectInsertElt(I, MRI);
2275 case TargetOpcode::G_CONCAT_VECTORS:
2276 return selectConcatVectors(I, MRI);
2277 case TargetOpcode::G_JUMP_TABLE:
2278 return selectJumpTable(I, MRI);
2281 return false;
2284 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2285 MachineRegisterInfo &MRI) const {
2286 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2287 Register JTAddr = I.getOperand(0).getReg();
2288 unsigned JTI = I.getOperand(1).getIndex();
2289 Register Index = I.getOperand(2).getReg();
2290 MachineIRBuilder MIB(I);
2292 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2293 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2294 MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2295 {JTAddr, Index})
2296 .addJumpTableIndex(JTI);
2298 // Build the indirect branch.
2299 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2300 I.eraseFromParent();
2301 return true;
2304 bool AArch64InstructionSelector::selectJumpTable(
2305 MachineInstr &I, MachineRegisterInfo &MRI) const {
2306 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2307 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2309 Register DstReg = I.getOperand(0).getReg();
2310 unsigned JTI = I.getOperand(1).getIndex();
2311 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2312 MachineIRBuilder MIB(I);
2313 auto MovMI =
2314 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2315 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2316 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2317 I.eraseFromParent();
2318 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2321 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2322 MachineInstr &I, MachineRegisterInfo &MRI) const {
2323 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2325 // Select the correct opcode.
2326 unsigned Opc = 0;
2327 if (!SrcTy.isVector()) {
2328 switch (SrcTy.getSizeInBits()) {
2329 default:
2330 case 16:
2331 Opc = AArch64::FRINTZHr;
2332 break;
2333 case 32:
2334 Opc = AArch64::FRINTZSr;
2335 break;
2336 case 64:
2337 Opc = AArch64::FRINTZDr;
2338 break;
2340 } else {
2341 unsigned NumElts = SrcTy.getNumElements();
2342 switch (SrcTy.getElementType().getSizeInBits()) {
2343 default:
2344 break;
2345 case 16:
2346 if (NumElts == 4)
2347 Opc = AArch64::FRINTZv4f16;
2348 else if (NumElts == 8)
2349 Opc = AArch64::FRINTZv8f16;
2350 break;
2351 case 32:
2352 if (NumElts == 2)
2353 Opc = AArch64::FRINTZv2f32;
2354 else if (NumElts == 4)
2355 Opc = AArch64::FRINTZv4f32;
2356 break;
2357 case 64:
2358 if (NumElts == 2)
2359 Opc = AArch64::FRINTZv2f64;
2360 break;
2364 if (!Opc) {
2365 // Didn't get an opcode above, bail.
2366 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2367 return false;
2370 // Legalization would have set us up perfectly for this; we just need to
2371 // set the opcode and move on.
2372 I.setDesc(TII.get(Opc));
2373 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2376 bool AArch64InstructionSelector::selectIntrinsicRound(
2377 MachineInstr &I, MachineRegisterInfo &MRI) const {
2378 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2380 // Select the correct opcode.
2381 unsigned Opc = 0;
2382 if (!SrcTy.isVector()) {
2383 switch (SrcTy.getSizeInBits()) {
2384 default:
2385 case 16:
2386 Opc = AArch64::FRINTAHr;
2387 break;
2388 case 32:
2389 Opc = AArch64::FRINTASr;
2390 break;
2391 case 64:
2392 Opc = AArch64::FRINTADr;
2393 break;
2395 } else {
2396 unsigned NumElts = SrcTy.getNumElements();
2397 switch (SrcTy.getElementType().getSizeInBits()) {
2398 default:
2399 break;
2400 case 16:
2401 if (NumElts == 4)
2402 Opc = AArch64::FRINTAv4f16;
2403 else if (NumElts == 8)
2404 Opc = AArch64::FRINTAv8f16;
2405 break;
2406 case 32:
2407 if (NumElts == 2)
2408 Opc = AArch64::FRINTAv2f32;
2409 else if (NumElts == 4)
2410 Opc = AArch64::FRINTAv4f32;
2411 break;
2412 case 64:
2413 if (NumElts == 2)
2414 Opc = AArch64::FRINTAv2f64;
2415 break;
2419 if (!Opc) {
2420 // Didn't get an opcode above, bail.
2421 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2422 return false;
2425 // Legalization would have set us up perfectly for this; we just need to
2426 // set the opcode and move on.
2427 I.setDesc(TII.get(Opc));
2428 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2431 bool AArch64InstructionSelector::selectVectorICmp(
2432 MachineInstr &I, MachineRegisterInfo &MRI) const {
2433 Register DstReg = I.getOperand(0).getReg();
2434 LLT DstTy = MRI.getType(DstReg);
2435 Register SrcReg = I.getOperand(2).getReg();
2436 Register Src2Reg = I.getOperand(3).getReg();
2437 LLT SrcTy = MRI.getType(SrcReg);
2439 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2440 unsigned NumElts = DstTy.getNumElements();
2442 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2443 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2444 // Third index is cc opcode:
2445 // 0 == eq
2446 // 1 == ugt
2447 // 2 == uge
2448 // 3 == ult
2449 // 4 == ule
2450 // 5 == sgt
2451 // 6 == sge
2452 // 7 == slt
2453 // 8 == sle
2454 // ne is handled by negating the 'eq' result.
2456 // The table below assumes that for some comparisons the operands will be
2457 // commuted.
2458 // ult op == commute + ugt op
2459 // ule op == commute + uge op
2460 // slt op == commute + sgt op
2461 // sle op == commute + sge op
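// E.g. a <4 x s32> "slt" compare selects CMGTv4i32 with its operands swapped,
// and "ne" selects CMEQv4i32 followed by NOTv16i8.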
2462 unsigned PredIdx = 0;
2463 bool SwapOperands = false;
2464 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2465 switch (Pred) {
2466 case CmpInst::ICMP_NE:
2467 case CmpInst::ICMP_EQ:
2468 PredIdx = 0;
2469 break;
2470 case CmpInst::ICMP_UGT:
2471 PredIdx = 1;
2472 break;
2473 case CmpInst::ICMP_UGE:
2474 PredIdx = 2;
2475 break;
2476 case CmpInst::ICMP_ULT:
2477 PredIdx = 3;
2478 SwapOperands = true;
2479 break;
2480 case CmpInst::ICMP_ULE:
2481 PredIdx = 4;
2482 SwapOperands = true;
2483 break;
2484 case CmpInst::ICMP_SGT:
2485 PredIdx = 5;
2486 break;
2487 case CmpInst::ICMP_SGE:
2488 PredIdx = 6;
2489 break;
2490 case CmpInst::ICMP_SLT:
2491 PredIdx = 7;
2492 SwapOperands = true;
2493 break;
2494 case CmpInst::ICMP_SLE:
2495 PredIdx = 8;
2496 SwapOperands = true;
2497 break;
2498 default:
2499 llvm_unreachable("Unhandled icmp predicate");
2500 return false;
2503 // This table obviously should be tablegen'd when we have our GISel native
2504 // tablegen selector.
2506 static const unsigned OpcTable[4][4][9] = {
2508 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2509 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2510 0 /* invalid */},
2511 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2512 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2513 0 /* invalid */},
2514 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2515 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2516 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2517 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2518 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2519 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2522 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2523 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2524 0 /* invalid */},
2525 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2526 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2527 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2528 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2529 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2530 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2531 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2532 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2533 0 /* invalid */}
2536 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2537 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2538 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2539 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2540 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2541 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2542 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2543 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2544 0 /* invalid */},
2545 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2546 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2547 0 /* invalid */}
2550 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2551 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2552 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2553 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2554 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2555 0 /* invalid */},
2556 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2557 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2558 0 /* invalid */},
2559 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2560 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2561 0 /* invalid */}
2564 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2565 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2566 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2567 if (!Opc) {
2568 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2569 return false;
2572 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2573 const TargetRegisterClass *SrcRC =
2574 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2575 if (!SrcRC) {
2576 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2577 return false;
2580 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2581 if (SrcTy.getSizeInBits() == 128)
2582 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2584 if (SwapOperands)
2585 std::swap(SrcReg, Src2Reg);
2587 MachineIRBuilder MIB(I);
2588 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2589 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2591 // Invert if we had a 'ne' cc.
2592 if (NotOpc) {
2593 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2594 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2595 } else {
2596 MIB.buildCopy(DstReg, Cmp.getReg(0));
2598 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2599 I.eraseFromParent();
2600 return true;
2603 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2604 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2605 MachineIRBuilder &MIRBuilder) const {
2606 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2608 auto BuildFn = [&](unsigned SubregIndex) {
2609 auto Ins =
2610 MIRBuilder
2611 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2612 .addImm(SubregIndex);
2613 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2614 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2615 return &*Ins;
2618 switch (EltSize) {
2619 case 16:
2620 return BuildFn(AArch64::hsub);
2621 case 32:
2622 return BuildFn(AArch64::ssub);
2623 case 64:
2624 return BuildFn(AArch64::dsub);
2625 default:
2626 return nullptr;
2630 bool AArch64InstructionSelector::selectMergeValues(
2631 MachineInstr &I, MachineRegisterInfo &MRI) const {
2632 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2633 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2634 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2635 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2636 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2638 if (I.getNumOperands() != 3)
2639 return false;
2641 // Merging 2 s64s into an s128.
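// This is done with two lane inserts into an IMPLICIT_DEF: the first s64 goes
// into lane 0 and the second into lane 1.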
2642 if (DstTy == LLT::scalar(128)) {
2643 if (SrcTy.getSizeInBits() != 64)
2644 return false;
2645 MachineIRBuilder MIB(I);
2646 Register DstReg = I.getOperand(0).getReg();
2647 Register Src1Reg = I.getOperand(1).getReg();
2648 Register Src2Reg = I.getOperand(2).getReg();
2649 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2650 MachineInstr *InsMI =
2651 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2652 if (!InsMI)
2653 return false;
2654 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2655 Src2Reg, /* LaneIdx */ 1, RB, MIB);
2656 if (!Ins2MI)
2657 return false;
2658 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2659 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2660 I.eraseFromParent();
2661 return true;
2664 if (RB.getID() != AArch64::GPRRegBankID)
2665 return false;
2667 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2668 return false;
2670 auto *DstRC = &AArch64::GPR64RegClass;
2671 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2672 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2673 TII.get(TargetOpcode::SUBREG_TO_REG))
2674 .addDef(SubToRegDef)
2675 .addImm(0)
2676 .addUse(I.getOperand(1).getReg())
2677 .addImm(AArch64::sub_32);
2678 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2679 // Need to anyext the second scalar before we can use bfm
2680 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2681 TII.get(TargetOpcode::SUBREG_TO_REG))
2682 .addDef(SubToRegDef2)
2683 .addImm(0)
2684 .addUse(I.getOperand(2).getReg())
2685 .addImm(AArch64::sub_32);
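// The BFMXri below, with immr=32 and imms=31, inserts the low 32 bits of the
// second scalar into bits [63:32] of the result, completing the 64-bit merge.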
2686 MachineInstr &BFM =
2687 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2688 .addDef(I.getOperand(0).getReg())
2689 .addUse(SubToRegDef)
2690 .addUse(SubToRegDef2)
2691 .addImm(32)
2692 .addImm(31);
2693 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2694 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2695 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2696 I.eraseFromParent();
2697 return true;
2700 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2701 const unsigned EltSize) {
2702 // Choose a lane copy opcode and subregister based on the size of the
2703 // vector's elements.
2704 switch (EltSize) {
2705 case 16:
2706 CopyOpc = AArch64::CPYi16;
2707 ExtractSubReg = AArch64::hsub;
2708 break;
2709 case 32:
2710 CopyOpc = AArch64::CPYi32;
2711 ExtractSubReg = AArch64::ssub;
2712 break;
2713 case 64:
2714 CopyOpc = AArch64::CPYi64;
2715 ExtractSubReg = AArch64::dsub;
2716 break;
2717 default:
2718 // Unknown size, bail out.
2719 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2720 return false;
2722 return true;
2725 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2726 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2727 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2728 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2729 unsigned CopyOpc = 0;
2730 unsigned ExtractSubReg = 0;
2731 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2732 LLVM_DEBUG(
2733 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2734 return nullptr;
2737 const TargetRegisterClass *DstRC =
2738 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2739 if (!DstRC) {
2740 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2741 return nullptr;
2744 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2745 const LLT &VecTy = MRI.getType(VecReg);
2746 const TargetRegisterClass *VecRC =
2747 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2748 if (!VecRC) {
2749 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2750 return nullptr;
2753 // The register that we're going to copy into.
2754 Register InsertReg = VecReg;
2755 if (!DstReg)
2756 DstReg = MRI.createVirtualRegister(DstRC);
2757 // If the lane index is 0, we just use a subregister COPY.
2758 if (LaneIdx == 0) {
2759 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2760 .addReg(VecReg, 0, ExtractSubReg);
2761 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2762 return &*Copy;
2765 // Lane copies require 128-bit wide registers. If we're dealing with an
2766 // unpacked vector, then we need to move up to that width. Insert an implicit
2767 // def and a subregister insert to get us there.
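// E.g. extracting lane 1 of a <2 x s32>: the 64-bit source is first inserted
// into an FPR128 via dsub, and CPYi32 then copies out lane 1.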
2768 if (VecTy.getSizeInBits() != 128) {
2769 MachineInstr *ScalarToVector = emitScalarToVector(
2770 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2771 if (!ScalarToVector)
2772 return nullptr;
2773 InsertReg = ScalarToVector->getOperand(0).getReg();
2776 MachineInstr *LaneCopyMI =
2777 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2778 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2780 // Make sure that we actually constrain the initial copy.
2781 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2782 return LaneCopyMI;
2785 bool AArch64InstructionSelector::selectExtractElt(
2786 MachineInstr &I, MachineRegisterInfo &MRI) const {
2787 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2788 "unexpected opcode!");
2789 Register DstReg = I.getOperand(0).getReg();
2790 const LLT NarrowTy = MRI.getType(DstReg);
2791 const Register SrcReg = I.getOperand(1).getReg();
2792 const LLT WideTy = MRI.getType(SrcReg);
2793 (void)WideTy;
2794 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2795 "source register size too small!");
2796 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2798 // Need the lane index to determine the correct copy opcode.
2799 MachineOperand &LaneIdxOp = I.getOperand(2);
2800 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2802 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2803 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2804 return false;
2807 // Find the index to extract from.
2808 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2809 if (!VRegAndVal)
2810 return false;
2811 unsigned LaneIdx = VRegAndVal->Value;
2813 MachineIRBuilder MIRBuilder(I);
2815 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2816 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2817 LaneIdx, MIRBuilder);
2818 if (!Extract)
2819 return false;
2821 I.eraseFromParent();
2822 return true;
2825 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2826 MachineInstr &I, MachineRegisterInfo &MRI) const {
2827 unsigned NumElts = I.getNumOperands() - 1;
2828 Register SrcReg = I.getOperand(NumElts).getReg();
2829 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2830 const LLT SrcTy = MRI.getType(SrcReg);
2832 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2833 if (SrcTy.getSizeInBits() > 128) {
2834 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2835 return false;
2838 MachineIRBuilder MIB(I);
2840 // We implement a split vector operation by treating the sub-vectors as
2841 // scalars and extracting them.
2842 const RegisterBank &DstRB =
2843 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2844 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2845 Register Dst = I.getOperand(OpIdx).getReg();
2846 MachineInstr *Extract =
2847 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2848 if (!Extract)
2849 return false;
2851 I.eraseFromParent();
2852 return true;
2855 bool AArch64InstructionSelector::selectUnmergeValues(
2856 MachineInstr &I, MachineRegisterInfo &MRI) const {
2857 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2858 "unexpected opcode");
2860 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2861 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2862 AArch64::FPRRegBankID ||
2863 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2864 AArch64::FPRRegBankID) {
2865 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2866 "currently unsupported.\n");
2867 return false;
2870 // The last operand is the vector source register, and every other operand is
2871 // a register to unpack into.
2872 unsigned NumElts = I.getNumOperands() - 1;
2873 Register SrcReg = I.getOperand(NumElts).getReg();
2874 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2875 const LLT WideTy = MRI.getType(SrcReg);
2876 (void)WideTy;
2877 assert(WideTy.isVector() && "can only unmerge from vector types!");
2878 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2879 "source register size too small!");
2881 if (!NarrowTy.isScalar())
2882 return selectSplitVectorUnmerge(I, MRI);
2884 MachineIRBuilder MIB(I);
2886 // Choose a lane copy opcode and subregister based on the size of the
2887 // vector's elements.
2888 unsigned CopyOpc = 0;
2889 unsigned ExtractSubReg = 0;
2890 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2891 return false;
2893 // Set up for the lane copies.
2894 MachineBasicBlock &MBB = *I.getParent();
2896 // Stores the registers we'll be copying from.
2897 SmallVector<Register, 4> InsertRegs;
2899 // We'll use the first register twice, so we only need NumElts-1 registers.
2900 unsigned NumInsertRegs = NumElts - 1;
2902 // If our elements fit into exactly 128 bits, then we can copy from the source
2903 // directly. Otherwise, we need to do a bit of setup with some subregister
2904 // inserts.
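// E.g. unmerging a <4 x s32> can lane-copy straight from the 128-bit source,
// while a <2 x s32> source is first widened into an FPR128 via dsub.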
2905 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2906 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2907 } else {
2908 // No. We have to perform subregister inserts. For each insert, create an
2909 // implicit def and a subregister insert, and save the register we create.
2910 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2911 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2912 MachineInstr &ImpDefMI =
2913 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2914 ImpDefReg);
2916 // Now, create the subregister insert from SrcReg.
2917 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2918 MachineInstr &InsMI =
2919 *BuildMI(MBB, I, I.getDebugLoc(),
2920 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2921 .addUse(ImpDefReg)
2922 .addUse(SrcReg)
2923 .addImm(AArch64::dsub);
2925 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2926 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2928 // Save the register so that we can copy from it after.
2929 InsertRegs.push_back(InsertReg);
2933 // Now that we've created any necessary subregister inserts, we can
2934 // create the copies.
2936 // Perform the first copy separately as a subregister copy.
2937 Register CopyTo = I.getOperand(0).getReg();
2938 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2939 .addReg(InsertRegs[0], 0, ExtractSubReg);
2940 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
2942 // Now, perform the remaining copies as vector lane copies.
2943 unsigned LaneIdx = 1;
2944 for (Register InsReg : InsertRegs) {
2945 Register CopyTo = I.getOperand(LaneIdx).getReg();
2946 MachineInstr &CopyInst =
2947 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2948 .addUse(InsReg)
2949 .addImm(LaneIdx);
2950 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2951 ++LaneIdx;
2954 // Separately constrain the first copy's destination. Because of the
2955 // limitation in constrainOperandRegClass, we can't guarantee that this will
2956 // actually be constrained. So, do it ourselves using the second operand.
2957 const TargetRegisterClass *RC =
2958 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2959 if (!RC) {
2960 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2961 return false;
2964 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2965 I.eraseFromParent();
2966 return true;
2969 bool AArch64InstructionSelector::selectConcatVectors(
2970 MachineInstr &I, MachineRegisterInfo &MRI) const {
2971 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2972 "Unexpected opcode");
2973 Register Dst = I.getOperand(0).getReg();
2974 Register Op1 = I.getOperand(1).getReg();
2975 Register Op2 = I.getOperand(2).getReg();
2976 MachineIRBuilder MIRBuilder(I);
2977 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2978 if (!ConcatMI)
2979 return false;
2980 I.eraseFromParent();
2981 return true;
2984 void AArch64InstructionSelector::collectShuffleMaskIndices(
2985 MachineInstr &I, MachineRegisterInfo &MRI,
2986 SmallVectorImpl<Optional<int>> &Idxs) const {
2987 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2988 assert(
2989 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2990 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2991 // Find the constant indices.
2992 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2993 // Look through copies.
2994 MachineInstr *ScalarDef =
2995 getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
2996 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2997 if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2998 // This must be an undef if it's not a constant.
2999 assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
3000 Idxs.push_back(None);
3001 } else {
3002 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
3007 unsigned
3008 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3009 MachineFunction &MF) const {
3010 Type *CPTy = CPVal->getType();
3011 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3012 if (Align == 0)
3013 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3015 MachineConstantPool *MCP = MF.getConstantPool();
3016 return MCP->getConstantPoolIndex(CPVal, Align);
3019 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3020 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3021 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3023 auto Adrp =
3024 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3025 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3027 MachineInstr *LoadMI = nullptr;
3028 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3029 case 16:
3030 LoadMI =
3031 &*MIRBuilder
3032 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3033 .addConstantPoolIndex(CPIdx, 0,
3034 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3035 break;
3036 case 8:
3037 LoadMI = &*MIRBuilder
3038 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3039 .addConstantPoolIndex(
3040 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3041 break;
3042 default:
3043 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3044 << *CPVal->getType() << '\n');
3045 return nullptr;
3047 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3048 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3049 return LoadMI;
3052 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3053 /// size and RB.
3054 static std::pair<unsigned, unsigned>
3055 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3056 unsigned Opc, SubregIdx;
3057 if (RB.getID() == AArch64::GPRRegBankID) {
3058 if (EltSize == 32) {
3059 Opc = AArch64::INSvi32gpr;
3060 SubregIdx = AArch64::ssub;
3061 } else if (EltSize == 64) {
3062 Opc = AArch64::INSvi64gpr;
3063 SubregIdx = AArch64::dsub;
3064 } else {
3065 llvm_unreachable("invalid elt size!");
3067 } else {
3068 if (EltSize == 8) {
3069 Opc = AArch64::INSvi8lane;
3070 SubregIdx = AArch64::bsub;
3071 } else if (EltSize == 16) {
3072 Opc = AArch64::INSvi16lane;
3073 SubregIdx = AArch64::hsub;
3074 } else if (EltSize == 32) {
3075 Opc = AArch64::INSvi32lane;
3076 SubregIdx = AArch64::ssub;
3077 } else if (EltSize == 64) {
3078 Opc = AArch64::INSvi64lane;
3079 SubregIdx = AArch64::dsub;
3080 } else {
3081 llvm_unreachable("invalid elt size!");
3084 return std::make_pair(Opc, SubregIdx);
3087 MachineInstr *
3088 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3089 MachineOperand &RHS,
3090 MachineIRBuilder &MIRBuilder) const {
3091 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3092 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3093 static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3094 {AArch64::ADDWrr, AArch64::ADDWri}};
3095 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3096 auto ImmFns = selectArithImmed(RHS);
3097 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3098 auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3100 // If we matched a valid constant immediate, add those operands.
3101 if (ImmFns) {
3102 for (auto &RenderFn : *ImmFns)
3103 RenderFn(AddMI);
3104 } else {
3105 AddMI.addUse(RHS.getReg());
3108 constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3109 return &*AddMI;
3112 MachineInstr *
3113 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3114 MachineIRBuilder &MIRBuilder) const {
3115 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3116 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3117 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3118 {AArch64::ADDSWrr, AArch64::ADDSWri}};
3119 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3120 auto ImmFns = selectArithImmed(RHS);
3121 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3122 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3124 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3126 // If we matched a valid constant immediate, add those operands.
3127 if (ImmFns) {
3128 for (auto &RenderFn : *ImmFns)
3129 RenderFn(CmpMI);
3130 } else {
3131 CmpMI.addUse(RHS.getReg());
3134 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3135 return &*CmpMI;
3138 MachineInstr *
3139 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3140 MachineIRBuilder &MIRBuilder) const {
3141 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3142 unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3143 bool Is32Bit = (RegSize == 32);
3144 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3145 {AArch64::ANDSWrr, AArch64::ANDSWri}};
3146 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3148 // We might be able to fold an immediate into the TST. We need to make sure
3149 // it's a logical immediate though, since ANDS requires that.
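// E.g. a mask like 0xff is a valid logical immediate and folds to ANDSWri /
// ANDSXri; a mask that can't be encoded falls back to the register form.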
3150 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3151 bool IsImmForm = ValAndVReg.hasValue() &&
3152 AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3153 unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3154 auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3156 if (IsImmForm)
3157 TstMI.addImm(
3158 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3159 else
3160 TstMI.addUse(RHS);
3162 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3163 return &*TstMI;
3166 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3167 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3168 MachineIRBuilder &MIRBuilder) const {
3169 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3170 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3172 // Fold the compare if possible.
3173 MachineInstr *FoldCmp =
3174 tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3175 if (FoldCmp)
3176 return FoldCmp;
3178 // Can't fold into a CMN. Just emit a normal compare.
3179 unsigned CmpOpc = 0;
3180 Register ZReg;
3182 LLT CmpTy = MRI.getType(LHS.getReg());
3183 assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3184 "Expected scalar or pointer");
3185 if (CmpTy == LLT::scalar(32)) {
3186 CmpOpc = AArch64::SUBSWrr;
3187 ZReg = AArch64::WZR;
3188 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3189 CmpOpc = AArch64::SUBSXrr;
3190 ZReg = AArch64::XZR;
3191 } else {
3192 return nullptr;
3195 // Try to match immediate forms.
3196 auto ImmFns = selectArithImmed(RHS);
3197 if (ImmFns)
3198 CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3200 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3201 // If we matched a valid constant immediate, add those operands.
3202 if (ImmFns) {
3203 for (auto &RenderFn : *ImmFns)
3204 RenderFn(CmpMI);
3205 } else {
3206 CmpMI.addUse(RHS.getReg());
3209 // Make sure that we can constrain the compare that we emitted.
3210 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3211 return &*CmpMI;
3214 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3215 Optional<Register> Dst, Register Op1, Register Op2,
3216 MachineIRBuilder &MIRBuilder) const {
3217 // We implement a vector concat by:
3218 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3219 // 2. Insert the upper vector into the destination's upper element
3220 // TODO: some of this code is common with G_BUILD_VECTOR handling.
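// E.g. concatenating two <2 x s32> values: each 64-bit operand is widened
// into an FPR128 via dsub, then INSvi64lane moves the second operand into
// lane 1 of the first.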
3221 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3223 const LLT Op1Ty = MRI.getType(Op1);
3224 const LLT Op2Ty = MRI.getType(Op2);
3226 if (Op1Ty != Op2Ty) {
3227 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3228 return nullptr;
3230 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3232 if (Op1Ty.getSizeInBits() >= 128) {
3233 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3234 return nullptr;
3237 // At the moment we just support 64 bit vector concats.
3238 if (Op1Ty.getSizeInBits() != 64) {
3239 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3240 return nullptr;
3243 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3244 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3245 const TargetRegisterClass *DstRC =
3246 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3248 MachineInstr *WidenedOp1 =
3249 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3250 MachineInstr *WidenedOp2 =
3251 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3252 if (!WidenedOp1 || !WidenedOp2) {
3253 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3254 return nullptr;
3255 }
3257 // Now do the insert of the upper element.
3258 unsigned InsertOpc, InsSubRegIdx;
3259 std::tie(InsertOpc, InsSubRegIdx) =
3260 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3262 if (!Dst)
3263 Dst = MRI.createVirtualRegister(DstRC);
3264 auto InsElt =
3265 MIRBuilder
3266 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3267 .addImm(1) /* Lane index */
3268 .addUse(WidenedOp2->getOperand(0).getReg())
3269 .addImm(0);
3270 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3271 return &*InsElt;
3272 }
3274 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3275 MachineInstr &I, MachineRegisterInfo &MRI) const {
3276 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3277 "Expected a G_FCONSTANT!");
3278 MachineOperand &ImmOp = I.getOperand(1);
3279 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3281 // Only handle 32 and 64 bit defs for now.
3282 if (DefSize != 32 && DefSize != 64)
3283 return nullptr;
3285 // Don't handle null values using FMOV.
3286 if (ImmOp.getFPImm()->isNullValue())
3287 return nullptr;
3289 // Get the immediate representation for the FMOV.
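// Added note: FMOV (immediate) can only encode values of the form
// (-1)^s * (16 + m) / 16 * 2^e with m in [0, 15] and e in [-3, 4];
// getFP32Imm/getFP64Imm return -1 for anything outside that set.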
3290 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3291 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3292 : AArch64_AM::getFP64Imm(ImmValAPF);
3294 // If this is -1, it means the immediate can't be represented as the requested
3295 // floating point value. Bail.
3296 if (Imm == -1)
3297 return nullptr;
3299 // Update MI to represent the new FMOV instruction, constrain it, and return.
3300 ImmOp.ChangeToImmediate(Imm);
3301 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3302 I.setDesc(TII.get(MovOpc));
3303 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3304 return &I;
3305 }
3307 MachineInstr *
3308 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3309 MachineIRBuilder &MIRBuilder) const {
3310 // CSINC increments the result when the predicate is false. Invert it.
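// E.g. for an EQ compare this emits csinc wN, wzr, wzr, ne, which produces 1
// exactly when EQ holds and 0 otherwise.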
3311 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3312 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3313 auto I =
3314 MIRBuilder
3315 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3316 .addImm(InvCC);
3317 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3318 return &*I;
3319 }
3321 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3322 MachineIRBuilder MIB(I);
3323 MachineRegisterInfo &MRI = *MIB.getMRI();
3324 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3326 // We want to recognize this pattern:
3328 // $z = G_FCMP pred, $x, $y
3329 // ...
3330 // $w = G_SELECT $z, $a, $b
3332 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3333 // some copies/truncs in between.)
3335 // If we see this, then we can emit something like this:
3337 // fcmp $x, $y
3338 // fcsel $w, $a, $b, pred
3340 // Rather than emitting both of the rather long sequences in the standard
3341 // G_FCMP/G_SELECT select methods.
3343 // First, check if the condition is defined by a compare.
3344 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3345 while (CondDef) {
3346 // We can only fold if all of the defs have one use.
3347 if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3348 return false;
3350 // We can skip over G_TRUNC since the condition is 1-bit.
3351 // Truncating/extending can have no impact on the value.
3352 unsigned Opc = CondDef->getOpcode();
3353 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3354 break;
3356 // Can't see past copies from physregs.
3357 if (Opc == TargetOpcode::COPY &&
3358 TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3359 return false;
3361 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3362 }
3364 // Is the condition defined by a compare?
3365 if (!CondDef)
3366 return false;
3368 unsigned CondOpc = CondDef->getOpcode();
3369 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3370 return false;
3372 AArch64CC::CondCode CondCode;
3373 if (CondOpc == TargetOpcode::G_ICMP) {
3374 CondCode = changeICMPPredToAArch64CC(
3375 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3376 if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3377 CondDef->getOperand(1), MIB)) {
3378 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3379 return false;
3380 }
3381 } else {
3382 // Get the condition code for the select.
3383 AArch64CC::CondCode CondCode2;
3384 changeFCMPPredToAArch64CC(
3385 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3386 CondCode2);
3388 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3389 // instructions to emit the comparison.
3390 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3391 // unnecessary.
3392 if (CondCode2 != AArch64CC::AL)
3393 return false;
3395 // Make sure we'll be able to select the compare.
3396 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3397 if (!CmpOpc)
3398 return false;
3400 // Emit a new compare.
3401 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3402 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3403 Cmp.addUse(CondDef->getOperand(3).getReg());
3404 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3405 }
3407 // Emit the select.
3408 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3409 auto CSel =
3410 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3411 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3412 .addImm(CondCode);
3413 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3414 I.eraseFromParent();
3415 return true;
3416 }
3418 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3419 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3420 MachineIRBuilder &MIRBuilder) const {
3421 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3422 "Unexpected MachineOperand");
3423 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3424 // We want to find this sort of thing:
3425 // x = G_SUB 0, y
3426 // G_ICMP z, x
3428 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3429 // e.g:
3431 // cmn z, y
3433 // Helper lambda to detect the subtract followed by the compare.
3434 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3435 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3436 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3437 return false;
3439 // Need to make sure NZCV is the same at the end of the transformation.
3440 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3441 return false;
3443 // We want to match against SUBs.
3444 if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3445 return false;
3447 // Make sure that we're getting
3448 // x = G_SUB 0, y
3449 auto ValAndVReg =
3450 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3451 if (!ValAndVReg || ValAndVReg->Value != 0)
3452 return false;
3454 // This can safely be represented as a CMN.
3455 return true;
3456 };
3458 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3459 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3460 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3461 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3462 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3464 // Given this:
3466 // x = G_SUB 0, y
3467 // G_ICMP x, z
3469 // Produce this:
3471 // cmn y, z
3472 if (IsCMN(LHSDef, CC))
3473 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3475 // Same idea here, but with the RHS of the compare instead:
3477 // Given this:
3479 // x = G_SUB 0, y
3480 // G_ICMP z, x
3482 // Produce this:
3484 // cmn z, y
3485 if (IsCMN(RHSDef, CC))
3486 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3488 // Given this:
3490 // z = G_AND x, y
3491 // G_ICMP z, 0
3493 // Produce this if the compare is signed:
3495 // tst x, y
3496 if (!isUnsignedICMPPred(P) && LHSDef &&
3497 LHSDef->getOpcode() == TargetOpcode::G_AND) {
3498 // Make sure that the RHS is 0.
3499 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3500 if (!ValAndVReg || ValAndVReg->Value != 0)
3501 return nullptr;
3503 return emitTST(LHSDef->getOperand(1).getReg(),
3504 LHSDef->getOperand(2).getReg(), MIRBuilder);
3505 }
3507 return nullptr;
3508 }
3510 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3511 // Try to match a vector splat operation into a dup instruction.
3512 // We're looking for this pattern:
3513 // %scalar:gpr(s64) = COPY $x0
3514 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3515 // %cst0:gpr(s32) = G_CONSTANT i32 0
3516 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3517 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3518 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3519 // %zerovec(<2 x s32>)
3521 // ...into:
3522 // %splat = DUP %scalar
3523 // We use the regbank of the scalar to determine which kind of dup to use.
3524 MachineIRBuilder MIB(I);
3525 MachineRegisterInfo &MRI = *MIB.getMRI();
3526 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3527 using namespace TargetOpcode;
3528 using namespace MIPatternMatch;
3530 // Begin matching the insert.
3531 auto *InsMI =
3532 getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3533 if (!InsMI)
3534 return false;
3535 // Match the undef vector operand.
3536 auto *UndefMI =
3537 getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3538 if (!UndefMI)
3539 return false;
3540 // Match the scalar being splatted.
3541 Register ScalarReg = InsMI->getOperand(2).getReg();
3542 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3543 // Match the index constant 0.
3544 int64_t Index = 0;
3545 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3546 return false;
3548 // The shuffle's second operand doesn't matter if the mask is all zero.
3549 auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
3550 if (!ZeroVec)
3551 return false;
3552 int64_t Zero = 0;
3553 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
3554 return false;
3555 for (unsigned i = 1, e = ZeroVec->getNumOperands(); i < e; ++i) {
3556 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
3557 return false; // This wasn't an all zeros vector.
3558 }
3560 // We're done, now find out what kind of splat we need.
3561 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3562 LLT EltTy = VecTy.getElementType();
3563 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3564 LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3565 return false;
3566 }
3567 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3568 static const unsigned OpcTable[2][2] = {
3569 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3570 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3571 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3573 // For FP splats, we need to widen the scalar reg via undef too.
3574 if (IsFP) {
3575 MachineInstr *Widen = emitScalarToVector(
3576 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3577 if (!Widen)
3578 return false;
3579 ScalarReg = Widen->getOperand(0).getReg();
3580 }
3581 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3582 if (IsFP)
3583 Dup.addImm(0);
3584 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3585 I.eraseFromParent();
3586 return true;
3587 }
3589 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3590 if (TM.getOptLevel() == CodeGenOpt::None)
3591 return false;
3592 if (tryOptVectorDup(I))
3593 return true;
3594 return false;
3595 }
3597 bool AArch64InstructionSelector::selectShuffleVector(
3598 MachineInstr &I, MachineRegisterInfo &MRI) const {
3599 if (tryOptVectorShuffle(I))
3600 return true;
3601 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3602 Register Src1Reg = I.getOperand(1).getReg();
3603 const LLT Src1Ty = MRI.getType(Src1Reg);
3604 Register Src2Reg = I.getOperand(2).getReg();
3605 const LLT Src2Ty = MRI.getType(Src2Reg);
3607 MachineBasicBlock &MBB = *I.getParent();
3608 MachineFunction &MF = *MBB.getParent();
3609 LLVMContext &Ctx = MF.getFunction().getContext();
3611 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3612 // operand, it comes in as a normal vector value which we have to analyze to
3613 // find the mask indices. If the mask element is undef, then
3614 // collectShuffleMaskIndices() will add a None entry for that index into
3615 // the list.
3616 SmallVector<Optional<int>, 8> Mask;
3617 collectShuffleMaskIndices(I, MRI, Mask);
3618 assert(!Mask.empty() && "Expected to find mask indices");
3620 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3621 // it originated from a <1 x T> type. Those should have been lowered into
3622 // G_BUILD_VECTOR earlier.
3623 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3624 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3625 return false;
3626 }
3628 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3630 SmallVector<Constant *, 64> CstIdxs;
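// Each mask element expands into BytesPerElt consecutive byte indices for the
// TBL lookup; e.g. with 4-byte elements a mask value of 2 contributes the
// bytes {8, 9, 10, 11}.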
3631 for (auto &MaybeVal : Mask) {
3632 // For now, we'll just assume any undef index is 0. This should be
3633 // optimized in future, e.g. to select DUP etc.
3634 int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
3635 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3636 unsigned Offset = Byte + Val * BytesPerElt;
3637 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3638 }
3639 }
3641 MachineIRBuilder MIRBuilder(I);
3643 // Use a constant pool to load the index vector for TBL.
3644 Constant *CPVal = ConstantVector::get(CstIdxs);
3645 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3646 if (!IndexLoad) {
3647 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3648 return false;
3649 }
3651 if (DstTy.getSizeInBits() != 128) {
3652 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3653 // This case can be done with TBL1.
3654 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3655 if (!Concat) {
3656 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3657 return false;
3658 }
3660 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3661 IndexLoad =
3662 emitScalarToVector(64, &AArch64::FPR128RegClass,
3663 IndexLoad->getOperand(0).getReg(), MIRBuilder);
3665 auto TBL1 = MIRBuilder.buildInstr(
3666 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3667 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3668 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3670 auto Copy =
3671 MIRBuilder
3672 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3673 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3674 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3675 I.eraseFromParent();
3676 return true;
3677 }
3679 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3680 // Q registers for regalloc.
3681 auto RegSeq = MIRBuilder
3682 .buildInstr(TargetOpcode::REG_SEQUENCE,
3683 {&AArch64::QQRegClass}, {Src1Reg})
3684 .addImm(AArch64::qsub0)
3685 .addUse(Src2Reg)
3686 .addImm(AArch64::qsub1);
3688 auto TBL2 =
3689 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3690 {RegSeq, IndexLoad->getOperand(0).getReg()});
3691 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3692 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3693 I.eraseFromParent();
3694 return true;
3695 }
3697 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3698 Optional<Register> DstReg, Register SrcReg, Register EltReg,
3699 unsigned LaneIdx, const RegisterBank &RB,
3700 MachineIRBuilder &MIRBuilder) const {
3701 MachineInstr *InsElt = nullptr;
3702 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3703 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3705 // Create a register to define with the insert if one wasn't passed in.
3706 if (!DstReg)
3707 DstReg = MRI.createVirtualRegister(DstRC);
3709 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3710 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3712 if (RB.getID() == AArch64::FPRRegBankID) {
3713 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3714 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3715 .addImm(LaneIdx)
3716 .addUse(InsSub->getOperand(0).getReg())
3717 .addImm(0);
3718 } else {
3719 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3720 .addImm(LaneIdx)
3721 .addUse(EltReg);
3722 }
3724 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3725 return InsElt;
3726 }
3728 bool AArch64InstructionSelector::selectInsertElt(
3729 MachineInstr &I, MachineRegisterInfo &MRI) const {
3730 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3732 // Get information on the destination.
3733 Register DstReg = I.getOperand(0).getReg();
3734 const LLT DstTy = MRI.getType(DstReg);
3735 unsigned VecSize = DstTy.getSizeInBits();
3737 // Get information on the element we want to insert into the destination.
3738 Register EltReg = I.getOperand(2).getReg();
3739 const LLT EltTy = MRI.getType(EltReg);
3740 unsigned EltSize = EltTy.getSizeInBits();
3741 if (EltSize < 16 || EltSize > 64)
3742 return false; // Don't support all element types yet.
3744 // Find the definition of the index. Bail out if it's not defined by a
3745 // G_CONSTANT.
3746 Register IdxReg = I.getOperand(3).getReg();
3747 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3748 if (!VRegAndVal)
3749 return false;
3750 unsigned LaneIdx = VRegAndVal->Value;
3752 // Perform the lane insert.
3753 Register SrcReg = I.getOperand(1).getReg();
3754 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3755 MachineIRBuilder MIRBuilder(I);
3757 if (VecSize < 128) {
3758 // If the vector we're inserting into is smaller than 128 bits, widen it
3759 // to 128 to do the insert.
3760 MachineInstr *ScalarToVec = emitScalarToVector(
3761 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3762 if (!ScalarToVec)
3763 return false;
3764 SrcReg = ScalarToVec->getOperand(0).getReg();
3765 }
3767 // Create an insert into a new FPR128 register.
3768 // Note that if our vector is already 128 bits, we end up emitting an extra
3769 // register.
3770 MachineInstr *InsMI =
3771 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3773 if (VecSize < 128) {
3774 // If we had to widen to perform the insert, then we have to demote back to
3775 // the original size to get the result we want.
3776 Register DemoteVec = InsMI->getOperand(0).getReg();
3777 const TargetRegisterClass *RC =
3778 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3779 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3780 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3781 return false;
3782 }
3783 unsigned SubReg = 0;
3784 if (!getSubRegForClass(RC, TRI, SubReg))
3785 return false;
3786 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3787 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3788 << ")\n");
3789 return false;
3790 }
3791 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3792 .addReg(DemoteVec, 0, SubReg);
3793 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3794 } else {
3795 // No widening needed.
3796 InsMI->getOperand(0).setReg(DstReg);
3797 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3798 }
3800 I.eraseFromParent();
3801 return true;
3802 }
3804 bool AArch64InstructionSelector::selectBuildVector(
3805 MachineInstr &I, MachineRegisterInfo &MRI) const {
3806 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3807 // Until we port more of the optimized selections, for now just use a vector
3808 // insert sequence.
3809 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3810 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3811 unsigned EltSize = EltTy.getSizeInBits();
3812 if (EltSize < 16 || EltSize > 64)
3813 return false; // Don't support all element types yet.
3814 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3815 MachineIRBuilder MIRBuilder(I);
3817 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3818 MachineInstr *ScalarToVec =
3819 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3820 I.getOperand(1).getReg(), MIRBuilder);
3821 if (!ScalarToVec)
3822 return false;
3824 Register DstVec = ScalarToVec->getOperand(0).getReg();
3825 unsigned DstSize = DstTy.getSizeInBits();
3827 // Keep track of the last MI we inserted. Later on, we might be able to save
3828 // a copy using it.
3829 MachineInstr *PrevMI = nullptr;
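// E.g. a G_BUILD_VECTOR of four s32 operands becomes one widen of the first
// operand into a 128-bit register followed by three lane inserts, into lanes
// 1 through 3.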
3830 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3831 // Note that if we don't do a subregister copy, we can end up making an
3832 // extra register.
3833 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3834 MIRBuilder);
3835 DstVec = PrevMI->getOperand(0).getReg();
3836 }
3838 // If DstTy's size in bits is less than 128, then emit a subregister copy
3839 // from DstVec to the last register we've defined.
3840 if (DstSize < 128) {
3841 // Force this to be FPR using the destination vector.
3842 const TargetRegisterClass *RC =
3843 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3844 if (!RC)
3845 return false;
3846 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3847 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3848 return false;
3849 }
3851 unsigned SubReg = 0;
3852 if (!getSubRegForClass(RC, TRI, SubReg))
3853 return false;
3854 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3855 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3856 << ")\n");
3857 return false;
3858 }
3860 Register Reg = MRI.createVirtualRegister(RC);
3861 Register DstReg = I.getOperand(0).getReg();
3863 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3864 .addReg(DstVec, 0, SubReg);
3865 MachineOperand &RegOp = I.getOperand(1);
3866 RegOp.setReg(Reg);
3867 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3868 } else {
3869 // We don't need a subregister copy. Save a copy by re-using the
3870 // destination register on the final insert.
3871 assert(PrevMI && "PrevMI was null?");
3872 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3873 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3874 }
3876 I.eraseFromParent();
3877 return true;
3878 }
3880 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3881 /// ID if it exists, and 0 otherwise.
3882 static unsigned findIntrinsicID(MachineInstr &I) {
3883 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3884 return Op.isIntrinsicID();
3885 });
3886 if (IntrinOp == I.operands_end())
3887 return 0;
3888 return IntrinOp->getIntrinsicID();
3889 }
3891 /// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3892 /// intrinsic.
3893 static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3894 switch (NumBytesToStore) {
3895 // TODO: 1 and 2 byte stores
3896 case 4:
3897 return AArch64::STLXRW;
3898 case 8:
3899 return AArch64::STLXRX;
3900 default:
3901 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3902 << NumBytesToStore << ")\n");
3903 break;
3904 }
3905 return 0;
3906 }
3908 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3909 MachineInstr &I, MachineRegisterInfo &MRI) const {
3910 // Find the intrinsic ID.
3911 unsigned IntrinID = findIntrinsicID(I);
3912 if (!IntrinID)
3913 return false;
3914 MachineIRBuilder MIRBuilder(I);
3916 // Select the instruction.
3917 switch (IntrinID) {
3918 default:
3919 return false;
3920 case Intrinsic::trap:
3921 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3922 break;
3923 case Intrinsic::debugtrap:
3924 if (!STI.isTargetWindows())
3925 return false;
3926 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3927 break;
3928 case Intrinsic::aarch64_stlxr:
3929 Register StatReg = I.getOperand(0).getReg();
3930 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3931 "Status register must be 32 bits!");
3932 Register SrcReg = I.getOperand(2).getReg();
3934 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3935 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3936 return false;
3937 }
3939 Register PtrReg = I.getOperand(3).getReg();
3940 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3942 // Expect only one memory operand.
3943 if (!I.hasOneMemOperand())
3944 return false;
3946 const MachineMemOperand *MemOp = *I.memoperands_begin();
3947 unsigned NumBytesToStore = MemOp->getSize();
3948 unsigned Opc = getStlxrOpcode(NumBytesToStore);
3949 if (!Opc)
3950 return false;
3951 unsigned NumBitsToStore = NumBytesToStore * 8;
3952 if (NumBitsToStore != 64) {
3953 // The intrinsic always has a 64-bit source, but we might actually want
3954 // a differently-sized source for the instruction. Try to get it.
3955 // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
3956 // just handle 4-byte stores.
3957 // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
3958 // to the right size for the STLXR.
3959 MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
3960 if (!Zext)
3961 return false;
3962 SrcReg = Zext->getOperand(1).getReg();
3963 // We should get an appropriately-sized register here.
3964 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
3965 return false;
3966 }
3967 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
3968 .addMemOperand(*I.memoperands_begin());
3969 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3970 }
3972 I.eraseFromParent();
3973 return true;
3974 }
3976 bool AArch64InstructionSelector::selectIntrinsic(
3977 MachineInstr &I, MachineRegisterInfo &MRI) const {
3978 unsigned IntrinID = findIntrinsicID(I);
3979 if (!IntrinID)
3980 return false;
3981 MachineIRBuilder MIRBuilder(I);
3983 switch (IntrinID) {
3984 default:
3985 break;
3986 case Intrinsic::aarch64_crypto_sha1h:
3987 Register DstReg = I.getOperand(0).getReg();
3988 Register SrcReg = I.getOperand(2).getReg();
3990 // FIXME: Should this be an assert?
3991 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3992 MRI.getType(SrcReg).getSizeInBits() != 32)
3993 return false;
3995 // The operation has to happen on FPRs. Set up some new FPR registers for
3996 // the source and destination if they are on GPRs.
3997 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3998 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3999 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4001 // Make sure the copy ends up getting constrained properly.
4002 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4003 AArch64::GPR32RegClass, MRI);
4004 }
4006 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4007 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4009 // Actually insert the instruction.
4010 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4011 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4013 // Did we create a new register for the destination?
4014 if (DstReg != I.getOperand(0).getReg()) {
4015 // Yep. Copy the result of the instruction back into the original
4016 // destination.
4017 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4018 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4019 AArch64::GPR32RegClass, MRI);
4020 }
4022 I.eraseFromParent();
4023 return true;
4024 }
4025 return false;
4026 }
4028 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4029 auto &MI = *Root.getParent();
4030 auto &MBB = *MI.getParent();
4031 auto &MF = *MBB.getParent();
4032 auto &MRI = MF.getRegInfo();
4033 uint64_t Immed;
4034 if (Root.isImm())
4035 Immed = Root.getImm();
4036 else if (Root.isCImm())
4037 Immed = Root.getCImm()->getZExtValue();
4038 else if (Root.isReg()) {
4039 auto ValAndVReg =
4040 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
4041 if (!ValAndVReg)
4042 return None;
4043 Immed = ValAndVReg->Value;
4044 } else
4045 return None;
4046 return Immed;
4047 }
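// Added summary (inferred from the encodings below): the selectShiftA/B
// helpers render the pair of immediates used by the bitfield-move form of a
// constant shift; e.g. for a 32-bit left shift by C the rendered values are
// (32 - C) & 0x1f and 31 - C.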
4049 InstructionSelector::ComplexRendererFns
4050 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4051 auto MaybeImmed = getImmedFromMO(Root);
4052 if (MaybeImmed == None || *MaybeImmed > 31)
4053 return None;
4054 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4055 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4056 }
4058 InstructionSelector::ComplexRendererFns
4059 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4060 auto MaybeImmed = getImmedFromMO(Root);
4061 if (MaybeImmed == None || *MaybeImmed > 31)
4062 return None;
4063 uint64_t Enc = 31 - *MaybeImmed;
4064 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4065 }
4067 InstructionSelector::ComplexRendererFns
4068 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4069 auto MaybeImmed = getImmedFromMO(Root);
4070 if (MaybeImmed == None || *MaybeImmed > 63)
4071 return None;
4072 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4073 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4074 }
4076 InstructionSelector::ComplexRendererFns
4077 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4078 auto MaybeImmed = getImmedFromMO(Root);
4079 if (MaybeImmed == None || *MaybeImmed > 63)
4080 return None;
4081 uint64_t Enc = 63 - *MaybeImmed;
4082 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4083 }
4085 /// SelectArithImmed - Select an immediate value that can be represented as
4086 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
4087 /// Val set to the 12-bit value and Shift set to the shifter operand.
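/// For example, 0x123000 is matched as the 12-bit value 0x123 together with an
/// LSL #12 shifter operand.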
4088 InstructionSelector::ComplexRendererFns
4089 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4090 // This function is called from the addsub_shifted_imm ComplexPattern,
4091 // which lists [imm] as the list of opcodes it's interested in; however,
4092 // we still need to check whether the operand is actually an immediate
4093 // here because the ComplexPattern opcode list is only used in
4094 // root-level opcode matching.
4095 auto MaybeImmed = getImmedFromMO(Root);
4096 if (MaybeImmed == None)
4097 return None;
4098 uint64_t Immed = *MaybeImmed;
4099 unsigned ShiftAmt;
4101 if (Immed >> 12 == 0) {
4102 ShiftAmt = 0;
4103 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4104 ShiftAmt = 12;
4105 Immed = Immed >> 12;
4106 } else
4107 return None;
4109 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4110 return {{
4111 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4112 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4113 }};
4114 }
4116 /// Return true if it is worth folding MI into an extended register. That is,
4117 /// if it's safe to pull it into the addressing mode of a load or store as a
4118 /// shift.
4119 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4120 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4121 // Always fold if there is one use, or if we're optimizing for size.
4122 Register DefReg = MI.getOperand(0).getReg();
4123 if (MRI.hasOneUse(DefReg) ||
4124 MI.getParent()->getParent()->getFunction().hasMinSize())
4125 return true;
4127 // It's better to avoid folding and recomputing shifts when we don't have a
4128 // fastpath.
4129 if (!STI.hasLSLFast())
4130 return false;
4132 // We have a fastpath, so folding a shift in and potentially computing it
4133 // many times may be beneficial. Check if this is only used in memory ops.
4134 // If it is, then we should fold.
4135 return all_of(MRI.use_instructions(DefReg),
4136 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4137 }
4139 /// This is used for computing addresses like this:
4141 /// ldr x1, [x2, x3, lsl #3]
4143 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4144 /// is a constant value specific to this load instruction. That is, we'll never
4145 /// see anything other than a 3 here (which corresponds to the size of the
4146 /// element being loaded.)
4147 InstructionSelector::ComplexRendererFns
4148 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4149 MachineOperand &Root, unsigned SizeInBytes) const {
4150 if (!Root.isReg())
4151 return None;
4152 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4154 // Make sure that the memory op is a valid size.
4155 int64_t LegalShiftVal = Log2_32(SizeInBytes);
4156 if (LegalShiftVal == 0)
4157 return None;
4159 // We want to find something like this:
4161 // val = G_CONSTANT LegalShiftVal
4162 // shift = G_SHL off_reg val
4163 // ptr = G_GEP base_reg shift
4164 // x = G_LOAD ptr
4166 // And fold it into this addressing mode:
4168 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4170 // Check if we can find the G_GEP.
4171 MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4172 if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4173 return None;
4175 // Now, try to match an opcode which will match our specific offset.
4176 // We want a G_SHL or a G_MUL.
4177 MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4178 if (!OffsetInst)
4179 return None;
4181 unsigned OffsetOpc = OffsetInst->getOpcode();
4182 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4183 return None;
4185 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4186 return None;
4188 // Now, try to find the specific G_CONSTANT. Start by assuming that the
4189 // register we will offset is the LHS, and the register containing the
4190 // constant is the RHS.
4191 Register OffsetReg = OffsetInst->getOperand(1).getReg();
4192 Register ConstantReg = OffsetInst->getOperand(2).getReg();
4193 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4194 if (!ValAndVReg) {
4195 // We didn't get a constant on the RHS. If the opcode is a shift, then
4196 // we're done.
4197 if (OffsetOpc == TargetOpcode::G_SHL)
4198 return None;
4200 // If we have a G_MUL, we can use either register. Try looking at the RHS.
4201 std::swap(OffsetReg, ConstantReg);
4202 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4203 if (!ValAndVReg)
4204 return None;
4205 }
4207 // The value must fit into 3 bits, and must be positive. Make sure that is
4208 // true.
4209 int64_t ImmVal = ValAndVReg->Value;
4211 // Since we're going to pull this into a shift, the constant value must be
4212 // a power of 2. If we got a multiply, then we need to check this.
4213 if (OffsetOpc == TargetOpcode::G_MUL) {
4214 if (!isPowerOf2_32(ImmVal))
4215 return None;
4217 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4218 ImmVal = Log2_32(ImmVal);
4219 }
4221 if ((ImmVal & 0x7) != ImmVal)
4222 return None;
4224 // We are only allowed to shift by LegalShiftVal. This shift value is built
4225 // into the instruction, so we can't just use whatever we want.
4226 if (ImmVal != LegalShiftVal)
4227 return None;
4229 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4230 // offset. Signify that we are shifting by setting the shift flag to 1.
4231 return {{
4232 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4233 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4234 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4235 [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
4236 }};
4237 }
4239 /// This is used for computing addresses like this:
4241 /// ldr x1, [x2, x3]
4243 /// Where x2 is the base register, and x3 is an offset register.
4245 /// When possible (or profitable) to fold a G_GEP into the address calculation,
4246 /// this will do so. Otherwise, it will return None.
4247 InstructionSelector::ComplexRendererFns
4248 AArch64InstructionSelector::selectAddrModeRegisterOffset(
4249 MachineOperand &Root) const {
4250 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4252 // We need a GEP.
4253 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4254 if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4255 return None;
4257 // If this is used more than once, let's not bother folding.
4258 // TODO: Check if they are memory ops. If they are, then we can still fold
4259 // without having to recompute anything.
4260 if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4261 return None;
4263 // Base is the GEP's LHS, offset is its RHS.
4264 return {{
4265 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4266 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
4267 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4268 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4269 }};
4270 }
4272 /// This is intended to be equivalent to selectAddrModeXRO in
4273 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4274 InstructionSelector::ComplexRendererFns
4275 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4276 unsigned SizeInBytes) const {
4277 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4279 // If we have a constant offset, then we probably don't want to match a
4280 // register offset.
4281 if (isBaseWithConstantOffset(Root, MRI))
4282 return None;
4284 // Try to fold shifts into the addressing mode.
4285 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4286 if (AddrModeFns)
4287 return AddrModeFns;
4289 // If that doesn't work, see if it's possible to fold in registers from
4290 // a GEP.
4291 return selectAddrModeRegisterOffset(Root);
4292 }
4294 /// Select a "register plus unscaled signed 9-bit immediate" address. This
4295 /// should only match when there is an offset that is not valid for a scaled
4296 /// immediate addressing mode. The "Size" argument is the size in bytes of the
4297 /// memory reference, which is needed here to know what is valid for a scaled
4298 /// immediate.
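/// For example, with a 4-byte access an offset of 17 is not a multiple of 4 and
/// so cannot use the scaled unsigned-offset form, but it fits the signed 9-bit
/// range and is matched here instead.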
4299 InstructionSelector::ComplexRendererFns
4300 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4301 unsigned Size) const {
4302 MachineRegisterInfo &MRI =
4303 Root.getParent()->getParent()->getParent()->getRegInfo();
4305 if (!Root.isReg())
4306 return None;
4308 if (!isBaseWithConstantOffset(Root, MRI))
4309 return None;
4311 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4312 if (!RootDef)
4313 return None;
4315 MachineOperand &OffImm = RootDef->getOperand(2);
4316 if (!OffImm.isReg())
4317 return None;
4318 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4319 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4320 return None;
4321 int64_t RHSC;
4322 MachineOperand &RHSOp1 = RHS->getOperand(1);
4323 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4324 return None;
4325 RHSC = RHSOp1.getCImm()->getSExtValue();
4327 // If the offset is valid as a scaled immediate, don't match here.
4328 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4329 return None;
4330 if (RHSC >= -256 && RHSC < 256) {
4331 MachineOperand &Base = RootDef->getOperand(1);
4332 return {{
4333 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4334 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4335 }};
4336 }
4337 return None;
4338 }
4340 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
4341 /// "Size" argument is the size in bytes of the memory reference, which
4342 /// determines the scale.
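/// For example, a 4-byte access at [base, #16] renders the base register plus
/// the scaled immediate 4 (i.e. 16 >> 2).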
4343 InstructionSelector::ComplexRendererFns
4344 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4345 unsigned Size) const {
4346 MachineRegisterInfo &MRI =
4347 Root.getParent()->getParent()->getParent()->getRegInfo();
4349 if (!Root.isReg())
4350 return None;
4352 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4353 if (!RootDef)
4354 return None;
4356 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4357 return {{
4358 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4359 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4360 }};
4361 }
4363 if (isBaseWithConstantOffset(Root, MRI)) {
4364 MachineOperand &LHS = RootDef->getOperand(1);
4365 MachineOperand &RHS = RootDef->getOperand(2);
4366 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4367 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4368 if (LHSDef && RHSDef) {
4369 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4370 unsigned Scale = Log2_32(Size);
4371 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4372 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4373 return {{
4374 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4375 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4376 }};
4378 return {{
4379 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4380 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4381 }};
4382 }
4383 }
4384 }
4386 // Before falling back to our general case, check if the unscaled
4387 // instructions can handle this. If so, that's preferable.
4388 if (selectAddrModeUnscaled(Root, Size).hasValue())
4389 return None;
4391 return {{
4392 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4393 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4394 }};
4395 }
4397 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4398 const MachineInstr &MI) const {
4399 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4400 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4401 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4402 assert(CstVal && "Expected constant value");
4403 MIB.addImm(CstVal.getValue());
4404 }
4406 namespace llvm {
4407 InstructionSelector *
4408 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4409 AArch64Subtarget &Subtarget,
4410 AArch64RegisterBankInfo &RBI) {
4411 return new AArch64InstructionSelector(TM, Subtarget, RBI);
4412 }
4413 }