//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  void preISelLower(MachineInstr &I) const;

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  void contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.
  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

  // We declare the temporaries used by selectImpl() in the class to minimize
  // the cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical registers operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}
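
// For example (illustrative vregs), a 32-bit G_SHL on the GPR bank maps to
// the variable-shift instruction LSLVWr:
//
//   %2:gpr(s32) = G_SHL %0, %1   ==>   LSLVWr %2, %0, %1
//
// Any opcode/bank/size combination the tables above don't cover falls
// through and returns GenericOpc unchanged, which callers treat as
// "unsupported".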

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}
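
// For example (illustrative), a 64-bit G_LOAD whose value lives on the GPR
// bank selects to LDRXui, i.e. a load with a base register plus a scaled
// unsigned 12-bit immediate offset, roughly "ldr x0, [x1, #imm]".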

/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a mean to setup initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a mean to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  }
}
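
// As an illustration, a G_SITOFP from a 32-bit integer to a 64-bit float
// (SrcSize == 32, DstSize == 64) selects SCVTFUWDri, roughly "scvtf d0, w0"
// at the assembly level.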

static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}
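
// E.g. a 64-bit compare against +0.0 selects the immediate form FCMPDri
// (roughly "fcmp d0, #0.0"), avoiding materializing a zero constant; all
// other 64-bit compares select the register-register form FCMPDrr.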

/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    return true;
  }
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}
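
// Note that the unsigned predicates map onto the carry-based conditions,
// e.g. ICMP_ULT becomes LO ("unsigned lower") rather than LT.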

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
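
// Some FP predicates have no single AArch64 condition and need two: e.g.
// FCMP_ONE ("ordered and not equal") is covered by MI or GT above. Callers
// seeing CondCode2 != AL must therefore check both conditions.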

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal)
    std::swap(RHS, LHS);

  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal || VRegAndVal->Value != 0) {
    MachineIRBuilder MIB(I);
    // If we can't select a CBZ then emit a cmp + Bcc.
    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                            CCMI->getOperand(1), MIB))
      return false;
    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}
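
// For example (illustrative MIR), an equality compare against zero feeding a
// conditional branch folds into a single compare-and-branch instruction:
//
//   %c(s1) = G_ICMP intpred(eq), %x(s64), 0
//   G_BRCOND %c, %bb.1      ==>      CBZX %x, %bb.1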

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is not a shift right register instruction, but the shift left
  // register instruction takes a signed value, where negative numbers specify
  // a right shift.
  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
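
// Net effect of the above (illustrative): a v4s32 arithmetic shift right
// becomes a negate of the per-lane shift amounts followed by a signed left
// shift, since SSHL treats negative amounts as right shifts:
//
//   %d = G_ASHR %v, %s   ==>   %n = NEGv4i32 %s; %d = SSHLv4i32 %v, %n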

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
}
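
// The emitted sequence builds the 64-bit value 16 bits at a time, roughly:
//
//   movz x0, #:abs_g0_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g3:sym
//
// with only the final MOVK writing the destination register requested by I.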

void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // These shifts are legalized to have 64 bit shift amounts because we want
    // to take advantage of the existing imported selection patterns that
    // assume the immediates are s64s. However, if the shifted type is 32 bits
    // and for some reason we receive input GMIR that has an s64 shift amount
    // that's not a G_CONSTANT, insert a truncate so that we can still select
    // the s32 register-register variant.
    unsigned SrcReg = I.getOperand(1).getReg();
    unsigned ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return;
    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
      return;
    auto *AmtMI = MRI.getVRegDef(ShiftReg);
    assert(AmtMI && "could not find a vreg definition for shift amount");
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      // Insert a subregister copy to implement a 64->32 trunc
      MachineIRBuilder MIB(I);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    }
    return;
  }
  case TargetOpcode::G_STORE:
    contractCrossBankCopyIntoStore(I, MRI);
    return;
  default:
    return;
  }
}
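
// E.g. (illustrative) for "%d(s32) = G_SHL %a(s32), %amt(s64)" where %amt is
// not a G_CONSTANT, the lowering above rewrites the shift amount to a sub_32
// COPY of %amt so that the imported 32-bit register-register shift patterns
// (e.g. LSLVWr) can still match.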

bool AArch64InstructionSelector::earlySelectSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  MachineIRBuilder MIB(I);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});

  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
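
// As a concrete example of the alias matched here, "lsl x0, x1, #3" is
// UBFMXri x0, x1, 61, 60: immr = (64 - 3) mod 64 and imms = 63 - 3, which is
// exactly what the selectShiftA_64/selectShiftB_64 renderers compute.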

void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  // If we're storing a scalar, it doesn't matter what register bank that
  // scalar is on. All that matters is the size.
  //
  // So, if we see something like this (with a 32-bit scalar as an example):
  //
  // %x:gpr(s32) = ... something ...
  // %y:fpr(s32) = COPY %x:gpr(s32)
  // G_STORE %y:fpr(s32)
  //
  // We can fix this up into something like this:
  //
  // G_STORE %x:gpr(s32)
  //
  // And then continue the selection process normally.
  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
  if (!Def)
    return;
  Register DefDstReg = Def->getOperand(0).getReg();
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);

  // If we get something strange like a physical register, then we shouldn't
  // go any further.
  if (!DefDstTy.isValid())
    return;

  // Are the source and dst types the same size?
  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
    return;

  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
    return;

  // We have a cross-bank copy, which is entering a store. Let's fold it.
  I.getOperand(0).setReg(DefDstReg);
}

bool AArch64InstructionSelector::earlySelectLoad(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Try to fold in shifts, etc into the addressing mode of a load.
  assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");

  // Don't handle atomic loads/stores yet.
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
    LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
    return false;
  }

  unsigned MemBytes = MemOp.getSize();

  // Only support 64-bit loads for now.
  if (MemBytes != 8)
    return false;

  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  // Don't handle vectors.
  if (DstTy.isVector())
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  // TODO: 32-bit destinations.
  if (DstSize != 64)
    return false;

  // Check if we can do any folding from GEPs/shifts etc. into the load.
  auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
  if (!ImmFn)
    return false;

  // We can fold something. Emit the load here.
  MachineIRBuilder MIB(I);

  // Choose the instruction based off the size of the element being loaded, and
  // whether or not we're loading into a FPR.
  const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
  unsigned Opc =
      RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
  // Construct the load.
  auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
  for (auto &RenderFn : *ImmFn)
    RenderFn(LoadMI);
  LoadMI.addMemOperand(*I.memoperands_begin());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
}
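
// For example (illustrative), a 64-bit load whose address is a base plus a
// shifted index can fold the whole address computation into the load:
//
//   %off = G_SHL %idx, 3
//   %addr = G_GEP %base, %off
//   %val(s64) = G_LOAD %addr  ==>  roughly "ldr x0, [x_base, x_idx, lsl #3]"
//
// via LDRXroX, with selectAddrModeXRO rendering the extend/shift operands.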

bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_LOAD:
    return earlySelectLoad(I, MRI);
  default:
    return false;
  }
}

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const RegClassOrRegBank &RegClassOrBank =
          MRI.getRegClassOrRegBank(DefReg);

      const TargetRegisterClass *DefRC
          = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
      if (!DefRC) {
        if (!DefTy.isValid()) {
          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
          return false;
        }
        const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
        DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
        if (!DefRC) {
          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
          return false;
        }
      }

      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  // Try to do some lowering before we start instruction selecting. These
  // lowerings are purely transformations on the input G_MIR and so selection
  // must continue after any modification of the instruction.
  preISelLower(I);

  // There may be patterns where the importer can't deal with them optimally,
  // but does select it to a suboptimal sequence so our custom C++ selection
  // code later never has a chance to work on it. Therefore, we have an early
  // selection attempt here to give priority to certain selection routines
  // over the imported ones.
  if (earlySelect(I))
    return true;

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if
      // the bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const Register CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }
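
  // Note: with speculative load hardening enabled we must not emit
  // TB(N)Z/CB(N)Z, so the path above instead tests the condition with a
  // flag-setting ANDSWri and branches on the flags with Bcc.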

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    Register DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
                                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;

    // We should always get something by the time we get here...
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");

    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s8 = LLT::scalar(8);
    const LLT s16 = LLT::scalar(16);
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0 && Ty != s8 && Ty != s16) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    // We allow G_CONSTANT of types < 32b.
    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;

    if (isFP) {
      // Either emit a FMOV, or emit a copy to emit a normal mov.
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      // Can we use a FMOV instruction to represent the immediate?
      if (emitFMovForFConstant(I, MRI))
        return true;

      // Nope. Emit a copy and use a normal mov instead.
      const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    I.setDesc(TII.get(MovOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }

  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    unsigned SrcSize = SrcTy.getSizeInBits();

    if (SrcTy.getSizeInBits() > 64) {
      // This should be an extract of an s128, which is like a vector extract.
      if (SrcTy.getSizeInBits() != 128)
        return false;
      // Only support extracting 64 bits from an s128 at the moment.
      if (DstTy.getSizeInBits() != 64)
        return false;

      const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
      const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
      // Check we have the right regbank always.
      assert(SrcRB.getID() == AArch64::FPRRegBankID &&
             DstRB.getID() == AArch64::FPRRegBankID &&
             "Wrong extract regbank!");

      // Emit the same code as a vector extract.
      // Offset must be a multiple of 64.
      unsigned Offset = I.getOperand(2).getImm();
      if (Offset % 64 != 0)
        return false;
      unsigned LaneIdx = Offset / 64;
      MachineIRBuilder MIB(I);
      MachineInstr *Extract = emitExtractVectorElt(
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      if (!Extract)
        return false;
      I.eraseFromParent();
      return true;
    }

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

    if (SrcSize < 64) {
      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
             "unexpected G_EXTRACT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);

    if (DstSize < 64) {
      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
             "unexpected G_INSERT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG))
        .addDef(SrcReg)
        .addImm(0)
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FRAME_INDEX: {
    // allocas and G_FRAME_INDEX are only supported in addrspace(0).
    if (Ty != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }
    I.setDesc(TII.get(AArch64::ADDXri));

    // MOs for a #0 shifted immediate.
    I.addOperand(MachineOperand::CreateImm(0));
    I.addOperand(MachineOperand::CreateImm(0));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal()) {
      // FIXME: we don't support TLS yet.
      return false;
    }
    unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
    if (OpFlags & AArch64II::MO_GOT) {
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else if (TM.getCodeModel() == CodeModel::Large) {
      // Materialize the global using movz/movk instructions.
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      return true;
    } else if (TM.getCodeModel() == CodeModel::Tiny) {
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else {
      I.setDesc(TII.get(AArch64::MOVaddr));
      I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
      MachineInstrBuilder MIB(MF, I);
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
                           OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    MachineIRBuilder MIB(I);

    LLT PtrTy = MRI.getType(I.getOperand(1).getReg());

    if (PtrTy != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }

    auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }
    unsigned MemSizeInBits = MemOp.getSize() * 8;

    const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
    // Sanity-check the pointer register.
    assert(PtrRB.getID() == AArch64::GPRRegBankID &&
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
#endif

    const Register ValReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

    const unsigned NewOpc =
        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));

    uint64_t Offset = 0;
    auto *PtrMI = MRI.getVRegDef(PtrReg);

    // Try to fold a GEP into our unsigned immediate addressing mode.
    if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
        int64_t Imm = *COff;
        const unsigned Size = MemSizeInBits / 8;
        const unsigned Scale = Log2_32(Size);
        if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
          unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
          I.getOperand(1).setReg(Ptr2Reg);
          PtrMI = MRI.getVRegDef(Ptr2Reg);
          Offset = Imm / Size;
        }
      }
    }

    // If we haven't folded anything into our addressing mode yet, try to fold
    // a frame index into the base+offset.
    if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
      I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());

    I.addOperand(MachineOperand::CreateImm(Offset));
    // If we're storing a 0, use WZR/XZR.
    if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
      if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
        if (I.getOpcode() == AArch64::STRWui)
          I.getOperand(0).setReg(AArch64::WZR);
        else if (I.getOpcode() == AArch64::STRXui)
          I.getOperand(0).setReg(AArch64::XZR);
      }
    }

    if (IsZExtLoad) {
      // The zextload from a smaller type to i32 should be handled by the
      // importer.
      if (MRI.getType(ValReg).getSizeInBits() != 64)
        return false;
      // If we have a ZEXTLOAD then change the load's type to be a narrower
      // reg and zero_extend with SUBREG_TO_REG.
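      // Sketch of the intended result for a 64-bit zextload of a 32-bit
      // memory value (register names are illustrative only):
      //   %ld:gpr32 = LDRWui %ptr, 0                        ; narrowed load
      //   %dst:gpr64 = SUBREG_TO_REG 0, %ld, %subreg.sub_32 ; implicit zext
      // Writing the W register already zeroes the top 32 bits.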
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      Register DstReg = I.getOperand(0).getReg();
      I.getOperand(0).setReg(LdReg);

      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(0)
          .addUse(LdReg)
          .addImm(AArch64::sub_32);
      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                          MRI);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
      return false;
    }

    if (Ty != LLT::scalar(64)) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
                        << ", expected: " << LLT::scalar(64) << '\n');
      return false;
    }

    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
                                                             : AArch64::UMULHrr;
    I.setDesc(TII.get(NewOpc));

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:

  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorASHR(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_OR:
  case TargetOpcode::G_LSHR: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const unsigned OpSize = Ty.getSizeInBits();

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type be always reset in setDesc?

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_GEP: {
    MachineIRBuilder MIRBuilder(I);
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
            MIRBuilder);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_UADDO: {
    // TODO: Support other types.
    unsigned OpSize = Ty.getSizeInBits();
    if (OpSize != 32 && OpSize != 64) {
      LLVM_DEBUG(
          dbgs()
          << "G_UADDO currently only supported for 32 and 64 b types.\n");
      return false;
    }

    // TODO: Support vectors.
    if (Ty.isVector()) {
      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
      return false;
    }

    // Add and set the set condition flag.
    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
    MachineIRBuilder MIRBuilder(I);
    auto AddsMI = MIRBuilder.buildInstr(
        AddsOpc, {I.getOperand(0).getReg()},
        {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);

    // Now, put the overflow result in the register given by the first operand
    // to the G_UADDO. CSINC increments the result when the predicate is false,
    // so to get the increment when it's true, we need to use the inverse. In
    // this case, we want to increment when carry is set.
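    // Roughly, for a 32-bit G_UADDO (a sketch of the intended output):
    //   ADDSWrr %res, %lhs, %rhs         ; add and set NZCV
    //   CSINCWr %carry, wzr, wzr, lo     ; lo is inverted hs, so %carry = C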
    auto CsetMI = MIRBuilder
                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
                                  {Register(AArch64::WZR),
                                   Register(AArch64::WZR)})
                      .addImm(getInvertedCondCode(AArch64CC::HS));
    constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_PTR_MASK: {
    uint64_t Align = I.getOperand(2).getImm();
    if (Align >= 64 || Align == 0)
      return false;

    uint64_t Mask = ~((1ULL << Align) - 1);
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

    if (DstRB.getID() != SrcRB.getID()) {
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
      return false;
    }

    if (DstRB.getID() == AArch64::GPRRegBankID) {
      const TargetRegisterClass *DstRC =
          getRegClassForTypeOnBank(DstTy, DstRB, RBI);
      if (!DstRC)
        return false;

      const TargetRegisterClass *SrcRC =
          getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
      if (!SrcRC)
        return false;

      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      if (DstRC == SrcRC) {
        // Nothing to be done
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
                 SrcTy == LLT::scalar(64)) {
        llvm_unreachable("TableGen can import this case");
        return false;
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
      } else {
        LLVM_DEBUG(
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      I.setDesc(TII.get(TargetOpcode::COPY));
      return true;
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
      if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        constrainSelectedInstRegOperands(I, TII, TRI, RBI);
        return true;
      }

      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
        MachineIRBuilder MIB(I);
        MachineInstr *Extract = emitExtractVectorElt(
            DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
        if (!Extract)
          return false;
        I.eraseFromParent();
        return true;
      }
    }

    return false;
  }
  case TargetOpcode::G_ANYEXT: {
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
    if (RBDst.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
                        << ", expected: GPR\n");
      return false;
    }

    const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
                        << ", expected: GPR\n");
      return false;
    }

    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();

    if (DstSize == 0) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
      return false;
    }

    if (DstSize != 64 && DstSize > 32) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
                        << ", expected: 32 or 64\n");
      return false;
    }
    // At this point G_ANYEXT is just like a plain COPY, but we need
    // to explicitly form the 64-bit value if any.
    if (DstSize > 32) {
      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
          .addDef(ExtSrc)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);
    }
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT: {
    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
    const Register DefReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    unsigned DstSize = DstTy.getSizeInBits();
    unsigned SrcSize = SrcTy.getSizeInBits();

    assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");

    MachineIRBuilder MIB(I);
    MachineInstr *ExtI;
    if (DstTy.isVector())
      return false; // Should be handled by imported patterns.
    if (DstSize == 64) {
      // FIXME: Can we avoid manually doing this?
      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
                          << " operand\n");
        return false;
      }

      auto SubregToReg =
          MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
              .addImm(0)
              .addUse(SrcReg)
              .addImm(AArch64::sub_32);

      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
                            {DefReg}, {SubregToReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
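      // E.g. a G_SEXT from s8 to s64 should come out as (sketch):
      //   %tmp:gpr64 = SUBREG_TO_REG 0, %src:gpr32, %subreg.sub_32
      //   %dst:gpr64 = SBFMXri %tmp, 0, 7   ; equivalent to sxtb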
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
                            {DefReg}, {SrcReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
    } else {
      return false;
    }

    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
    if (NewOpc == Opcode)
      return false;

    I.setDesc(TII.get(NewOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    return true;
  }
  case TargetOpcode::G_INTTOPTR:
    // The importer is currently unable to import pointer types since they
    // didn't exist in SelectionDAG.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_BITCAST:
    // Imported SelectionDAG rules can handle every bitcast except those that
    // bitcast from a type to the same type. Ideally, these shouldn't occur
    // but we might not run an optimizer that deletes them. The other exception
    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
    // of them.
    return selectCopy(I, TII, MRI, TRI, RBI);
  case TargetOpcode::G_SELECT: {
    if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
      LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
                        << ", expected: " << LLT::scalar(1) << '\n');
      return false;
    }

    const Register CondReg = I.getOperand(1).getReg();
    const Register TReg = I.getOperand(2).getReg();
    const Register FReg = I.getOperand(3).getReg();

    if (tryOptSelect(I))
      return true;

    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
    MachineInstr &TstMI =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
             .addDef(AArch64::WZR)
             .addUse(CondReg)
             .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

    MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
                                .addDef(I.getOperand(0).getReg())
                                .addUse(TReg)
                                .addUse(FReg)
                                .addImm(AArch64CC::NE);

    constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_ICMP: {
    if (Ty.isVector())
      return selectVectorICmp(I, MRI);

    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    MachineIRBuilder MIRBuilder(I);
    if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
                            MIRBuilder))
      return false;
    emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
                    MIRBuilder);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_FCMP: {
    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    unsigned CmpOpc = selectFCMPOpc(I, MRI);
    if (!CmpOpc)
      return false;

    AArch64CC::CondCode CC1, CC2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
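    // Some FP predicates need two AArch64 condition codes: e.g. "one"
    // (ordered and not equal) is MI-or-GT. When CC2 is not AL, we
    // materialize each condition with its own CSINC below and OR the two
    // results together.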
    // Partially build the compare. Decide if we need to add a use for the
    // third operand based off whether or not we're comparing against 0.0.
    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
                     .addUse(I.getOperand(2).getReg());

    // If we don't have an immediate compare, then we need to add a use of the
    // register which wasn't used for the immediate.
    // Note that the immediate will always be the last operand.
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());

    const Register DefReg = I.getOperand(0).getReg();
    Register Def1Reg = DefReg;
    if (CC2 != AArch64CC::AL)
      Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

    MachineInstr &CSetMI =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
             .addDef(Def1Reg)
             .addUse(AArch64::WZR)
             .addUse(AArch64::WZR)
             .addImm(getInvertedCondCode(CC1));

    if (CC2 != AArch64CC::AL) {
      Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      MachineInstr &CSet2MI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
               .addDef(Def2Reg)
               .addUse(AArch64::WZR)
               .addUse(AArch64::WZR)
               .addImm(getInvertedCondCode(CC2));
      MachineInstr &OrMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
               .addDef(DefReg)
               .addUse(Def1Reg)
               .addUse(Def2Reg);
      constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
    }
    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const TargetRegisterClass *DstRC =
        getRegClassForTypeOnBank(DstTy, DstRB, RBI);
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
    return true;
  }
  case TargetOpcode::G_BLOCK_ADDR: {
    if (TM.getCodeModel() == CodeModel::Large) {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      return true;
    } else {
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                           I.getOperand(0).getReg())
                       .addBlockAddress(I.getOperand(1).getBlockAddress(),
                                        /* Offset */ 0, AArch64II::MO_PAGE)
                       .addBlockAddress(
                           I.getOperand(1).getBlockAddress(), /* Offset */ 0,
                           AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
    }
  }
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return selectIntrinsicTrunc(I, MRI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return selectIntrinsicRound(I, MRI);
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
    return selectMergeValues(I, MRI);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  }

  return false;
}
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register Index = I.getOperand(2).getReg();
  MachineIRBuilder MIB(I);

  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
                 {JTAddr, Index})
      .addJumpTableIndex(JTI);

  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectJumpTable(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
  MachineIRBuilder MIB(I);
  auto MovMI =
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
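// The MOVaddrJT pseudo built above is expanded after selection into roughly
// (a sketch; the label name is illustrative):
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0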
bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
      break;
    case 16:
      Opc = AArch64::FRINTZHr;
      break;
    case 32:
      Opc = AArch64::FRINTZSr;
      break;
    case 64:
      Opc = AArch64::FRINTZDr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTZv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTZv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTZv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
      break;
    case 16:
      Opc = AArch64::FRINTAHr;
      break;
    case 32:
      Opc = AArch64::FRINTASr;
      break;
    case 64:
      Opc = AArch64::FRINTADr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTAv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTAv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTAv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  unsigned NumElts = DstTy.getNumElements();

  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
  // Third index is cc opcode:
  // 0 == eq, 1 == ugt, 2 == uge, 3 == ult, 4 == ule,
  // 5 == sgt, 6 == sge, 7 == slt, 8 == sle
  // ne is done by negating 'eq' result.

  // This table below assumes that for some comparisons the operands will be
  // commuted:
  // ult op == commute + ugt op
  // ule op == commute + uge op
  // slt op == commute + sgt op
  // sle op == commute + sge op
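  // E.g. (a sketch): "ult" on <4 x s32> swaps the operands and then uses the
  // unsigned-greater-than opcode, so it is selected as CMHIv4i32 rhs, lhs.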
  unsigned PredIdx = 0;
  bool SwapOperands = false;
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  switch (Pred) {
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
    PredIdx = 0;
    break;
  case CmpInst::ICMP_UGT:
    PredIdx = 1;
    break;
  case CmpInst::ICMP_UGE:
    PredIdx = 2;
    break;
  case CmpInst::ICMP_ULT:
    PredIdx = 3;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_ULE:
    PredIdx = 4;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SGT:
    PredIdx = 5;
    break;
  case CmpInst::ICMP_SGE:
    PredIdx = 6;
    break;
  case CmpInst::ICMP_SLT:
    PredIdx = 7;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SLE:
    PredIdx = 8;
    SwapOperands = true;
    break;
  default:
    llvm_unreachable("Unhandled icmp predicate");
    return false;
  }

  // This table obviously should be tablegen'd when we have our GISel native
  // tablegen selector.
  static const unsigned OpcTable[4][4][9] = {
      {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */},
       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */},
       {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
        AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
        AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
       {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
        AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
        AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}},
      {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */},
       {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
        AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
        AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
       {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
        AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
        AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */}},
      {{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
        AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
        AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
       {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
        AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
        AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */},
       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */}},
      {{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
        AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
        AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */},
       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */},
       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
        0 /* invalid */}},
  };
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
  if (!Opc) {
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
    return false;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *SrcRC =
      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
  if (!SrcRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return false;
  }

  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
  if (SrcTy.getSizeInBits() == 128)
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;

  if (SwapOperands)
    std::swap(SrcReg, Src2Reg);

  MachineIRBuilder MIB(I);
  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);

  // Invert if we had a 'ne' cc.
  if (NotOpc) {
    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  } else {
    MIB.buildCopy(DstReg, Cmp.getReg(0));
  }
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();
  return true;
}
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
    auto Ins =
        MIRBuilder
            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
            .addImm(SubregIndex);
    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
    return &*Ins;
  };

  switch (EltSize) {
  case 16:
    return BuildFn(AArch64::hsub);
  case 32:
    return BuildFn(AArch64::ssub);
  case 64:
    return BuildFn(AArch64::dsub);
  default:
    return nullptr;
  }
}
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    MachineIRBuilder MIB(I);
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI =
        emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef)
                                .addImm(0)
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addImm(0)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  switch (EltSize) {
  case 16:
    CopyOpc = AArch64::CPYi16;
    ExtractSubReg = AArch64::hsub;
    break;
  case 32:
    CopyOpc = AArch64::CPYi32;
    ExtractSubReg = AArch64::ssub;
    break;
  case 64:
    CopyOpc = AArch64::CPYi64;
    ExtractSubReg = AArch64::dsub;
    break;
  default:
    // Unknown size, bail out.
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
    return false;
  }
  return true;
}
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  const LLT &VecTy = MRI.getType(VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // The register that we're going to copy into.
  Register InsertReg = VecReg;
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);
  // If the lane index is 0, we just use a subregister COPY.
  if (LaneIdx == 0) {
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
    return &*Copy;
  }

  // Lane copies require 128-bit wide registers. If we're dealing with an
  // unpacked vector, then we need to move up to that width. Insert an implicit
  // def and a subregister insert to get us there.
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr;
    InsertReg = ScalarToVector->getOperand(0).getReg();
  }

  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);

  // Make sure that we actually constrain the initial copy.
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  return LaneCopyMI;
}
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);

  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");

  // Need the lane index to determine the correct copy opcode.
  MachineOperand &LaneIdxOp = I.getOperand(2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // Find the index to extract from.
  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value;

  MachineIRBuilder MIRBuilder(I);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
                                               LaneIdx, MIRBuilder);
  if (!Extract)
    return false;
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
    return false;
  }

  MachineIRBuilder MIB(I);

  // We implement a split vector operation by treating the sub-vectors as
  // scalars and extracting them.
  const RegisterBank &DstRB =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    MachineInstr *Extract =
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
    if (!Extract)
      return false;
  }
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);

  assert(WideTy.isVector() && "can only unmerge from vector types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  MachineIRBuilder MIB(I);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the
  // source directly. Otherwise, we need to do a bit of setup with some
  // subregister inserts.
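  // E.g. (a sketch): unmerging two s32s from a <2 x s32> in a 64-bit D
  // register first widens the source to a Q register via IMPLICIT_DEF +
  // INSERT_SUBREG, since the CPYi* lane-copy pseudos used below only operate
  // on 128-bit vectors.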
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                   ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(AArch64::dsub);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineIRBuilder MIRBuilder(I);
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}
void AArch64InstructionSelector::collectShuffleMaskIndices(
    MachineInstr &I, MachineRegisterInfo &MRI,
    SmallVectorImpl<Optional<int>> &Idxs) const {
  MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
  assert(
      MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
      "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
  // Find the constant indices.
  for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
    // Look through copies.
    MachineInstr *ScalarDef =
        getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
    assert(ScalarDef && "Could not find vreg def of shufflevec index op");
    if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
      // This must be an undef if it's not a constant.
      assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
      Idxs.push_back(None);
    } else {
      Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
    }
  }
}
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
  if (Align == 0)
    Align = MF.getDataLayout().getTypeAllocSize(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Align);
}
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());

  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI = nullptr;
  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI = &*MIRBuilder
                   .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass},
                               {Adrp})
                   .addConstantPoolIndex(
                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
                                       {AArch64::ADDWrr, AArch64::ADDWri}};
  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
  auto ImmFns = selectArithImmed(RHS);
  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});

  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(AddMI);
  } else {
    AddMI.addUse(RHS.getReg());
  }

  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
  return &*AddMI;
}
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
                                       {AArch64::ADDSWrr, AArch64::ADDSWri}};
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto ImmFns = selectArithImmed(RHS);
  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});

  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(CmpMI);
  } else {
    CmpMI.addUse(RHS.getReg());
  }

  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
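// Note (a sketch of the idea): "cmn x, y" is "adds xzr, x, y", so for the
// EQ/NE cases we care about it behaves like "cmp x, -y". tryFoldIntegerCompare
// below relies on this to fold a G_SUB-from-zero into the comparison.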
MachineInstr *
AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
                                       {AArch64::ANDSWrr, AArch64::ANDSWri}};
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

  // We might be able to fold in an immediate into the TST. We need to make
  // sure it's a logical immediate though, since ANDS requires that.
  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
  bool IsImmForm = ValAndVReg.hasValue() &&
                   AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});

  if (IsImmForm)
    TstMI.addImm(
        AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
  else
    TstMI.addUse(RHS);

  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  return &*TstMI;
}
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  // Fold the compare if possible.
  MachineInstr *FoldCmp =
      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
  if (FoldCmp)
    return FoldCmp;

  // Can't fold into a CMN. Just emit a normal compare.
  unsigned CmpOpc = 0;
  Register ZReg;

  LLT CmpTy = MRI.getType(LHS.getReg());
  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
         "Expected scalar or pointer");
  if (CmpTy == LLT::scalar(32)) {
    CmpOpc = AArch64::SUBSWrr;
    ZReg = AArch64::WZR;
  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
    CmpOpc = AArch64::SUBSXrr;
    ZReg = AArch64::XZR;
  } else {
    return nullptr;
  }

  // Try to match immediate forms.
  auto ImmFns = selectArithImmed(RHS);
  if (ImmFns)
    CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;

  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(CmpMI);
  } else {
    CmpMI.addUse(RHS.getReg());
  }

  // Make sure that we can constrain the compare that we emitted.
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}
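// Sketch of the emitted sequence for concatenating two 64-bit vectors
// (register names are illustrative only):
//   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, dsub
//   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, dsub
//   %dst       = INSvi64lane %w1, 1, %w2, 0   ; %op2 into the high half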
MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
         "Expected a G_FCONSTANT!");
  MachineOperand &ImmOp = I.getOperand(1);
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  // Only handle 32 and 64 bit defs for now.
  if (DefSize != 32 && DefSize != 64)
    return nullptr;

  // Don't handle null values using FMOV.
  if (ImmOp.getFPImm()->isNullValue())
    return nullptr;

  // Get the immediate representation for the FMOV.
  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
                          : AArch64_AM::getFP64Imm(ImmValAPF);

  // If this is -1, it means the immediate can't be represented as the
  // requested floating point value. Bail.
  if (Imm == -1)
    return nullptr;

  // Update MI to represent the new FMOV instruction, constrain it, and return.
  ImmOp.ChangeToImmediate(Imm);
  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
  I.setDesc(TII.get(MovOpc));
  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  return &I;
}
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
  auto I =
      MIRBuilder
          .buildInstr(AArch64::CSINCWr, {DefReg},
                      {Register(AArch64::WZR), Register(AArch64::WZR)})
          .addImm(InvCC);
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
  return &*I;
}
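// This is the MI-level spelling of "cset" (a sketch): for an EQ compare we
// emit CSINCWr %def, wzr, wzr, ne, i.e. %def = (NE false, so EQ) ? wzr + 1
// : wzr.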
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
      return false;

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    CondCode = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
    if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                            CondDef->getOperand(1), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  } else {
    // Get the condition code for the select.
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
        CondCode2);

    // changeFCMPPredToAArch64CC only sets CondCode2 to something other than
    // AL when two instructions are needed to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    // Make sure we'll be able to select the compare.
    unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
    if (!CmpOpc)
      return false;

    // Emit a new compare.
    auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
      Cmp.addUse(CondDef->getOperand(3).getReg());
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  }

  // Emit the select.
  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
  auto CSel =
      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
          .addImm(CondCode);
  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Helper lambda to detect the subtract followed by the compare.
  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
      return false;

    // Need to make sure NZCV is the same at the end of the transformation.
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
      return false;

    // Make sure that we're getting
    // x = G_SUB 0, y
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return false;

    // This can safely be represented as a CMN.
    return true;
  };

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);

  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (IsCMN(LHSDef, CC))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (IsCMN(RHSDef, CC))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!isUnsignedICMPPred(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1).getReg(),
                   LHSDef->getOperand(2).getReg(), MIRBuilder);
  }

  return nullptr;
}
bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //    %scalar:gpr(s64) = COPY $x0
  //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  //    %cst0:gpr(s32) = G_CONSTANT i32 0
  //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  //                                             %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = DUP %scalar
  // We use the regbank of the scalar to determine which kind of dup to use.
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  using namespace TargetOpcode;
  using namespace MIPatternMatch;

  // Begin matching the insert.
  auto *InsMI =
      getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  auto *UndefMI =
      getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
  if (!UndefMI)
    return false;
  // Match the scalar being splatted.
  Register ScalarReg = InsMI->getOperand(2).getReg();
  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
  // Match the index constant 0.
  int64_t Index = 0;
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
    return false;

  // The shuffle's second operand doesn't matter if the mask is all zero.
  auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
  if (!ZeroVec)
    return false;
  int64_t Zero = 0;
  if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
    return false;
  for (unsigned i = 1, e = ZeroVec->getNumOperands(); i < e; ++i) {
    if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
      return false; // This wasn't an all zeros vector.
  }

  // We're done, now find out what kind of splat we need.
  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
  LLT EltTy = VecTy.getElementType();
  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
    return false;
  }
  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
  static const unsigned OpcTable[2][2] = {
      {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
      {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];

  // For FP splats, we need to widen the scalar reg via undef too.
  if (IsFP) {
    MachineInstr *Widen = emitScalarToVector(
        EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
    if (!Widen)
      return false;
    ScalarReg = Widen->getOperand(0).getReg();
  }
  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
  if (IsFP)
    Dup.addImm(0);
  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
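
// For example (an illustration drawn from the OpcTable above): a <4 x s32>
// splat of a GPR scalar would select DUPv4i32gpr, while an FPR scalar would
// first be widened into a 128-bit vector and then select DUPv4i32lane with
// lane index 0.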
bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
  if (TM.getOptLevel() == CodeGenOpt::None)
    return false;
  if (tryOptVectorDup(I))
    return true;
  return false;
}
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  if (tryOptVectorShuffle(I))
    return true;
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
  // operand, it comes in as a normal vector value which we have to analyze to
  // find the mask indices. If the mask element is undef, then
  // collectShuffleMaskIndices() will add a None entry for that index into
  // the list.
  SmallVector<Optional<int>, 8> Mask;
  collectShuffleMaskIndices(I, MRI, Mask);
  assert(!Mask.empty() && "Expected to find mask indices");

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it's originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  SmallVector<Constant *, 64> CstIdxs;
  for (auto &MaybeVal : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }

  MachineIRBuilder MIRBuilder(I);

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad =
        emitScalarToVector(64, &AArch64::FPR128RegClass,
                           IndexLoad->getOperand(0).getReg(), MIRBuilder);

    auto TBL1 = MIRBuilder.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    auto Copy =
        MIRBuilder
            .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  auto RegSeq = MIRBuilder
                    .buildInstr(TargetOpcode::REG_SEQUENCE,
                                {&AArch64::QQRegClass}, {Src1Reg})
                    .addImm(AArch64::qsub0)
                    .addUse(Src2Reg)
                    .addImm(AArch64::qsub1);

  auto TBL2 =
      MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
                            {RegSeq, IndexLoad->getOperand(0).getReg()});
  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
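
// As an illustration of the TBL index expansion above: shuffling two
// <4 x s32> sources with the mask <0, 4, 1, 5> expands to the byte indices
// <0..3, 16..19, 4..7, 20..23>, since element 4 starts at byte 16 of the
// concatenated 32-byte source pair.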
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}
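
// For example (opcode names per getInsertVecEltOpInfo, shown for
// illustration): inserting a 32-bit GPR element would pick INSvi32gpr here,
// while a 32-bit FPR element would be widened via emitScalarToVector and
// inserted with INSvi32lane, reading lane 0 of the widened source.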
bool AArch64InstructionSelector::selectInsertElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  Register EltReg = I.getOperand(2).getReg();
  const LLT EltTy = MRI.getType(EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  Register IdxReg = I.getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value;

  // Perform the lane insert.
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec = emitScalarToVector(
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // register.
  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    Register DemoteVec = InsMI->getOperand(0).getReg();
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                        << ")\n");
      return false;
    }
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // No widening needed.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectBuildVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
                         I.getOperand(1).getReg(), MIRBuilder);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
                              MIRBuilder);
    DstVec = PrevMI->getOperand(0).getReg();
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << ")\n");
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RC);
    Register DstReg = I.getOperand(0).getReg();

    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}
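
// For example, a G_BUILD_VECTOR of two s32 elements would (roughly) become a
// scalar-to-vector insert of element 0 into a 128-bit register, one lane
// insert for element 1, and then a dsub subregister copy to produce the
// 64-bit result.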
/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
/// ID if it exists, and 0 otherwise.
static unsigned findIntrinsicID(MachineInstr &I) {
  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
    return Op.isIntrinsicID();
  });
  if (IntrinOp == I.operands_end())
    return 0;
  return IntrinOp->getIntrinsicID();
}
/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
/// intrinsic.
static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
  switch (NumBytesToStore) {
  // TODO: 1 and 2 byte stores
  case 4:
    return AArch64::STLXRW;
  case 8:
    return AArch64::STLXRX;
  default:
    LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
                      << NumBytesToStore << ")\n");
    break;
  }
  return 0;
}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Find the intrinsic ID.
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  // Select the instruction.
  switch (IntrinID) {
  default:
    return false;
  case Intrinsic::trap:
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
    break;
  case Intrinsic::debugtrap:
    if (!STI.isTargetWindows())
      return false;
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
    break;
  case Intrinsic::aarch64_stlxr:
    Register StatReg = I.getOperand(0).getReg();
    assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
           "Status register must be 32 bits!");
    Register SrcReg = I.getOperand(2).getReg();

    if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
      LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
      return false;
    }

    Register PtrReg = I.getOperand(3).getReg();
    assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");

    // Expect only one memory operand.
    if (!I.hasOneMemOperand())
      return false;

    const MachineMemOperand *MemOp = *I.memoperands_begin();
    unsigned NumBytesToStore = MemOp->getSize();
    unsigned Opc = getStlxrOpcode(NumBytesToStore);
    if (!Opc)
      return false;

    unsigned NumBitsToStore = NumBytesToStore * 8;
    if (NumBitsToStore != 64) {
      // The intrinsic always has a 64-bit source, but we might actually want
      // a differently-sized source for the instruction. Try to get it.
      // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
      // just handle 4-byte stores.
      // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
      // to the right size for the STLXR.
      MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
      if (!Zext)
        return false;
      SrcReg = Zext->getOperand(1).getReg();
      // We should get an appropriately-sized register here.
      if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
        return false;
    }
    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
                       .addMemOperand(*I.memoperands_begin());
    constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectIntrinsic(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h:
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  return false;
}
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();

  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value;
  } else
    return None;
  return Immed;
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
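
// These two renderers together produce the immr/imms pair of a UBFM/SBFM.
// For example, a 32-bit left shift by 5 is encodable as
// ubfm w0, w1, #27, #26: ShiftA renders (32 - 5) & 0x1f = 27 and ShiftB
// renders 31 - 5 = 26 (illustrative values).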
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in, however
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  uint64_t Immed = *MaybeImmed;
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}
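
// For example, the immediate 0x1000 (4096) has its low 12 bits clear, so it
// is rendered as Immed = 1 with an LSL #12 shifter operand, i.e. roughly:
//
//   add x0, x1, #1, lsl #12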
/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasMinSize())
    return true;

  // It's better to avoid folding and recomputing shifts when we don't have a
  // fastpath.
  if (!STI.hasLSLFast())
    return false;

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}
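
// For example, on a subtarget with the lsl-fast feature, a single G_SHL
// feeding two loads could be folded into both addressing modes (e.g. two
// "ldr x0, [xN, xM, lsl #3]" uses) rather than being computed once into a
// separate register.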
/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded.)
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_GEP base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_GEP.
  MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
  if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
    return None;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
  if (!OffsetInst)
    return None;

  unsigned OffsetOpc = OffsetInst->getOpcode();
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
    return None;

  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value;

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!isPowerOf2_32(ImmVal))
      return None;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return None;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return None;

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
      [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
  }};
}
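
// For example (illustrative MIR), the G_MUL form of this fold for an 8-byte
// load:
//
//   %c8:gpr(s64) = G_CONSTANT i64 8
//   %offset:gpr(s64) = G_MUL %idx, %c8
//   %addr:gpr(p0) = G_GEP %base, %offset
//   %val:gpr(s64) = G_LOAD %addr
//
// can become ldr x0, [base, idx, lsl #3], since 8 is a power of two whose
// log2 matches LegalShiftVal.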
/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When possible (or profitable) to fold a G_GEP into the address calculation,
/// this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}
/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // If we have a constant offset, then we probably don't want to match a
  // register offset.
  if (isBaseWithConstantOffset(Root, MRI))
    return None;

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // addresses.
  return selectAddrModeRegisterOffset(Root);
}
/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  if (!isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return None;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;

  int64_t RHSC;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return None;
  RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
      RHSC < (0x1000 << Log2_32(Size)))
    return None;
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return None;
}
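
// For example, with an 8-byte access, an offset of -8 is not valid for the
// scaled form but fits in the signed 9-bit range, so it would select
// something like ldur x0, [x1, #-8]; an offset of 8 would instead be left
// for the scaled (ldr) form below.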
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}
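
// For example, for an 8-byte load (Size == 8), a G_GEP adding a constant 16
// renders as the base register plus the scaled immediate 16 >> 3 == 2, i.e.
// roughly ldr x0, [x1, #16], with 2 as the encoded imm12 field.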
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}
namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // end namespace llvm