lib/Target/AArch64/AArch64InstructionSelector.cpp

   1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 /// \file
   9 /// This file implements the targeting of the InstructionSelector class for
  10 /// AArch64.
  11 /// \todo This should be generated by TableGen.
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AArch64InstrInfo.h"
  15 #include "AArch64MachineFunctionInfo.h"
  16 #include "AArch64RegisterBankInfo.h"
  17 #include "AArch64RegisterInfo.h"
  18 #include "AArch64Subtarget.h"
  19 #include "AArch64TargetMachine.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "llvm/ADT/Optional.h"
  22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
  24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  26 #include "llvm/CodeGen/GlobalISel/Utils.h"
  27 #include "llvm/CodeGen/MachineBasicBlock.h"
  28 #include "llvm/CodeGen/MachineConstantPool.h"
  29 #include "llvm/CodeGen/MachineFunction.h"
  30 #include "llvm/CodeGen/MachineInstr.h"
  31 #include "llvm/CodeGen/MachineInstrBuilder.h"
  32 #include "llvm/CodeGen/MachineOperand.h"
  33 #include "llvm/CodeGen/MachineRegisterInfo.h"
  34 #include "llvm/IR/Type.h"
  35 #include "llvm/Support/Debug.h"
  36 #include "llvm/Support/raw_ostream.h"
  37
  38 #define DEBUG_TYPE "aarch64-isel"
  39
  40 using namespace llvm;
  41
  42 namespace {
  43
  44 #define GET_GLOBALISEL_PREDICATE_BITSET
  45 #include "AArch64GenGlobalISel.inc"
  46 #undef GET_GLOBALISEL_PREDICATE_BITSET
  47
  48 class AArch64InstructionSelector : public InstructionSelector {
  49 public:
  50   AArch64InstructionSelector(const AArch64TargetMachine &TM,
  51                              const AArch64Subtarget &STI,
  52                              const AArch64RegisterBankInfo &RBI);
  53
  54   bool select(MachineInstr &I) override;
  55   static const char *getName() { return DEBUG_TYPE; }
  56
  57   void setupMF(MachineFunction &MF, GISelKnownBits &KB,
  58                CodeGenCoverage &CoverageInfo) override {
  59     InstructionSelector::setupMF(MF, KB, CoverageInfo);
  60
  61     // hasFnAttribute() is expensive to call on every BRCOND selection, so
  62     // cache it here for each run of the selector.
  63     ProduceNonFlagSettingCondBr =
  64         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  65   }
  66
  67 private:
  68   /// tblgen-erated 'select' implementation, used as the initial selector for
  69   /// the patterns that don't require complex C++.
  70   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
  71
  72   // A lowering phase that runs before any selection attempts.
  73
  74   void preISelLower(MachineInstr &I) const;
  75
  76   // An early selection function that runs before the selectImpl() call.
  77   bool earlySelect(MachineInstr &I) const;
  78
  79   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
  80
  81   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  82   void contractCrossBankCopyIntoStore(MachineInstr &I,
  83                                       MachineRegisterInfo &MRI) const;
  84
  85   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
  86                           MachineRegisterInfo &MRI) const;
  87   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
  88                            MachineRegisterInfo &MRI) const;
  89
  90   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
  91                            MachineRegisterInfo &MRI) const;
  92
  93   bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  94   bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
  95
  96   // Helper to generate an equivalent of scalar_to_vector into a new register,
  97   // returned via 'Dst'.
  98   MachineInstr *emitScalarToVector(unsigned EltSize,
  99                                    const TargetRegisterClass *DstRC,
 100                                    Register Scalar,
 101                                    MachineIRBuilder &MIRBuilder) const;
 102
 103   /// Emit a lane insert into \p DstReg, or a new vector register if None is
 104   /// provided.
 105   ///
 106   /// The lane inserted into is defined by \p LaneIdx. The vector source
 107   /// register is given by \p SrcReg. The register containing the element is
 108   /// given by \p EltReg.
 109   MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
 110                                Register EltReg, unsigned LaneIdx,
 111                                const RegisterBank &RB,
 112                                MachineIRBuilder &MIRBuilder) const;
 113   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
 114   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
 115   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
 116   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
 117
 118   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
 119   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
 120   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
 121   bool selectSplitVectorUnmerge(MachineInstr &I,
 122                                 MachineRegisterInfo &MRI) const;
 123   bool selectIntrinsicWithSideEffects(MachineInstr &I,
 124                                       MachineRegisterInfo &MRI) const;
 125   bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
 126   bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
 127   bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
 128   bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
 129   bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
 130   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
 131   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
 132
 133   unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
 134   MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
 135                                          MachineIRBuilder &MIRBuilder) const;
 136
 137   // Emit a vector concat operation.
 138   MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
 139                                  Register Op2,
 140                                  MachineIRBuilder &MIRBuilder) const;
 141   MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
 142                                    MachineOperand &Predicate,
 143                                    MachineIRBuilder &MIRBuilder) const;
 144   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
 145                         MachineIRBuilder &MIRBuilder) const;
 146   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
 147                         MachineIRBuilder &MIRBuilder) const;
 148   MachineInstr *emitTST(const Register &LHS, const Register &RHS,
 149                         MachineIRBuilder &MIRBuilder) const;
 150   MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
 151                                      const RegisterBank &DstRB, LLT ScalarTy,
 152                                      Register VecReg, unsigned LaneIdx,
 153                                      MachineIRBuilder &MIRBuilder) const;
 154
 155   /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
 156   /// materialized using a FMOV instruction, then update MI and return it.
 157   /// Otherwise, do nothing and return a nullptr.
 158   MachineInstr *emitFMovForFConstant(MachineInstr &MI,
 159                                      MachineRegisterInfo &MRI) const;
 160
 161   /// Emit a CSet for a compare.
 162   MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
 163                                 MachineIRBuilder &MIRBuilder) const;
 164
 165   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
 166   // We use these manually instead of using the importer since it doesn't
 167   // support SDNodeXForm.
 168   ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
 169   ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
 170   ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
 171   ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
 172
 173   ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
 174   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
 175   ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
 176
 177   ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
 178                                             unsigned Size) const;
 179
 180   ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
 181     return selectAddrModeUnscaled(Root, 1);
 182   }
 183   ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
 184     return selectAddrModeUnscaled(Root, 2);
 185   }
 186   ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
 187     return selectAddrModeUnscaled(Root, 4);
 188   }
 189   ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
 190     return selectAddrModeUnscaled(Root, 8);
 191   }
 192   ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
 193     return selectAddrModeUnscaled(Root, 16);
 194   }
 195
 196   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
 197                                            unsigned Size) const;
 198   template <int Width>
 199   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
 200     return selectAddrModeIndexed(Root, Width / 8);
 201   }
 202
 203   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
 204                                      const MachineRegisterInfo &MRI) const;
 205   ComplexRendererFns
 206   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
 207                                   unsigned SizeInBytes) const;
 208   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
 209   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
 210                                        unsigned SizeInBytes) const;
 211   template <int Width>
 212   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
 213     return selectAddrModeXRO(Root, Width / 8);
 214   }
 215
 216   ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
 217
 218   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
 219     return selectShiftedRegister(Root);
 220   }
 221
 222   ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
 223     // TODO: selectShiftedRegister should allow for rotates on logical shifts.
 224     // For now, make them the same. The only difference between the two is that
 225     // logical shifts are allowed to fold in rotates. Otherwise, these are
 226     // functionally the same.
 227     return selectShiftedRegister(Root);
 228   }
 229
 230   /// Instructions that accept extend modifiers like UXTW expect the register
 231   /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
 232   /// subregister copy if necessary. Return either ExtReg, or the result of the
 233   /// new copy.
 234   Register narrowExtendRegIfNeeded(Register ExtReg,
 235                                              MachineIRBuilder &MIB) const;
 236   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
 237
 238   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
 239   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
 240   void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
 241
 242   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
 243   void materializeLargeCMVal(MachineInstr &I, const Value *V,
 244                              unsigned OpFlags) const;
 245
 246   // Optimization methods.
 247   bool tryOptVectorShuffle(MachineInstr &I) const;
 248   bool tryOptVectorDup(MachineInstr &MI) const;
 249   bool tryOptSelect(MachineInstr &MI) const;
 250   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
 251                                       MachineOperand &Predicate,
 252                                       MachineIRBuilder &MIRBuilder) const;
 253
 254   /// Return true if \p MI is a load or store of \p NumBytes bytes.
 255   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
 256
 257   /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
 258   /// register zeroed out. In other words, the result of MI has been explicitly
 259   /// zero extended.
 260   bool isDef32(const MachineInstr &MI) const;
 261
 262   const AArch64TargetMachine &TM;
 263   const AArch64Subtarget &STI;
 264   const AArch64InstrInfo &TII;
 265   const AArch64RegisterInfo &TRI;
 266   const AArch64RegisterBankInfo &RBI;
 267
 268   bool ProduceNonFlagSettingCondBr = false;
 269
 270 #define GET_GLOBALISEL_PREDICATES_DECL
 271 #include "AArch64GenGlobalISel.inc"
 272 #undef GET_GLOBALISEL_PREDICATES_DECL
 273
 274 // We declare the temporaries used by selectImpl() in the class to minimize the
 275 // cost of constructing placeholder values.
 276 #define GET_GLOBALISEL_TEMPORARIES_DECL
 277 #include "AArch64GenGlobalISel.inc"
 278 #undef GET_GLOBALISEL_TEMPORARIES_DECL
 279 };
 280
 281 } // end anonymous namespace
 282
 283 #define GET_GLOBALISEL_IMPL
 284 #include "AArch64GenGlobalISel.inc"
 285 #undef GET_GLOBALISEL_IMPL
 286
 287 AArch64InstructionSelector::AArch64InstructionSelector(
 288     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
 289     const AArch64RegisterBankInfo &RBI)
 290     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
 291       TRI(*STI.getRegisterInfo()), RBI(RBI),
 292 #define GET_GLOBALISEL_PREDICATES_INIT
 293 #include "AArch64GenGlobalISel.inc"
 294 #undef GET_GLOBALISEL_PREDICATES_INIT
 295 #define GET_GLOBALISEL_TEMPORARIES_INIT
 296 #include "AArch64GenGlobalISel.inc"
 297 #undef GET_GLOBALISEL_TEMPORARIES_INIT
 298 {
 299 }
 300
 301 // FIXME: This should be target-independent, inferred from the types declared
 302 // for each class in the bank.
 303 static const TargetRegisterClass *
 304 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
 305                          const RegisterBankInfo &RBI,
 306                          bool GetAllRegSet = false) {
 307   if (RB.getID() == AArch64::GPRRegBankID) {
 308     if (Ty.getSizeInBits() <= 32)
 309       return GetAllRegSet ? &AArch64::GPR32allRegClass
 310                           : &AArch64::GPR32RegClass;
 311     if (Ty.getSizeInBits() == 64)
 312       return GetAllRegSet ? &AArch64::GPR64allRegClass
 313                           : &AArch64::GPR64RegClass;
 314     return nullptr;
 315   }
 316
 317   if (RB.getID() == AArch64::FPRRegBankID) {
 318     if (Ty.getSizeInBits() <= 16)
 319       return &AArch64::FPR16RegClass;
 320     if (Ty.getSizeInBits() == 32)
 321       return &AArch64::FPR32RegClass;
 322     if (Ty.getSizeInBits() == 64)
 323       return &AArch64::FPR64RegClass;
 324     if (Ty.getSizeInBits() == 128)
 325       return &AArch64::FPR128RegClass;
 326     return nullptr;
 327   }
 328
 329   return nullptr;
 330 }
 331
 332 /// Given a register bank, and size in bits, return the smallest register class
 333 /// that can represent that combination.
 334 static const TargetRegisterClass *
 335 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
 336                       bool GetAllRegSet = false) {
 337   unsigned RegBankID = RB.getID();
 338
 339   if (RegBankID == AArch64::GPRRegBankID) {
 340     if (SizeInBits <= 32)
 341       return GetAllRegSet ? &AArch64::GPR32allRegClass
 342                           : &AArch64::GPR32RegClass;
 343     if (SizeInBits == 64)
 344       return GetAllRegSet ? &AArch64::GPR64allRegClass
 345                           : &AArch64::GPR64RegClass;
 346   }
 347
 348   if (RegBankID == AArch64::FPRRegBankID) {
 349     switch (SizeInBits) {
 350     default:
 351       return nullptr;
 352     case 8:
 353       return &AArch64::FPR8RegClass;
 354     case 16:
 355       return &AArch64::FPR16RegClass;
 356     case 32:
 357       return &AArch64::FPR32RegClass;
 358     case 64:
 359       return &AArch64::FPR64RegClass;
 360     case 128:
 361       return &AArch64::FPR128RegClass;
 362     }
 363   }
 364
 365   return nullptr;
 366 }
 367
 368 /// Returns the correct subregister to use for a given register class.
 369 static bool getSubRegForClass(const TargetRegisterClass *RC,
 370                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
 371   switch (TRI.getRegSizeInBits(*RC)) {
 372   case 8:
 373     SubReg = AArch64::bsub;
 374     break;
 375   case 16:
 376     SubReg = AArch64::hsub;
 377     break;
 378   case 32:
 379     if (RC != &AArch64::FPR32RegClass)
 380       SubReg = AArch64::sub_32;
 381     else
 382       SubReg = AArch64::ssub;
 383     break;
 384   case 64:
 385     SubReg = AArch64::dsub;
 386     break;
 387   default:
 388     LLVM_DEBUG(
 389         dbgs() << "Couldn't find appropriate subregister for register class.");
 390     return false;
 391   }
 392
 393   return true;
 394 }
 395
 396 /// Check whether \p I is a currently unsupported binary operation:
 397 /// - it has an unsized type
 398 /// - an operand is not a vreg
 399 /// - all operands are not in the same bank
 400 /// These are checks that should someday live in the verifier, but right now,
 401 /// these are mostly limitations of the aarch64 selector.
 402 static bool unsupportedBinOp(const MachineInstr &I,
 403                              const AArch64RegisterBankInfo &RBI,
 404                              const MachineRegisterInfo &MRI,
 405                              const AArch64RegisterInfo &TRI) {
 406   LLT Ty = MRI.getType(I.getOperand(0).getReg());
 407   if (!Ty.isValid()) {
 408     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
 409     return true;
 410   }
 411
 412   const RegisterBank *PrevOpBank = nullptr;
 413   for (auto &MO : I.operands()) {
 414     // FIXME: Support non-register operands.
 415     if (!MO.isReg()) {
 416       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
 417       return true;
 418     }
 419
 420     // FIXME: Can generic operations have physical registers operands? If
 421     // so, this will need to be taught about that, and we'll need to get the
 422     // bank out of the minimal class for the register.
 423     // Either way, this needs to be documented (and possibly verified).
 424     if (!Register::isVirtualRegister(MO.getReg())) {
 425       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
 426       return true;
 427     }
 428
 429     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
 430     if (!OpBank) {
 431       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
 432       return true;
 433     }
 434
 435     if (PrevOpBank && OpBank != PrevOpBank) {
 436       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
 437       return true;
 438     }
 439     PrevOpBank = OpBank;
 440   }
 441   return false;
 442 }
 443
 444 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
 445 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
 446 /// and of size \p OpSize.
 447 /// \returns \p GenericOpc if the combination is unsupported.
 448 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
 449                                unsigned OpSize) {
 450   switch (RegBankID) {
 451   case AArch64::GPRRegBankID:
 452     if (OpSize == 32) {
 453       switch (GenericOpc) {
 454       case TargetOpcode::G_SHL:
 455         return AArch64::LSLVWr;
 456       case TargetOpcode::G_LSHR:
 457         return AArch64::LSRVWr;
 458       case TargetOpcode::G_ASHR:
 459         return AArch64::ASRVWr;
 460       default:
 461         return GenericOpc;
 462       }
 463     } else if (OpSize == 64) {
 464       switch (GenericOpc) {
 465       case TargetOpcode::G_GEP:
 466         return AArch64::ADDXrr;
 467       case TargetOpcode::G_SHL:
 468         return AArch64::LSLVXr;
 469       case TargetOpcode::G_LSHR:
 470         return AArch64::LSRVXr;
 471       case TargetOpcode::G_ASHR:
 472         return AArch64::ASRVXr;
 473       default:
 474         return GenericOpc;
 475       }
 476     }
 477     break;
 478   case AArch64::FPRRegBankID:
 479     switch (OpSize) {
 480     case 32:
 481       switch (GenericOpc) {
 482       case TargetOpcode::G_FADD:
 483         return AArch64::FADDSrr;
 484       case TargetOpcode::G_FSUB:
 485         return AArch64::FSUBSrr;
 486       case TargetOpcode::G_FMUL:
 487         return AArch64::FMULSrr;
 488       case TargetOpcode::G_FDIV:
 489         return AArch64::FDIVSrr;
 490       default:
 491         return GenericOpc;
 492       }
 493     case 64:
 494       switch (GenericOpc) {
 495       case TargetOpcode::G_FADD:
 496         return AArch64::FADDDrr;
 497       case TargetOpcode::G_FSUB:
 498         return AArch64::FSUBDrr;
 499       case TargetOpcode::G_FMUL:
 500         return AArch64::FMULDrr;
 501       case TargetOpcode::G_FDIV:
 502         return AArch64::FDIVDrr;
 503       case TargetOpcode::G_OR:
 504         return AArch64::ORRv8i8;
 505       default:
 506         return GenericOpc;
 507       }
 508     }
 509     break;
 510   }
 511   return GenericOpc;
 512 }
 513
 514 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
 515 /// appropriate for the (value) register bank \p RegBankID and of memory access
 516 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
 517 /// addressing mode (e.g., LDRXui).
 518 /// \returns \p GenericOpc if the combination is unsupported.
 519 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
 520                                     unsigned OpSize) {
 521   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
 522   switch (RegBankID) {
 523   case AArch64::GPRRegBankID:
 524     switch (OpSize) {
 525     case 8:
 526       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
 527     case 16:
 528       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
 529     case 32:
 530       return isStore ? AArch64::STRWui : AArch64::LDRWui;
 531     case 64:
 532       return isStore ? AArch64::STRXui : AArch64::LDRXui;
 533     }
 534     break;
 535   case AArch64::FPRRegBankID:
 536     switch (OpSize) {
 537     case 8:
 538       return isStore ? AArch64::STRBui : AArch64::LDRBui;
 539     case 16:
 540       return isStore ? AArch64::STRHui : AArch64::LDRHui;
 541     case 32:
 542       return isStore ? AArch64::STRSui : AArch64::LDRSui;
 543     case 64:
 544       return isStore ? AArch64::STRDui : AArch64::LDRDui;
 545     }
 546     break;
 547   }
 548   return GenericOpc;
 549 }
 550
 551 #ifndef NDEBUG
 552 /// Helper function that verifies that we have a valid copy at the end of
 553 /// selectCopy. Verifies that the source and dest have the expected sizes and
 554 /// then returns true.
 555 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
 556                         const MachineRegisterInfo &MRI,
 557                         const TargetRegisterInfo &TRI,
 558                         const RegisterBankInfo &RBI) {
 559   const Register DstReg = I.getOperand(0).getReg();
 560   const Register SrcReg = I.getOperand(1).getReg();
 561   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
 562   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
 563
 564   // Make sure the size of the source and dest line up.
 565   assert(
 566       (DstSize == SrcSize ||
 567        // Copies are a mean to setup initial types, the number of
 568        // bits may not exactly match.
 569        (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
 570        // Copies are a mean to copy bits around, as long as we are
 571        // on the same register class, that's fine. Otherwise, that
 572        // means we need some SUBREG_TO_REG or AND & co.
 573        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
 574       "Copy with different width?!");
 575
 576   // Check the size of the destination.
 577   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
 578          "GPRs cannot get more than 64-bit width values");
 579
 580   return true;
 581 }
 582 #endif
 583
 584 /// Helper function for selectCopy. Inserts a subregister copy from
 585 /// \p *From to \p *To, linking it up to \p I.
 586 ///
 587 /// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
 588 ///
 589 /// CopyReg (From class) = COPY SrcReg
 590 /// SubRegCopy (To class) = COPY CopyReg:SubReg
 591 /// Dst = COPY SubRegCopy
 592 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
 593                                   const RegisterBankInfo &RBI, Register SrcReg,
 594                                   const TargetRegisterClass *From,
 595                                   const TargetRegisterClass *To,
 596                                   unsigned SubReg) {
 597   MachineIRBuilder MIB(I);
 598   auto Copy = MIB.buildCopy({From}, {SrcReg});
 599   auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
 600                         .addReg(Copy.getReg(0), 0, SubReg);
 601   MachineOperand &RegOp = I.getOperand(1);
 602   RegOp.setReg(SubRegCopy.getReg(0));
 603
 604   // It's possible that the destination register won't be constrained. Make
 605   // sure that happens.
 606   if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
 607     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
 608
 609   return true;
 610 }
 611
 612 /// Helper function to get the source and destination register classes for a
 613 /// copy. Returns a std::pair containing the source register class for the
 614 /// copy, and the destination register class for the copy. If a register class
 615 /// cannot be determined, then it will be nullptr.
 616 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
 617 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
 618                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
 619                      const RegisterBankInfo &RBI) {
 620   Register DstReg = I.getOperand(0).getReg();
 621   Register SrcReg = I.getOperand(1).getReg();
 622   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
 623   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
 624   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
 625   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
 626
 627   // Special casing for cross-bank copies of s1s. We can technically represent
 628   // a 1-bit value with any size of register. The minimum size for a GPR is 32
 629   // bits. So, we need to put the FPR on 32 bits as well.
 630   //
 631   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
 632   // then we can pull it into the helpers that get the appropriate class for a
 633   // register bank. Or make a new helper that carries along some constraint
 634   // information.
 635   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
 636     SrcSize = DstSize = 32;
 637
 638   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
 639           getMinClassForRegBank(DstRegBank, DstSize, true)};
 640 }
 641
 642 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
 643                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
 644                        const RegisterBankInfo &RBI) {
 645
 646   Register DstReg = I.getOperand(0).getReg();
 647   Register SrcReg = I.getOperand(1).getReg();
 648   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
 649   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
 650
 651   // Find the correct register classes for the source and destination registers.
 652   const TargetRegisterClass *SrcRC;
 653   const TargetRegisterClass *DstRC;
 654   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
 655
 656   if (!DstRC) {
 657     LLVM_DEBUG(dbgs() << "Unexpected dest size "
 658                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
 659     return false;
 660   }
 661
 662   // A couple helpers below, for making sure that the copy we produce is valid.
 663
 664   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
 665   // to verify that the src and dst are the same size, since that's handled by
 666   // the SUBREG_TO_REG.
 667   bool KnownValid = false;
 668
 669   // Returns true, or asserts if something we don't expect happens. Instead of
 670   // returning true, we return isValidCopy() to ensure that we verify the
 671   // result.
 672   auto CheckCopy = [&]() {
 673     // If we have a bitcast or something, we can't have physical registers.
 674     assert((I.isCopy() ||
 675             (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
 676              !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
 677            "No phys reg on generic operator!");
 678     assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
 679     (void)KnownValid;
 680     return true;
 681   };
 682
 683   // Is this a copy? If so, then we may need to insert a subregister copy, or
 684   // a SUBREG_TO_REG.
 685   if (I.isCopy()) {
 686     // Yes. Check if there's anything to fix up.
 687     if (!SrcRC) {
 688       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
 689       return false;
 690     }
 691
 692     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
 693     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
 694
 695     // If we're doing a cross-bank copy on different-sized registers, we need
 696     // to do a bit more work.
 697     if (SrcSize > DstSize) {
 698       // We're doing a cross-bank copy into a smaller register. We need a
 699       // subregister copy. First, get a register class that's on the same bank
 700       // as the destination, but the same size as the source.
 701       const TargetRegisterClass *SubregRC =
 702           getMinClassForRegBank(DstRegBank, SrcSize, true);
 703       assert(SubregRC && "Didn't get a register class for subreg?");
 704
 705       // Get the appropriate subregister for the destination.
 706       unsigned SubReg = 0;
 707       if (!getSubRegForClass(DstRC, TRI, SubReg)) {
 708         LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
 709         return false;
 710       }
 711
 712       // Now, insert a subregister copy using the new register class.
 713       selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
 714       return CheckCopy();
 715     }
 716
 717     // Is this a cross-bank copy?
 718     if (DstRegBank.getID() != SrcRegBank.getID()) {
 719       if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
 720           SrcSize == 16) {
 721         // Special case for FPR16 to GPR32.
 722         // FIXME: This can probably be generalized like the above case.
 723         Register PromoteReg =
 724             MRI.createVirtualRegister(&AArch64::FPR32RegClass);
 725         BuildMI(*I.getParent(), I, I.getDebugLoc(),
 726                 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
 727             .addImm(0)
 728             .addUse(SrcReg)
 729             .addImm(AArch64::hsub);
 730         MachineOperand &RegOp = I.getOperand(1);
 731         RegOp.setReg(PromoteReg);
 732
 733         // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
 734         KnownValid = true;
 735       }
 736     }
 737
 738     // If the destination is a physical register, then there's nothing to
 739     // change, so we're done.
 740     if (Register::isPhysicalRegister(DstReg))
 741       return CheckCopy();
 742   }
 743
 744   // No need to constrain SrcReg. It will get constrained when we hit another
 745   // of its use or its defs. Copies do not have constraints.
 746   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
 747     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
 748                       << " operand\n");
 749     return false;
 750   }
 751   I.setDesc(TII.get(AArch64::COPY));
 752   return CheckCopy();
 753 }
 754
 755 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
 756   if (!DstTy.isScalar() || !SrcTy.isScalar())
 757     return GenericOpc;
 758
 759   const unsigned DstSize = DstTy.getSizeInBits();
 760   const unsigned SrcSize = SrcTy.getSizeInBits();
 761
 762   switch (DstSize) {
 763   case 32:
 764     switch (SrcSize) {
 765     case 32:
 766       switch (GenericOpc) {
 767       case TargetOpcode::G_SITOFP:
 768         return AArch64::SCVTFUWSri;
 769       case TargetOpcode::G_UITOFP:
 770         return AArch64::UCVTFUWSri;
 771       case TargetOpcode::G_FPTOSI:
 772         return AArch64::FCVTZSUWSr;
 773       case TargetOpcode::G_FPTOUI:
 774         return AArch64::FCVTZUUWSr;
 775       default:
 776         return GenericOpc;
 777       }
 778     case 64:
 779       switch (GenericOpc) {
 780       case TargetOpcode::G_SITOFP:
 781         return AArch64::SCVTFUXSri;
 782       case TargetOpcode::G_UITOFP:
 783         return AArch64::UCVTFUXSri;
 784       case TargetOpcode::G_FPTOSI:
 785         return AArch64::FCVTZSUWDr;
 786       case TargetOpcode::G_FPTOUI:
 787         return AArch64::FCVTZUUWDr;
 788       default:
 789         return GenericOpc;
 790       }
 791     default:
 792       return GenericOpc;
 793     }
 794   case 64:
 795     switch (SrcSize) {
 796     case 32:
 797       switch (GenericOpc) {
 798       case TargetOpcode::G_SITOFP:
 799         return AArch64::SCVTFUWDri;
 800       case TargetOpcode::G_UITOFP:
 801         return AArch64::UCVTFUWDri;
 802       case TargetOpcode::G_FPTOSI:
 803         return AArch64::FCVTZSUXSr;
 804       case TargetOpcode::G_FPTOUI:
 805         return AArch64::FCVTZUUXSr;
 806       default:
 807         return GenericOpc;
 808       }
 809     case 64:
 810       switch (GenericOpc) {
 811       case TargetOpcode::G_SITOFP:
 812         return AArch64::SCVTFUXDri;
 813       case TargetOpcode::G_UITOFP:
 814         return AArch64::UCVTFUXDri;
 815       case TargetOpcode::G_FPTOSI:
 816         return AArch64::FCVTZSUXDr;
 817       case TargetOpcode::G_FPTOUI:
 818         return AArch64::FCVTZUUXDr;
 819       default:
 820         return GenericOpc;
 821       }
 822     default:
 823       return GenericOpc;
 824     }
 825   default:
 826     return GenericOpc;
 827   };
 828   return GenericOpc;
 829 }
 830
 831 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
 832                                 const RegisterBankInfo &RBI) {
 833   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
 834   bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
 835                AArch64::GPRRegBankID);
 836   LLT Ty = MRI.getType(I.getOperand(0).getReg());
 837   if (Ty == LLT::scalar(32))
 838     return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
 839   else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
 840     return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
 841   return 0;
 842 }
 843
 844 /// Helper function to select the opcode for a G_FCMP.
 845 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
 846   // If this is a compare against +0.0, then we don't have to explicitly
 847   // materialize a constant.
 848   const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
 849   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
 850   unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
 851   if (OpSize != 32 && OpSize != 64)
 852     return 0;
 853   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
 854                               {AArch64::FCMPSri, AArch64::FCMPDri}};
 855   return CmpOpcTbl[ShouldUseImm][OpSize == 64];
 856 }
 857
 858 /// Returns true if \p P is an unsigned integer comparison predicate.
 859 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
 860   switch (P) {
 861   default:
 862     return false;
 863   case CmpInst::ICMP_UGT:
 864   case CmpInst::ICMP_UGE:
 865   case CmpInst::ICMP_ULT:
 866   case CmpInst::ICMP_ULE:
 867     return true;
 868   }
 869 }
 870
 871 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
 872   switch (P) {
 873   default:
 874     llvm_unreachable("Unknown condition code!");
 875   case CmpInst::ICMP_NE:
 876     return AArch64CC::NE;
 877   case CmpInst::ICMP_EQ:
 878     return AArch64CC::EQ;
 879   case CmpInst::ICMP_SGT:
 880     return AArch64CC::GT;
 881   case CmpInst::ICMP_SGE:
 882     return AArch64CC::GE;
 883   case CmpInst::ICMP_SLT:
 884     return AArch64CC::LT;
 885   case CmpInst::ICMP_SLE:
 886     return AArch64CC::LE;
 887   case CmpInst::ICMP_UGT:
 888     return AArch64CC::HI;
 889   case CmpInst::ICMP_UGE:
 890     return AArch64CC::HS;
 891   case CmpInst::ICMP_ULT:
 892     return AArch64CC::LO;
 893   case CmpInst::ICMP_ULE:
 894     return AArch64CC::LS;
 895   }
 896 }
 897
 898 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
 899                                       AArch64CC::CondCode &CondCode,
 900                                       AArch64CC::CondCode &CondCode2) {
 901   CondCode2 = AArch64CC::AL;
 902   switch (P) {
 903   default:
 904     llvm_unreachable("Unknown FP condition!");
 905   case CmpInst::FCMP_OEQ:
 906     CondCode = AArch64CC::EQ;
 907     break;
 908   case CmpInst::FCMP_OGT:
 909     CondCode = AArch64CC::GT;
 910     break;
 911   case CmpInst::FCMP_OGE:
 912     CondCode = AArch64CC::GE;
 913     break;
 914   case CmpInst::FCMP_OLT:
 915     CondCode = AArch64CC::MI;
 916     break;
 917   case CmpInst::FCMP_OLE:
 918     CondCode = AArch64CC::LS;
 919     break;
 920   case CmpInst::FCMP_ONE:
 921     CondCode = AArch64CC::MI;
 922     CondCode2 = AArch64CC::GT;
 923     break;
 924   case CmpInst::FCMP_ORD:
 925     CondCode = AArch64CC::VC;
 926     break;
 927   case CmpInst::FCMP_UNO:
 928     CondCode = AArch64CC::VS;
 929     break;
 930   case CmpInst::FCMP_UEQ:
 931     CondCode = AArch64CC::EQ;
 932     CondCode2 = AArch64CC::VS;
 933     break;
 934   case CmpInst::FCMP_UGT:
 935     CondCode = AArch64CC::HI;
 936     break;
 937   case CmpInst::FCMP_UGE:
 938     CondCode = AArch64CC::PL;
 939     break;
 940   case CmpInst::FCMP_ULT:
 941     CondCode = AArch64CC::LT;
 942     break;
 943   case CmpInst::FCMP_ULE:
 944     CondCode = AArch64CC::LE;
 945     break;
 946   case CmpInst::FCMP_UNE:
 947     CondCode = AArch64CC::NE;
 948     break;
 949   }
 950 }
 951
 952 bool AArch64InstructionSelector::selectCompareBranch(
 953     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 954
 955   const Register CondReg = I.getOperand(0).getReg();
 956   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
 957   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
 958   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
 959     CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
 960   if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
 961     return false;
 962
 963   Register LHS = CCMI->getOperand(2).getReg();
 964   Register RHS = CCMI->getOperand(3).getReg();
 965   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
 966   if (!VRegAndVal)
 967     std::swap(RHS, LHS);
 968
 969   VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
 970   if (!VRegAndVal || VRegAndVal->Value != 0) {
 971     MachineIRBuilder MIB(I);
 972     // If we can't select a CBZ then emit a cmp + Bcc.
 973     if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
 974                             CCMI->getOperand(1), MIB))
 975       return false;
 976     const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
 977         (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
 978     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
 979     I.eraseFromParent();
 980     return true;
 981   }
 982
 983   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
 984   if (RB.getID() != AArch64::GPRRegBankID)
 985     return false;
 986
 987   const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
 988   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
 989     return false;
 990
 991   const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
 992   unsigned CBOpc = 0;
 993   if (CmpWidth <= 32)
 994     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
 995   else if (CmpWidth == 64)
 996     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
 997   else
 998     return false;
 999
1000   BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1001       .addUse(LHS)
1002       .addMBB(DestMBB)
1003       .constrainAllUses(TII, TRI, RBI);
1004
1005   I.eraseFromParent();
1006   return true;
1007 }
1008
1009 bool AArch64InstructionSelector::selectVectorSHL(
1010     MachineInstr &I, MachineRegisterInfo &MRI) const {
1011   assert(I.getOpcode() == TargetOpcode::G_SHL);
1012   Register DstReg = I.getOperand(0).getReg();
1013   const LLT Ty = MRI.getType(DstReg);
1014   Register Src1Reg = I.getOperand(1).getReg();
1015   Register Src2Reg = I.getOperand(2).getReg();
1016
1017   if (!Ty.isVector())
1018     return false;
1019
1020   unsigned Opc = 0;
1021   if (Ty == LLT::vector(4, 32)) {
1022     Opc = AArch64::USHLv4i32;
1023   } else if (Ty == LLT::vector(2, 32)) {
1024     Opc = AArch64::USHLv2i32;
1025   } else {
1026     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1027     return false;
1028   }
1029
1030   MachineIRBuilder MIB(I);
1031   auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
1032   constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
1033   I.eraseFromParent();
1034   return true;
1035 }
1036
1037 bool AArch64InstructionSelector::selectVectorASHR(
1038     MachineInstr &I, MachineRegisterInfo &MRI) const {
1039   assert(I.getOpcode() == TargetOpcode::G_ASHR);
1040   Register DstReg = I.getOperand(0).getReg();
1041   const LLT Ty = MRI.getType(DstReg);
1042   Register Src1Reg = I.getOperand(1).getReg();
1043   Register Src2Reg = I.getOperand(2).getReg();
1044
1045   if (!Ty.isVector())
1046     return false;
1047
1048   // There is not a shift right register instruction, but the shift left
1049   // register instruction takes a signed value, where negative numbers specify a
1050   // right shift.
1051
1052   unsigned Opc = 0;
1053   unsigned NegOpc = 0;
1054   const TargetRegisterClass *RC = nullptr;
1055   if (Ty == LLT::vector(4, 32)) {
1056     Opc = AArch64::SSHLv4i32;
1057     NegOpc = AArch64::NEGv4i32;
1058     RC = &AArch64::FPR128RegClass;
1059   } else if (Ty == LLT::vector(2, 32)) {
1060     Opc = AArch64::SSHLv2i32;
1061     NegOpc = AArch64::NEGv2i32;
1062     RC = &AArch64::FPR64RegClass;
1063   } else {
1064     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1065     return false;
1066   }
1067
1068   MachineIRBuilder MIB(I);
1069   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1070   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1071   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1072   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1073   I.eraseFromParent();
1074   return true;
1075 }
1076
1077 bool AArch64InstructionSelector::selectVaStartAAPCS(
1078     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1079   return false;
1080 }
1081
1082 bool AArch64InstructionSelector::selectVaStartDarwin(
1083     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1084   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1085   Register ListReg = I.getOperand(0).getReg();
1086
1087   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1088
1089   auto MIB =
1090       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1091           .addDef(ArgsAddrReg)
1092           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1093           .addImm(0)
1094           .addImm(0);
1095
1096   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1097
1098   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1099             .addUse(ArgsAddrReg)
1100             .addUse(ListReg)
1101             .addImm(0)
1102             .addMemOperand(*I.memoperands_begin());
1103
1104   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1105   I.eraseFromParent();
1106   return true;
1107 }
1108
1109 void AArch64InstructionSelector::materializeLargeCMVal(
1110     MachineInstr &I, const Value *V, unsigned OpFlags) const {
1111   MachineBasicBlock &MBB = *I.getParent();
1112   MachineFunction &MF = *MBB.getParent();
1113   MachineRegisterInfo &MRI = MF.getRegInfo();
1114   MachineIRBuilder MIB(I);
1115
1116   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1117   MovZ->addOperand(MF, I.getOperand(1));
1118   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1119                                      AArch64II::MO_NC);
1120   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1121   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1122
1123   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1124                        Register ForceDstReg) {
1125     Register DstReg = ForceDstReg
1126                           ? ForceDstReg
1127                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1128     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1129     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1130       MovI->addOperand(MF, MachineOperand::CreateGA(
1131                                GV, MovZ->getOperand(1).getOffset(), Flags));
1132     } else {
1133       MovI->addOperand(
1134           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1135                                        MovZ->getOperand(1).getOffset(), Flags));
1136     }
1137     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1138     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1139     return DstReg;
1140   };
1141   Register DstReg = BuildMovK(MovZ.getReg(0),
1142                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1143   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1144   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1145   return;
1146 }
1147
1148 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1149   MachineBasicBlock &MBB = *I.getParent();
1150   MachineFunction &MF = *MBB.getParent();
1151   MachineRegisterInfo &MRI = MF.getRegInfo();
1152
1153   switch (I.getOpcode()) {
1154   case TargetOpcode::G_SHL:
1155   case TargetOpcode::G_ASHR:
1156   case TargetOpcode::G_LSHR: {
1157     // These shifts are legalized to have 64 bit shift amounts because we want
1158     // to take advantage of the existing imported selection patterns that assume
1159     // the immediates are s64s. However, if the shifted type is 32 bits and for
1160     // some reason we receive input GMIR that has an s64 shift amount that's not
1161     // a G_CONSTANT, insert a truncate so that we can still select the s32
1162     // register-register variant.
1163     Register SrcReg = I.getOperand(1).getReg();
1164     Register ShiftReg = I.getOperand(2).getReg();
1165     const LLT ShiftTy = MRI.getType(ShiftReg);
1166     const LLT SrcTy = MRI.getType(SrcReg);
1167     if (SrcTy.isVector())
1168       return;
1169     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1170     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1171       return;
1172     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1173     assert(AmtMI && "could not find a vreg definition for shift amount");
1174     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1175       // Insert a subregister copy to implement a 64->32 trunc
1176       MachineIRBuilder MIB(I);
1177       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1178                        .addReg(ShiftReg, 0, AArch64::sub_32);
1179       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1180       I.getOperand(2).setReg(Trunc.getReg(0));
1181     }
1182     return;
1183   }
1184   case TargetOpcode::G_STORE:
1185     contractCrossBankCopyIntoStore(I, MRI);
1186     return;
1187   default:
1188     return;
1189   }
1190 }
1191
1192 bool AArch64InstructionSelector::earlySelectSHL(
1193     MachineInstr &I, MachineRegisterInfo &MRI) const {
1194   // We try to match the immediate variant of LSL, which is actually an alias
1195   // for a special case of UBFM. Otherwise, we fall back to the imported
1196   // selector which will match the register variant.
1197   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1198   const auto &MO = I.getOperand(2);
1199   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1200   if (!VRegAndVal)
1201     return false;
1202
1203   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1204   if (DstTy.isVector())
1205     return false;
1206   bool Is64Bit = DstTy.getSizeInBits() == 64;
1207   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1208   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1209   MachineIRBuilder MIB(I);
1210
1211   if (!Imm1Fn || !Imm2Fn)
1212     return false;
1213
1214   auto NewI =
1215       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1216                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1217
1218   for (auto &RenderFn : *Imm1Fn)
1219     RenderFn(NewI);
1220   for (auto &RenderFn : *Imm2Fn)
1221     RenderFn(NewI);
1222
1223   I.eraseFromParent();
1224   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1225 }
1226
1227 void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1228     MachineInstr &I, MachineRegisterInfo &MRI) const {
1229   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1230   // If we're storing a scalar, it doesn't matter what register bank that
1231   // scalar is on. All that matters is the size.
1232   //
1233   // So, if we see something like this (with a 32-bit scalar as an example):
1234   //
1235   // %x:gpr(s32) = ... something ...
1236   // %y:fpr(s32) = COPY %x:gpr(s32)
1237   // G_STORE %y:fpr(s32)
1238   //
1239   // We can fix this up into something like this:
1240   //
1241   // G_STORE %x:gpr(s32)
1242   //
1243   // And then continue the selection process normally.
1244   MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1245   if (!Def)
1246     return;
1247   Register DefDstReg = Def->getOperand(0).getReg();
1248   LLT DefDstTy = MRI.getType(DefDstReg);
1249   Register StoreSrcReg = I.getOperand(0).getReg();
1250   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1251
1252   // If we get something strange like a physical register, then we shouldn't
1253   // go any further.
1254   if (!DefDstTy.isValid())
1255     return;
1256
1257   // Are the source and dst types the same size?
1258   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1259     return;
1260
1261   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1262       RBI.getRegBank(DefDstReg, MRI, TRI))
1263     return;
1264
1265   // We have a cross-bank copy, which is entering a store. Let's fold it.
1266   I.getOperand(0).setReg(DefDstReg);
1267 }
1268
1269 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1270   assert(I.getParent() && "Instruction should be in a basic block!");
1271   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1272
1273   MachineBasicBlock &MBB = *I.getParent();
1274   MachineFunction &MF = *MBB.getParent();
1275   MachineRegisterInfo &MRI = MF.getRegInfo();
1276
1277   switch (I.getOpcode()) {
1278   case TargetOpcode::G_SHL:
1279     return earlySelectSHL(I, MRI);
1280   case TargetOpcode::G_CONSTANT: {
1281     bool IsZero = false;
1282     if (I.getOperand(1).isCImm())
1283       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1284     else if (I.getOperand(1).isImm())
1285       IsZero = I.getOperand(1).getImm() == 0;
1286
1287     if (!IsZero)
1288       return false;
1289
1290     Register DefReg = I.getOperand(0).getReg();
1291     LLT Ty = MRI.getType(DefReg);
1292     if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1293       return false;
1294
1295     if (Ty == LLT::scalar(64)) {
1296       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1297       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1298     } else {
1299       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1300       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1301     }
1302     I.setDesc(TII.get(TargetOpcode::COPY));
1303     return true;
1304   }
1305   default:
1306     return false;
1307   }
1308 }
1309
1310 bool AArch64InstructionSelector::select(MachineInstr &I) {
1311   assert(I.getParent() && "Instruction should be in a basic block!");
1312   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1313
1314   MachineBasicBlock &MBB = *I.getParent();
1315   MachineFunction &MF = *MBB.getParent();
1316   MachineRegisterInfo &MRI = MF.getRegInfo();
1317
1318   unsigned Opcode = I.getOpcode();
1319   // G_PHI requires same handling as PHI
1320   if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1321     // Certain non-generic instructions also need some special handling.
1322
1323     if (Opcode ==  TargetOpcode::LOAD_STACK_GUARD)
1324       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1325
1326     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1327       const Register DefReg = I.getOperand(0).getReg();
1328       const LLT DefTy = MRI.getType(DefReg);
1329
1330       const RegClassOrRegBank &RegClassOrBank =
1331         MRI.getRegClassOrRegBank(DefReg);
1332
1333       const TargetRegisterClass *DefRC
1334         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1335       if (!DefRC) {
1336         if (!DefTy.isValid()) {
1337           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1338           return false;
1339         }
1340         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1341         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1342         if (!DefRC) {
1343           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1344           return false;
1345         }
1346       }
1347
1348       I.setDesc(TII.get(TargetOpcode::PHI));
1349
1350       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1351     }
1352
1353     if (I.isCopy())
1354       return selectCopy(I, TII, MRI, TRI, RBI);
1355
1356     return true;
1357   }
1358
1359
1360   if (I.getNumOperands() != I.getNumExplicitOperands()) {
1361     LLVM_DEBUG(
1362         dbgs() << "Generic instruction has unexpected implicit operands\n");
1363     return false;
1364   }
1365
1366   // Try to do some lowering before we start instruction selecting. These
1367   // lowerings are purely transformations on the input G_MIR and so selection
1368   // must continue after any modification of the instruction.
1369   preISelLower(I);
1370
1371   // There may be patterns where the importer can't deal with them optimally,
1372   // but does select it to a suboptimal sequence so our custom C++ selection
1373   // code later never has a chance to work on it. Therefore, we have an early
1374   // selection attempt here to give priority to certain selection routines
1375   // over the imported ones.
1376   if (earlySelect(I))
1377     return true;
1378
1379   if (selectImpl(I, *CoverageInfo))
1380     return true;
1381
1382   LLT Ty =
1383       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1384
1385   MachineIRBuilder MIB(I);
1386
1387   switch (Opcode) {
1388   case TargetOpcode::G_BRCOND: {
1389     if (Ty.getSizeInBits() > 32) {
1390       // We shouldn't need this on AArch64, but it would be implemented as an
1391       // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1392       // bit being tested is < 32.
1393       LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1394                         << ", expected at most 32-bits");
1395       return false;
1396     }
1397
1398     const Register CondReg = I.getOperand(0).getReg();
1399     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1400
1401     // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1402     // instructions will not be produced, as they are conditional branch
1403     // instructions that do not set flags.
1404     bool ProduceNonFlagSettingCondBr =
1405         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1406     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1407       return true;
1408
1409     if (ProduceNonFlagSettingCondBr) {
1410       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1411                      .addUse(CondReg)
1412                      .addImm(/*bit offset=*/0)
1413                      .addMBB(DestMBB);
1414
1415       I.eraseFromParent();
1416       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1417     } else {
1418       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1419                      .addDef(AArch64::WZR)
1420                      .addUse(CondReg)
1421                      .addImm(1);
1422       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1423       auto Bcc =
1424           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1425               .addImm(AArch64CC::EQ)
1426               .addMBB(DestMBB);
1427
1428       I.eraseFromParent();
1429       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1430     }
1431   }
1432
1433   case TargetOpcode::G_BRINDIRECT: {
1434     I.setDesc(TII.get(AArch64::BR));
1435     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1436   }
1437
1438   case TargetOpcode::G_BRJT:
1439     return selectBrJT(I, MRI);
1440
1441   case TargetOpcode::G_BSWAP: {
1442     // Handle vector types for G_BSWAP directly.
1443     Register DstReg = I.getOperand(0).getReg();
1444     LLT DstTy = MRI.getType(DstReg);
1445
1446     // We should only get vector types here; everything else is handled by the
1447     // importer right now.
1448     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1449       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1450       return false;
1451     }
1452
1453     // Only handle 4 and 2 element vectors for now.
1454     // TODO: 16-bit elements.
1455     unsigned NumElts = DstTy.getNumElements();
1456     if (NumElts != 4 && NumElts != 2) {
1457       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1458       return false;
1459     }
1460
1461     // Choose the correct opcode for the supported types. Right now, that's
1462     // v2s32, v4s32, and v2s64.
1463     unsigned Opc = 0;
1464     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1465     if (EltSize == 32)
1466       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1467                                           : AArch64::REV32v16i8;
1468     else if (EltSize == 64)
1469       Opc = AArch64::REV64v16i8;
1470
1471     // We should always get something by the time we get here...
1472     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1473
1474     I.setDesc(TII.get(Opc));
1475     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1476   }
1477
1478   case TargetOpcode::G_FCONSTANT:
1479   case TargetOpcode::G_CONSTANT: {
1480     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1481
1482     const LLT s8 = LLT::scalar(8);
1483     const LLT s16 = LLT::scalar(16);
1484     const LLT s32 = LLT::scalar(32);
1485     const LLT s64 = LLT::scalar(64);
1486     const LLT p0 = LLT::pointer(0, 64);
1487
1488     const Register DefReg = I.getOperand(0).getReg();
1489     const LLT DefTy = MRI.getType(DefReg);
1490     const unsigned DefSize = DefTy.getSizeInBits();
1491     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1492
1493     // FIXME: Redundant check, but even less readable when factored out.
1494     if (isFP) {
1495       if (Ty != s32 && Ty != s64) {
1496         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1497                           << " constant, expected: " << s32 << " or " << s64
1498                           << '\n');
1499         return false;
1500       }
1501
1502       if (RB.getID() != AArch64::FPRRegBankID) {
1503         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1504                           << " constant on bank: " << RB
1505                           << ", expected: FPR\n");
1506         return false;
1507       }
1508
1509       // The case when we have 0.0 is covered by tablegen. Reject it here so we
1510       // can be sure tablegen works correctly and isn't rescued by this code.
1511       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1512         return false;
1513     } else {
1514       // s32 and s64 are covered by tablegen.
1515       if (Ty != p0 && Ty != s8 && Ty != s16) {
1516         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1517                           << " constant, expected: " << s32 << ", " << s64
1518                           << ", or " << p0 << '\n');
1519         return false;
1520       }
1521
1522       if (RB.getID() != AArch64::GPRRegBankID) {
1523         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1524                           << " constant on bank: " << RB
1525                           << ", expected: GPR\n");
1526         return false;
1527       }
1528     }
1529
1530     // We allow G_CONSTANT of types < 32b.
1531     const unsigned MovOpc =
1532         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1533
1534     if (isFP) {
1535       // Either emit a FMOV, or emit a copy to emit a normal mov.
1536       const TargetRegisterClass &GPRRC =
1537           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1538       const TargetRegisterClass &FPRRC =
1539           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1540
1541       // Can we use a FMOV instruction to represent the immediate?
1542       if (emitFMovForFConstant(I, MRI))
1543         return true;
1544
1545       // Nope. Emit a copy and use a normal mov instead.
1546       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1547       MachineOperand &RegOp = I.getOperand(0);
1548       RegOp.setReg(DefGPRReg);
1549       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1550       MIB.buildCopy({DefReg}, {DefGPRReg});
1551
1552       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1553         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1554         return false;
1555       }
1556
1557       MachineOperand &ImmOp = I.getOperand(1);
1558       // FIXME: Is going through int64_t always correct?
1559       ImmOp.ChangeToImmediate(
1560           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1561     } else if (I.getOperand(1).isCImm()) {
1562       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1563       I.getOperand(1).ChangeToImmediate(Val);
1564     } else if (I.getOperand(1).isImm()) {
1565       uint64_t Val = I.getOperand(1).getImm();
1566       I.getOperand(1).ChangeToImmediate(Val);
1567     }
1568
1569     I.setDesc(TII.get(MovOpc));
1570     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1571     return true;
1572   }
1573   case TargetOpcode::G_EXTRACT: {
1574     Register DstReg = I.getOperand(0).getReg();
1575     Register SrcReg = I.getOperand(1).getReg();
1576     LLT SrcTy = MRI.getType(SrcReg);
1577     LLT DstTy = MRI.getType(DstReg);
1578     (void)DstTy;
1579     unsigned SrcSize = SrcTy.getSizeInBits();
1580
1581     if (SrcTy.getSizeInBits() > 64) {
1582       // This should be an extract of an s128, which is like a vector extract.
1583       if (SrcTy.getSizeInBits() != 128)
1584         return false;
1585       // Only support extracting 64 bits from an s128 at the moment.
1586       if (DstTy.getSizeInBits() != 64)
1587         return false;
1588
1589       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1590       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1591       // Check we have the right regbank always.
1592       assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1593              DstRB.getID() == AArch64::FPRRegBankID &&
1594              "Wrong extract regbank!");
1595       (void)SrcRB;
1596
1597       // Emit the same code as a vector extract.
1598       // Offset must be a multiple of 64.
1599       unsigned Offset = I.getOperand(2).getImm();
1600       if (Offset % 64 != 0)
1601         return false;
1602       unsigned LaneIdx = Offset / 64;
1603       MachineIRBuilder MIB(I);
1604       MachineInstr *Extract = emitExtractVectorElt(
1605           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1606       if (!Extract)
1607         return false;
1608       I.eraseFromParent();
1609       return true;
1610     }
1611
1612     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1613     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1614                                       Ty.getSizeInBits() - 1);
1615
1616     if (SrcSize < 64) {
1617       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1618              "unexpected G_EXTRACT types");
1619       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1620     }
1621
1622     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1623     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1624     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1625         .addReg(DstReg, 0, AArch64::sub_32);
1626     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1627                                  AArch64::GPR32RegClass, MRI);
1628     I.getOperand(0).setReg(DstReg);
1629
1630     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1631   }
1632
1633   case TargetOpcode::G_INSERT: {
1634     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1635     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1636     unsigned DstSize = DstTy.getSizeInBits();
1637     // Larger inserts are vectors, same-size ones should be something else by
1638     // now (split up or turned into COPYs).
1639     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1640       return false;
1641
1642     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1643     unsigned LSB = I.getOperand(3).getImm();
1644     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1645     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1646     MachineInstrBuilder(MF, I).addImm(Width - 1);
1647
1648     if (DstSize < 64) {
1649       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1650              "unexpected G_INSERT types");
1651       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1652     }
1653
1654     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1655     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1656             TII.get(AArch64::SUBREG_TO_REG))
1657         .addDef(SrcReg)
1658         .addImm(0)
1659         .addUse(I.getOperand(2).getReg())
1660         .addImm(AArch64::sub_32);
1661     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1662                                  AArch64::GPR32RegClass, MRI);
1663     I.getOperand(2).setReg(SrcReg);
1664
1665     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1666   }
1667   case TargetOpcode::G_FRAME_INDEX: {
1668     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1669     if (Ty != LLT::pointer(0, 64)) {
1670       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1671                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1672       return false;
1673     }
1674     I.setDesc(TII.get(AArch64::ADDXri));
1675
1676     // MOs for a #0 shifted immediate.
1677     I.addOperand(MachineOperand::CreateImm(0));
1678     I.addOperand(MachineOperand::CreateImm(0));
1679
1680     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1681   }
1682
1683   case TargetOpcode::G_GLOBAL_VALUE: {
1684     auto GV = I.getOperand(1).getGlobal();
1685     if (GV->isThreadLocal())
1686       return selectTLSGlobalValue(I, MRI);
1687
1688     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
1689     if (OpFlags & AArch64II::MO_GOT) {
1690       I.setDesc(TII.get(AArch64::LOADgot));
1691       I.getOperand(1).setTargetFlags(OpFlags);
1692     } else if (TM.getCodeModel() == CodeModel::Large) {
1693       // Materialize the global using movz/movk instructions.
1694       materializeLargeCMVal(I, GV, OpFlags);
1695       I.eraseFromParent();
1696       return true;
1697     } else if (TM.getCodeModel() == CodeModel::Tiny) {
1698       I.setDesc(TII.get(AArch64::ADR));
1699       I.getOperand(1).setTargetFlags(OpFlags);
1700     } else {
1701       I.setDesc(TII.get(AArch64::MOVaddr));
1702       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1703       MachineInstrBuilder MIB(MF, I);
1704       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1705                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1706     }
1707     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1708   }
1709
1710   case TargetOpcode::G_ZEXTLOAD:
1711   case TargetOpcode::G_LOAD:
1712   case TargetOpcode::G_STORE: {
1713     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1714     MachineIRBuilder MIB(I);
1715
1716     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1717
1718     if (PtrTy != LLT::pointer(0, 64)) {
1719       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1720                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1721       return false;
1722     }
1723
1724     auto &MemOp = **I.memoperands_begin();
1725     if (MemOp.isAtomic()) {
1726       // For now we just support s8 acquire loads to be able to compile stack
1727       // protector code.
1728       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1729           MemOp.getSize() == 1) {
1730         I.setDesc(TII.get(AArch64::LDARB));
1731         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1732       }
1733       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
1734       return false;
1735     }
1736     unsigned MemSizeInBits = MemOp.getSize() * 8;
1737
1738     const Register PtrReg = I.getOperand(1).getReg();
1739 #ifndef NDEBUG
1740     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1741     // Sanity-check the pointer register.
1742     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1743            "Load/Store pointer operand isn't a GPR");
1744     assert(MRI.getType(PtrReg).isPointer() &&
1745            "Load/Store pointer operand isn't a pointer");
1746 #endif
1747
1748     const Register ValReg = I.getOperand(0).getReg();
1749     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1750
1751     const unsigned NewOpc =
1752         selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1753     if (NewOpc == I.getOpcode())
1754       return false;
1755
1756     I.setDesc(TII.get(NewOpc));
1757
1758     uint64_t Offset = 0;
1759     auto *PtrMI = MRI.getVRegDef(PtrReg);
1760
1761     // Try to fold a GEP into our unsigned immediate addressing mode.
1762     if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1763       if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1764         int64_t Imm = *COff;
1765         const unsigned Size = MemSizeInBits / 8;
1766         const unsigned Scale = Log2_32(Size);
1767         if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1768           Register Ptr2Reg = PtrMI->getOperand(1).getReg();
1769           I.getOperand(1).setReg(Ptr2Reg);
1770           PtrMI = MRI.getVRegDef(Ptr2Reg);
1771           Offset = Imm / Size;
1772         }
1773       }
1774     }
1775
1776     // If we haven't folded anything into our addressing mode yet, try to fold
1777     // a frame index into the base+offset.
1778     if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1779       I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1780
1781     I.addOperand(MachineOperand::CreateImm(Offset));
1782
1783     // If we're storing a 0, use WZR/XZR.
1784     if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1785       if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1786         if (I.getOpcode() == AArch64::STRWui)
1787           I.getOperand(0).setReg(AArch64::WZR);
1788         else if (I.getOpcode() == AArch64::STRXui)
1789           I.getOperand(0).setReg(AArch64::XZR);
1790       }
1791     }
1792
1793     if (IsZExtLoad) {
1794       // The zextload from a smaller type to i32 should be handled by the importer.
1795       if (MRI.getType(ValReg).getSizeInBits() != 64)
1796         return false;
1797       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1798       //and zero_extend with SUBREG_TO_REG.
1799       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1800       Register DstReg = I.getOperand(0).getReg();
1801       I.getOperand(0).setReg(LdReg);
1802
1803       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1804       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1805           .addImm(0)
1806           .addUse(LdReg)
1807           .addImm(AArch64::sub_32);
1808       constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1809       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1810                                           MRI);
1811     }
1812     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1813   }
1814
1815   case TargetOpcode::G_SMULH:
1816   case TargetOpcode::G_UMULH: {
1817     // Reject the various things we don't support yet.
1818     if (unsupportedBinOp(I, RBI, MRI, TRI))
1819       return false;
1820
1821     const Register DefReg = I.getOperand(0).getReg();
1822     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1823
1824     if (RB.getID() != AArch64::GPRRegBankID) {
1825       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1826       return false;
1827     }
1828
1829     if (Ty != LLT::scalar(64)) {
1830       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1831                         << ", expected: " << LLT::scalar(64) << '\n');
1832       return false;
1833     }
1834
1835     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1836                                                              : AArch64::UMULHrr;
1837     I.setDesc(TII.get(NewOpc));
1838
1839     // Now that we selected an opcode, we need to constrain the register
1840     // operands to use appropriate classes.
1841     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1842   }
1843   case TargetOpcode::G_FADD:
1844   case TargetOpcode::G_FSUB:
1845   case TargetOpcode::G_FMUL:
1846   case TargetOpcode::G_FDIV:
1847
1848   case TargetOpcode::G_ASHR:
1849     if (MRI.getType(I.getOperand(0).getReg()).isVector())
1850       return selectVectorASHR(I, MRI);
1851     LLVM_FALLTHROUGH;
1852   case TargetOpcode::G_SHL:
1853     if (Opcode == TargetOpcode::G_SHL &&
1854         MRI.getType(I.getOperand(0).getReg()).isVector())
1855       return selectVectorSHL(I, MRI);
1856     LLVM_FALLTHROUGH;
1857   case TargetOpcode::G_OR:
1858   case TargetOpcode::G_LSHR: {
1859     // Reject the various things we don't support yet.
1860     if (unsupportedBinOp(I, RBI, MRI, TRI))
1861       return false;
1862
1863     const unsigned OpSize = Ty.getSizeInBits();
1864
1865     const Register DefReg = I.getOperand(0).getReg();
1866     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1867
1868     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1869     if (NewOpc == I.getOpcode())
1870       return false;
1871
1872     I.setDesc(TII.get(NewOpc));
1873     // FIXME: Should the type be always reset in setDesc?
1874
1875     // Now that we selected an opcode, we need to constrain the register
1876     // operands to use appropriate classes.
1877     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1878   }
1879
1880   case TargetOpcode::G_GEP: {
1881     MachineIRBuilder MIRBuilder(I);
1882     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1883             MIRBuilder);
1884     I.eraseFromParent();
1885     return true;
1886   }
1887   case TargetOpcode::G_UADDO: {
1888     // TODO: Support other types.
1889     unsigned OpSize = Ty.getSizeInBits();
1890     if (OpSize != 32 && OpSize != 64) {
1891       LLVM_DEBUG(
1892           dbgs()
1893           << "G_UADDO currently only supported for 32 and 64 b types.\n");
1894       return false;
1895     }
1896
1897     // TODO: Support vectors.
1898     if (Ty.isVector()) {
1899       LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1900       return false;
1901     }
1902
1903     // Add and set the set condition flag.
1904     unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1905     MachineIRBuilder MIRBuilder(I);
1906     auto AddsMI = MIRBuilder.buildInstr(
1907         AddsOpc, {I.getOperand(0).getReg()},
1908         {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1909     constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1910
1911     // Now, put the overflow result in the register given by the first operand
1912     // to the G_UADDO. CSINC increments the result when the predicate is false,
1913     // so to get the increment when it's true, we need to use the inverse. In
1914     // this case, we want to increment when carry is set.
1915     auto CsetMI = MIRBuilder
1916                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1917                                   {Register(AArch64::WZR), Register(AArch64::WZR)})
1918                       .addImm(getInvertedCondCode(AArch64CC::HS));
1919     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1920     I.eraseFromParent();
1921     return true;
1922   }
1923
1924   case TargetOpcode::G_PTR_MASK: {
1925     uint64_t Align = I.getOperand(2).getImm();
1926     if (Align >= 64 || Align == 0)
1927       return false;
1928
1929     uint64_t Mask = ~((1ULL << Align) - 1);
1930     I.setDesc(TII.get(AArch64::ANDXri));
1931     I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1932
1933     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1934   }
1935   case TargetOpcode::G_PTRTOINT:
1936   case TargetOpcode::G_TRUNC: {
1937     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1938     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1939
1940     const Register DstReg = I.getOperand(0).getReg();
1941     const Register SrcReg = I.getOperand(1).getReg();
1942
1943     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1944     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1945
1946     if (DstRB.getID() != SrcRB.getID()) {
1947       LLVM_DEBUG(
1948           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1949       return false;
1950     }
1951
1952     if (DstRB.getID() == AArch64::GPRRegBankID) {
1953       const TargetRegisterClass *DstRC =
1954           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1955       if (!DstRC)
1956         return false;
1957
1958       const TargetRegisterClass *SrcRC =
1959           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1960       if (!SrcRC)
1961         return false;
1962
1963       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1964           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1965         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1966         return false;
1967       }
1968
1969       if (DstRC == SrcRC) {
1970         // Nothing to be done
1971       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1972                  SrcTy == LLT::scalar(64)) {
1973         llvm_unreachable("TableGen can import this case");
1974         return false;
1975       } else if (DstRC == &AArch64::GPR32RegClass &&
1976                  SrcRC == &AArch64::GPR64RegClass) {
1977         I.getOperand(1).setSubReg(AArch64::sub_32);
1978       } else {
1979         LLVM_DEBUG(
1980             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1981         return false;
1982       }
1983
1984       I.setDesc(TII.get(TargetOpcode::COPY));
1985       return true;
1986     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1987       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1988         I.setDesc(TII.get(AArch64::XTNv4i16));
1989         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1990         return true;
1991       }
1992
1993       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
1994         MachineIRBuilder MIB(I);
1995         MachineInstr *Extract = emitExtractVectorElt(
1996             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
1997         if (!Extract)
1998           return false;
1999         I.eraseFromParent();
2000         return true;
2001       }
2002     }
2003
2004     return false;
2005   }
2006
2007   case TargetOpcode::G_ANYEXT: {
2008     const Register DstReg = I.getOperand(0).getReg();
2009     const Register SrcReg = I.getOperand(1).getReg();
2010
2011     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2012     if (RBDst.getID() != AArch64::GPRRegBankID) {
2013       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2014                         << ", expected: GPR\n");
2015       return false;
2016     }
2017
2018     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2019     if (RBSrc.getID() != AArch64::GPRRegBankID) {
2020       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2021                         << ", expected: GPR\n");
2022       return false;
2023     }
2024
2025     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2026
2027     if (DstSize == 0) {
2028       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2029       return false;
2030     }
2031
2032     if (DstSize != 64 && DstSize > 32) {
2033       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2034                         << ", expected: 32 or 64\n");
2035       return false;
2036     }
2037     // At this point G_ANYEXT is just like a plain COPY, but we need
2038     // to explicitly form the 64-bit value if any.
2039     if (DstSize > 32) {
2040       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2041       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2042           .addDef(ExtSrc)
2043           .addImm(0)
2044           .addUse(SrcReg)
2045           .addImm(AArch64::sub_32);
2046       I.getOperand(1).setReg(ExtSrc);
2047     }
2048     return selectCopy(I, TII, MRI, TRI, RBI);
2049   }
2050
2051   case TargetOpcode::G_ZEXT:
2052   case TargetOpcode::G_SEXT: {
2053     unsigned Opcode = I.getOpcode();
2054     const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2055     const Register DefReg = I.getOperand(0).getReg();
2056     const Register SrcReg = I.getOperand(1).getReg();
2057     const LLT DstTy = MRI.getType(DefReg);
2058     const LLT SrcTy = MRI.getType(SrcReg);
2059     unsigned DstSize = DstTy.getSizeInBits();
2060     unsigned SrcSize = SrcTy.getSizeInBits();
2061
2062     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2063                AArch64::GPRRegBankID &&
2064            "Unexpected ext regbank");
2065
2066     MachineIRBuilder MIB(I);
2067     MachineInstr *ExtI;
2068     if (DstTy.isVector())
2069       return false; // Should be handled by imported patterns.
2070
2071     // First check if we're extending the result of a load which has a dest type
2072     // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2073     // GPR register on AArch64 and all loads which are smaller automatically
2074     // zero-extend the upper bits. E.g.
2075     // %v(s8) = G_LOAD %p, :: (load 1)
2076     // %v2(s32) = G_ZEXT %v(s8)
2077     if (!IsSigned) {
2078       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2079       if (LoadMI &&
2080           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2081         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2082         unsigned BytesLoaded = MemOp->getSize();
2083         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2084           return selectCopy(I, TII, MRI, TRI, RBI);
2085       }
2086     }
2087
2088     if (DstSize == 64) {
2089       // FIXME: Can we avoid manually doing this?
2090       if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2091         LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2092                           << " operand\n");
2093         return false;
2094       }
2095
2096       auto SubregToReg =
2097           MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2098               .addImm(0)
2099               .addUse(SrcReg)
2100               .addImm(AArch64::sub_32);
2101
2102       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2103                              {DefReg}, {SubregToReg})
2104                   .addImm(0)
2105                   .addImm(SrcSize - 1);
2106     } else if (DstSize <= 32) {
2107       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2108                              {DefReg}, {SrcReg})
2109                   .addImm(0)
2110                   .addImm(SrcSize - 1);
2111     } else {
2112       return false;
2113     }
2114
2115     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2116     I.eraseFromParent();
2117     return true;
2118   }
2119
2120   case TargetOpcode::G_SITOFP:
2121   case TargetOpcode::G_UITOFP:
2122   case TargetOpcode::G_FPTOSI:
2123   case TargetOpcode::G_FPTOUI: {
2124     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2125               SrcTy = MRI.getType(I.getOperand(1).getReg());
2126     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2127     if (NewOpc == Opcode)
2128       return false;
2129
2130     I.setDesc(TII.get(NewOpc));
2131     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2132
2133     return true;
2134   }
2135
2136
2137   case TargetOpcode::G_INTTOPTR:
2138     // The importer is currently unable to import pointer types since they
2139     // didn't exist in SelectionDAG.
2140     return selectCopy(I, TII, MRI, TRI, RBI);
2141
2142   case TargetOpcode::G_BITCAST:
2143     // Imported SelectionDAG rules can handle every bitcast except those that
2144     // bitcast from a type to the same type. Ideally, these shouldn't occur
2145     // but we might not run an optimizer that deletes them. The other exception
2146     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2147     // of them.
2148     return selectCopy(I, TII, MRI, TRI, RBI);
2149
2150   case TargetOpcode::G_SELECT: {
2151     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2152       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2153                         << ", expected: " << LLT::scalar(1) << '\n');
2154       return false;
2155     }
2156
2157     const Register CondReg = I.getOperand(1).getReg();
2158     const Register TReg = I.getOperand(2).getReg();
2159     const Register FReg = I.getOperand(3).getReg();
2160
2161     if (tryOptSelect(I))
2162       return true;
2163
2164     Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2165     MachineInstr &TstMI =
2166         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2167              .addDef(AArch64::WZR)
2168              .addUse(CondReg)
2169              .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2170
2171     MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2172                                 .addDef(I.getOperand(0).getReg())
2173                                 .addUse(TReg)
2174                                 .addUse(FReg)
2175                                 .addImm(AArch64CC::NE);
2176
2177     constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2178     constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2179
2180     I.eraseFromParent();
2181     return true;
2182   }
2183   case TargetOpcode::G_ICMP: {
2184     if (Ty.isVector())
2185       return selectVectorICmp(I, MRI);
2186
2187     if (Ty != LLT::scalar(32)) {
2188       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2189                         << ", expected: " << LLT::scalar(32) << '\n');
2190       return false;
2191     }
2192
2193     MachineIRBuilder MIRBuilder(I);
2194     if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2195                             MIRBuilder))
2196       return false;
2197     emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2198                     MIRBuilder);
2199     I.eraseFromParent();
2200     return true;
2201   }
2202
2203   case TargetOpcode::G_FCMP: {
2204     if (Ty != LLT::scalar(32)) {
2205       LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2206                         << ", expected: " << LLT::scalar(32) << '\n');
2207       return false;
2208     }
2209
2210     unsigned CmpOpc = selectFCMPOpc(I, MRI);
2211     if (!CmpOpc)
2212       return false;
2213
2214     // FIXME: regbank
2215
2216     AArch64CC::CondCode CC1, CC2;
2217     changeFCMPPredToAArch64CC(
2218         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2219
2220     // Partially build the compare. Decide if we need to add a use for the
2221     // third operand based off whether or not we're comparing against 0.0.
2222     auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2223                      .addUse(I.getOperand(2).getReg());
2224
2225     // If we don't have an immediate compare, then we need to add a use of the
2226     // register which wasn't used for the immediate.
2227     // Note that the immediate will always be the last operand.
2228     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2229       CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2230
2231     const Register DefReg = I.getOperand(0).getReg();
2232     Register Def1Reg = DefReg;
2233     if (CC2 != AArch64CC::AL)
2234       Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2235
2236     MachineInstr &CSetMI =
2237         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2238              .addDef(Def1Reg)
2239              .addUse(AArch64::WZR)
2240              .addUse(AArch64::WZR)
2241              .addImm(getInvertedCondCode(CC1));
2242
2243     if (CC2 != AArch64CC::AL) {
2244       Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2245       MachineInstr &CSet2MI =
2246           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2247                .addDef(Def2Reg)
2248                .addUse(AArch64::WZR)
2249                .addUse(AArch64::WZR)
2250                .addImm(getInvertedCondCode(CC2));
2251       MachineInstr &OrMI =
2252           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2253                .addDef(DefReg)
2254                .addUse(Def1Reg)
2255                .addUse(Def2Reg);
2256       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2257       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2258     }
2259     constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2260     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2261
2262     I.eraseFromParent();
2263     return true;
2264   }
2265   case TargetOpcode::G_VASTART:
2266     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2267                                 : selectVaStartAAPCS(I, MF, MRI);
2268   case TargetOpcode::G_INTRINSIC:
2269     return selectIntrinsic(I, MRI);
2270   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2271     return selectIntrinsicWithSideEffects(I, MRI);
2272   case TargetOpcode::G_IMPLICIT_DEF: {
2273     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2274     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2275     const Register DstReg = I.getOperand(0).getReg();
2276     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2277     const TargetRegisterClass *DstRC =
2278         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2279     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2280     return true;
2281   }
2282   case TargetOpcode::G_BLOCK_ADDR: {
2283     if (TM.getCodeModel() == CodeModel::Large) {
2284       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2285       I.eraseFromParent();
2286       return true;
2287     } else {
2288       I.setDesc(TII.get(AArch64::MOVaddrBA));
2289       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2290                            I.getOperand(0).getReg())
2291                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
2292                                         /* Offset */ 0, AArch64II::MO_PAGE)
2293                        .addBlockAddress(
2294                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2295                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2296       I.eraseFromParent();
2297       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2298     }
2299   }
2300   case TargetOpcode::G_INTRINSIC_TRUNC:
2301     return selectIntrinsicTrunc(I, MRI);
2302   case TargetOpcode::G_INTRINSIC_ROUND:
2303     return selectIntrinsicRound(I, MRI);
2304   case TargetOpcode::G_BUILD_VECTOR:
2305     return selectBuildVector(I, MRI);
2306   case TargetOpcode::G_MERGE_VALUES:
2307     return selectMergeValues(I, MRI);
2308   case TargetOpcode::G_UNMERGE_VALUES:
2309     return selectUnmergeValues(I, MRI);
2310   case TargetOpcode::G_SHUFFLE_VECTOR:
2311     return selectShuffleVector(I, MRI);
2312   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2313     return selectExtractElt(I, MRI);
2314   case TargetOpcode::G_INSERT_VECTOR_ELT:
2315     return selectInsertElt(I, MRI);
2316   case TargetOpcode::G_CONCAT_VECTORS:
2317     return selectConcatVectors(I, MRI);
2318   case TargetOpcode::G_JUMP_TABLE:
2319     return selectJumpTable(I, MRI);
2320   }
2321
2322   return false;
2323 }
2324
2325 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2326                                             MachineRegisterInfo &MRI) const {
2327   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2328   Register JTAddr = I.getOperand(0).getReg();
2329   unsigned JTI = I.getOperand(1).getIndex();
2330   Register Index = I.getOperand(2).getReg();
2331   MachineIRBuilder MIB(I);
2332
2333   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2334   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2335   MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2336                  {JTAddr, Index})
2337       .addJumpTableIndex(JTI);
2338
2339   // Build the indirect branch.
2340   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2341   I.eraseFromParent();
2342   return true;
2343 }
2344
2345 bool AArch64InstructionSelector::selectJumpTable(
2346     MachineInstr &I, MachineRegisterInfo &MRI) const {
2347   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2348   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2349
2350   Register DstReg = I.getOperand(0).getReg();
2351   unsigned JTI = I.getOperand(1).getIndex();
2352   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2353   MachineIRBuilder MIB(I);
2354   auto MovMI =
2355     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2356           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2357           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2358   I.eraseFromParent();
2359   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2360 }
2361
2362 bool AArch64InstructionSelector::selectTLSGlobalValue(
2363     MachineInstr &I, MachineRegisterInfo &MRI) const {
2364   if (!STI.isTargetMachO())
2365     return false;
2366   MachineFunction &MF = *I.getParent()->getParent();
2367   MF.getFrameInfo().setAdjustsStack(true);
2368
2369   const GlobalValue &GV = *I.getOperand(1).getGlobal();
2370   MachineIRBuilder MIB(I);
2371
2372   MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2373       .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2374
2375   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2376                              {Register(AArch64::X0)})
2377                   .addImm(0);
2378
2379   // TLS calls preserve all registers except those that absolutely must be
2380   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2381   // silly).
2382   MIB.buildInstr(AArch64::BLR, {}, {Load})
2383       .addDef(AArch64::X0, RegState::Implicit)
2384       .addRegMask(TRI.getTLSCallPreservedMask());
2385
2386   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2387   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2388                                MRI);
2389   I.eraseFromParent();
2390   return true;
2391 }
2392
2393 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2394     MachineInstr &I, MachineRegisterInfo &MRI) const {
2395   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2396
2397   // Select the correct opcode.
2398   unsigned Opc = 0;
2399   if (!SrcTy.isVector()) {
2400     switch (SrcTy.getSizeInBits()) {
2401     default:
2402     case 16:
2403       Opc = AArch64::FRINTZHr;
2404       break;
2405     case 32:
2406       Opc = AArch64::FRINTZSr;
2407       break;
2408     case 64:
2409       Opc = AArch64::FRINTZDr;
2410       break;
2411     }
2412   } else {
2413     unsigned NumElts = SrcTy.getNumElements();
2414     switch (SrcTy.getElementType().getSizeInBits()) {
2415     default:
2416       break;
2417     case 16:
2418       if (NumElts == 4)
2419         Opc = AArch64::FRINTZv4f16;
2420       else if (NumElts == 8)
2421         Opc = AArch64::FRINTZv8f16;
2422       break;
2423     case 32:
2424       if (NumElts == 2)
2425         Opc = AArch64::FRINTZv2f32;
2426       else if (NumElts == 4)
2427         Opc = AArch64::FRINTZv4f32;
2428       break;
2429     case 64:
2430       if (NumElts == 2)
2431         Opc = AArch64::FRINTZv2f64;
2432       break;
2433     }
2434   }
2435
2436   if (!Opc) {
2437     // Didn't get an opcode above, bail.
2438     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2439     return false;
2440   }
2441
2442   // Legalization would have set us up perfectly for this; we just need to
2443   // set the opcode and move on.
2444   I.setDesc(TII.get(Opc));
2445   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2446 }
2447
2448 bool AArch64InstructionSelector::selectIntrinsicRound(
2449     MachineInstr &I, MachineRegisterInfo &MRI) const {
2450   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2451
2452   // Select the correct opcode.
2453   unsigned Opc = 0;
2454   if (!SrcTy.isVector()) {
2455     switch (SrcTy.getSizeInBits()) {
2456     default:
2457     case 16:
2458       Opc = AArch64::FRINTAHr;
2459       break;
2460     case 32:
2461       Opc = AArch64::FRINTASr;
2462       break;
2463     case 64:
2464       Opc = AArch64::FRINTADr;
2465       break;
2466     }
2467   } else {
2468     unsigned NumElts = SrcTy.getNumElements();
2469     switch (SrcTy.getElementType().getSizeInBits()) {
2470     default:
2471       break;
2472     case 16:
2473       if (NumElts == 4)
2474         Opc = AArch64::FRINTAv4f16;
2475       else if (NumElts == 8)
2476         Opc = AArch64::FRINTAv8f16;
2477       break;
2478     case 32:
2479       if (NumElts == 2)
2480         Opc = AArch64::FRINTAv2f32;
2481       else if (NumElts == 4)
2482         Opc = AArch64::FRINTAv4f32;
2483       break;
2484     case 64:
2485       if (NumElts == 2)
2486         Opc = AArch64::FRINTAv2f64;
2487       break;
2488     }
2489   }
2490
2491   if (!Opc) {
2492     // Didn't get an opcode above, bail.
2493     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2494     return false;
2495   }
2496
2497   // Legalization would have set us up perfectly for this; we just need to
2498   // set the opcode and move on.
2499   I.setDesc(TII.get(Opc));
2500   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2501 }
2502
2503 bool AArch64InstructionSelector::selectVectorICmp(
2504     MachineInstr &I, MachineRegisterInfo &MRI) const {
2505   Register DstReg = I.getOperand(0).getReg();
2506   LLT DstTy = MRI.getType(DstReg);
2507   Register SrcReg = I.getOperand(2).getReg();
2508   Register Src2Reg = I.getOperand(3).getReg();
2509   LLT SrcTy = MRI.getType(SrcReg);
2510
2511   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2512   unsigned NumElts = DstTy.getNumElements();
2513
2514   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2515   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2516   // Third index is cc opcode:
2517   // 0 == eq
2518   // 1 == ugt
2519   // 2 == uge
2520   // 3 == ult
2521   // 4 == ule
2522   // 5 == sgt
2523   // 6 == sge
2524   // 7 == slt
2525   // 8 == sle
2526   // ne is done by negating 'eq' result.
2527
2528   // This table below assumes that for some comparisons the operands will be
2529   // commuted.
2530   // ult op == commute + ugt op
2531   // ule op == commute + uge op
2532   // slt op == commute + sgt op
2533   // sle op == commute + sge op
2534   unsigned PredIdx = 0;
2535   bool SwapOperands = false;
2536   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2537   switch (Pred) {
2538   case CmpInst::ICMP_NE:
2539   case CmpInst::ICMP_EQ:
2540     PredIdx = 0;
2541     break;
2542   case CmpInst::ICMP_UGT:
2543     PredIdx = 1;
2544     break;
2545   case CmpInst::ICMP_UGE:
2546     PredIdx = 2;
2547     break;
2548   case CmpInst::ICMP_ULT:
2549     PredIdx = 3;
2550     SwapOperands = true;
2551     break;
2552   case CmpInst::ICMP_ULE:
2553     PredIdx = 4;
2554     SwapOperands = true;
2555     break;
2556   case CmpInst::ICMP_SGT:
2557     PredIdx = 5;
2558     break;
2559   case CmpInst::ICMP_SGE:
2560     PredIdx = 6;
2561     break;
2562   case CmpInst::ICMP_SLT:
2563     PredIdx = 7;
2564     SwapOperands = true;
2565     break;
2566   case CmpInst::ICMP_SLE:
2567     PredIdx = 8;
2568     SwapOperands = true;
2569     break;
2570   default:
2571     llvm_unreachable("Unhandled icmp predicate");
2572     return false;
2573   }
2574
2575   // This table obviously should be tablegen'd when we have our GISel native
2576   // tablegen selector.
2577
2578   static const unsigned OpcTable[4][4][9] = {
2579       {
2580           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2581            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2582            0 /* invalid */},
2583           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2584            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2585            0 /* invalid */},
2586           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2587            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2588            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2589           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2590            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2591            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2592       },
2593       {
2594           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2595            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2596            0 /* invalid */},
2597           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2598            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2599            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2600           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2601            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2602            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2603           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2604            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2605            0 /* invalid */}
2606       },
2607       {
2608           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2609            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2610            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2611           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2612            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2613            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2614           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2615            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2616            0 /* invalid */},
2617           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2618            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2619            0 /* invalid */}
2620       },
2621       {
2622           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2623            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2624            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2625           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2626            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2627            0 /* invalid */},
2628           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2629            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2630            0 /* invalid */},
2631           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2632            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2633            0 /* invalid */}
2634       },
2635   };
2636   unsigned EltIdx = Log2_32(SrcEltSize / 8);
2637   unsigned NumEltsIdx = Log2_32(NumElts / 2);
2638   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2639   if (!Opc) {
2640     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2641     return false;
2642   }
2643
2644   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2645   const TargetRegisterClass *SrcRC =
2646       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2647   if (!SrcRC) {
2648     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2649     return false;
2650   }
2651
2652   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2653   if (SrcTy.getSizeInBits() == 128)
2654     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2655
2656   if (SwapOperands)
2657     std::swap(SrcReg, Src2Reg);
2658
2659   MachineIRBuilder MIB(I);
2660   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2661   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2662
2663   // Invert if we had a 'ne' cc.
2664   if (NotOpc) {
2665     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2666     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2667   } else {
2668     MIB.buildCopy(DstReg, Cmp.getReg(0));
2669   }
2670   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2671   I.eraseFromParent();
2672   return true;
2673 }
2674
2675 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2676     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2677     MachineIRBuilder &MIRBuilder) const {
2678   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2679
2680   auto BuildFn = [&](unsigned SubregIndex) {
2681     auto Ins =
2682         MIRBuilder
2683             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2684             .addImm(SubregIndex);
2685     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2686     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2687     return &*Ins;
2688   };
2689
2690   switch (EltSize) {
2691   case 16:
2692     return BuildFn(AArch64::hsub);
2693   case 32:
2694     return BuildFn(AArch64::ssub);
2695   case 64:
2696     return BuildFn(AArch64::dsub);
2697   default:
2698     return nullptr;
2699   }
2700 }
2701
2702 bool AArch64InstructionSelector::selectMergeValues(
2703     MachineInstr &I, MachineRegisterInfo &MRI) const {
2704   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2705   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2706   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2707   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2708   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2709
2710   if (I.getNumOperands() != 3)
2711     return false;
2712
2713   // Merging 2 s64s into an s128.
2714   if (DstTy == LLT::scalar(128)) {
2715     if (SrcTy.getSizeInBits() != 64)
2716       return false;
2717     MachineIRBuilder MIB(I);
2718     Register DstReg = I.getOperand(0).getReg();
2719     Register Src1Reg = I.getOperand(1).getReg();
2720     Register Src2Reg = I.getOperand(2).getReg();
2721     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2722     MachineInstr *InsMI =
2723         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2724     if (!InsMI)
2725       return false;
2726     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2727                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
2728     if (!Ins2MI)
2729       return false;
2730     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2731     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2732     I.eraseFromParent();
2733     return true;
2734   }
2735
2736   if (RB.getID() != AArch64::GPRRegBankID)
2737     return false;
2738
2739   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2740     return false;
2741
2742   auto *DstRC = &AArch64::GPR64RegClass;
2743   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2744   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2745                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2746                                 .addDef(SubToRegDef)
2747                                 .addImm(0)
2748                                 .addUse(I.getOperand(1).getReg())
2749                                 .addImm(AArch64::sub_32);
2750   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2751   // Need to anyext the second scalar before we can use bfm
2752   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2753                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2754                                 .addDef(SubToRegDef2)
2755                                 .addImm(0)
2756                                 .addUse(I.getOperand(2).getReg())
2757                                 .addImm(AArch64::sub_32);
2758   MachineInstr &BFM =
2759       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2760            .addDef(I.getOperand(0).getReg())
2761            .addUse(SubToRegDef)
2762            .addUse(SubToRegDef2)
2763            .addImm(32)
2764            .addImm(31);
2765   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2766   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2767   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2768   I.eraseFromParent();
2769   return true;
2770 }
2771
2772 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2773                               const unsigned EltSize) {
2774   // Choose a lane copy opcode and subregister based off of the size of the
2775   // vector's elements.
2776   switch (EltSize) {
2777   case 16:
2778     CopyOpc = AArch64::CPYi16;
2779     ExtractSubReg = AArch64::hsub;
2780     break;
2781   case 32:
2782     CopyOpc = AArch64::CPYi32;
2783     ExtractSubReg = AArch64::ssub;
2784     break;
2785   case 64:
2786     CopyOpc = AArch64::CPYi64;
2787     ExtractSubReg = AArch64::dsub;
2788     break;
2789   default:
2790     // Unknown size, bail out.
2791     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2792     return false;
2793   }
2794   return true;
2795 }
2796
2797 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2798     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2799     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2800   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2801   unsigned CopyOpc = 0;
2802   unsigned ExtractSubReg = 0;
2803   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2804     LLVM_DEBUG(
2805         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2806     return nullptr;
2807   }
2808
2809   const TargetRegisterClass *DstRC =
2810       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2811   if (!DstRC) {
2812     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2813     return nullptr;
2814   }
2815
2816   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2817   const LLT &VecTy = MRI.getType(VecReg);
2818   const TargetRegisterClass *VecRC =
2819       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2820   if (!VecRC) {
2821     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2822     return nullptr;
2823   }
2824
2825   // The register that we're going to copy into.
2826   Register InsertReg = VecReg;
2827   if (!DstReg)
2828     DstReg = MRI.createVirtualRegister(DstRC);
2829   // If the lane index is 0, we just use a subregister COPY.
2830   if (LaneIdx == 0) {
2831     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2832                     .addReg(VecReg, 0, ExtractSubReg);
2833     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2834     return &*Copy;
2835   }
2836
2837   // Lane copies require 128-bit wide registers. If we're dealing with an
2838   // unpacked vector, then we need to move up to that width. Insert an implicit
2839   // def and a subregister insert to get us there.
2840   if (VecTy.getSizeInBits() != 128) {
2841     MachineInstr *ScalarToVector = emitScalarToVector(
2842         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2843     if (!ScalarToVector)
2844       return nullptr;
2845     InsertReg = ScalarToVector->getOperand(0).getReg();
2846   }
2847
2848   MachineInstr *LaneCopyMI =
2849       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2850   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2851
2852   // Make sure that we actually constrain the initial copy.
2853   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2854   return LaneCopyMI;
2855 }
2856
2857 bool AArch64InstructionSelector::selectExtractElt(
2858     MachineInstr &I, MachineRegisterInfo &MRI) const {
2859   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2860          "unexpected opcode!");
2861   Register DstReg = I.getOperand(0).getReg();
2862   const LLT NarrowTy = MRI.getType(DstReg);
2863   const Register SrcReg = I.getOperand(1).getReg();
2864   const LLT WideTy = MRI.getType(SrcReg);
2865   (void)WideTy;
2866   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2867          "source register size too small!");
2868   assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2869
2870   // Need the lane index to determine the correct copy opcode.
2871   MachineOperand &LaneIdxOp = I.getOperand(2);
2872   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2873
2874   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2875     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2876     return false;
2877   }
2878
2879   // Find the index to extract from.
2880   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2881   if (!VRegAndVal)
2882     return false;
2883   unsigned LaneIdx = VRegAndVal->Value;
2884
2885   MachineIRBuilder MIRBuilder(I);
2886
2887   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2888   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2889                                                LaneIdx, MIRBuilder);
2890   if (!Extract)
2891     return false;
2892
2893   I.eraseFromParent();
2894   return true;
2895 }
2896
2897 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2898     MachineInstr &I, MachineRegisterInfo &MRI) const {
2899   unsigned NumElts = I.getNumOperands() - 1;
2900   Register SrcReg = I.getOperand(NumElts).getReg();
2901   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2902   const LLT SrcTy = MRI.getType(SrcReg);
2903
2904   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2905   if (SrcTy.getSizeInBits() > 128) {
2906     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2907     return false;
2908   }
2909
2910   MachineIRBuilder MIB(I);
2911
2912   // We implement a split vector operation by treating the sub-vectors as
2913   // scalars and extracting them.
2914   const RegisterBank &DstRB =
2915       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2916   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2917     Register Dst = I.getOperand(OpIdx).getReg();
2918     MachineInstr *Extract =
2919         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2920     if (!Extract)
2921       return false;
2922   }
2923   I.eraseFromParent();
2924   return true;
2925 }
2926
2927 bool AArch64InstructionSelector::selectUnmergeValues(
2928     MachineInstr &I, MachineRegisterInfo &MRI) const {
2929   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2930          "unexpected opcode");
2931
2932   // TODO: Handle unmerging into GPRs and from scalars to scalars.
2933   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2934           AArch64::FPRRegBankID ||
2935       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2936           AArch64::FPRRegBankID) {
2937     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2938                          "currently unsupported.\n");
2939     return false;
2940   }
2941
2942   // The last operand is the vector source register, and every other operand is
2943   // a register to unpack into.
2944   unsigned NumElts = I.getNumOperands() - 1;
2945   Register SrcReg = I.getOperand(NumElts).getReg();
2946   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2947   const LLT WideTy = MRI.getType(SrcReg);
2948   (void)WideTy;
2949   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
2950          "can only unmerge from vector or s128 types!");
2951   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2952          "source register size too small!");
2953
2954   if (!NarrowTy.isScalar())
2955     return selectSplitVectorUnmerge(I, MRI);
2956
2957   MachineIRBuilder MIB(I);
2958
2959   // Choose a lane copy opcode and subregister based off of the size of the
2960   // vector's elements.
2961   unsigned CopyOpc = 0;
2962   unsigned ExtractSubReg = 0;
2963   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2964     return false;
2965
2966   // Set up for the lane copies.
2967   MachineBasicBlock &MBB = *I.getParent();
2968
2969   // Stores the registers we'll be copying from.
2970   SmallVector<Register, 4> InsertRegs;
2971
2972   // We'll use the first register twice, so we only need NumElts-1 registers.
2973   unsigned NumInsertRegs = NumElts - 1;
2974
2975   // If our elements fit into exactly 128 bits, then we can copy from the source
2976   // directly. Otherwise, we need to do a bit of setup with some subregister
2977   // inserts.
2978   if (NarrowTy.getSizeInBits() * NumElts == 128) {
2979     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2980   } else {
2981     // No. We have to perform subregister inserts. For each insert, create an
2982     // implicit def and a subregister insert, and save the register we create.
2983     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2984       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2985       MachineInstr &ImpDefMI =
2986           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2987                    ImpDefReg);
2988
2989       // Now, create the subregister insert from SrcReg.
2990       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2991       MachineInstr &InsMI =
2992           *BuildMI(MBB, I, I.getDebugLoc(),
2993                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2994                .addUse(ImpDefReg)
2995                .addUse(SrcReg)
2996                .addImm(AArch64::dsub);
2997
2998       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2999       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3000
3001       // Save the register so that we can copy from it after.
3002       InsertRegs.push_back(InsertReg);
3003     }
3004   }
3005
3006   // Now that we've created any necessary subregister inserts, we can
3007   // create the copies.
3008   //
3009   // Perform the first copy separately as a subregister copy.
3010   Register CopyTo = I.getOperand(0).getReg();
3011   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3012                        .addReg(InsertRegs[0], 0, ExtractSubReg);
3013   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3014
3015   // Now, perform the remaining copies as vector lane copies.
3016   unsigned LaneIdx = 1;
3017   for (Register InsReg : InsertRegs) {
3018     Register CopyTo = I.getOperand(LaneIdx).getReg();
3019     MachineInstr &CopyInst =
3020         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3021              .addUse(InsReg)
3022              .addImm(LaneIdx);
3023     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3024     ++LaneIdx;
3025   }
3026
3027   // Separately constrain the first copy's destination. Because of the
3028   // limitation in constrainOperandRegClass, we can't guarantee that this will
3029   // actually be constrained. So, do it ourselves using the second operand.
3030   const TargetRegisterClass *RC =
3031       MRI.getRegClassOrNull(I.getOperand(1).getReg());
3032   if (!RC) {
3033     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3034     return false;
3035   }
3036
3037   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3038   I.eraseFromParent();
3039   return true;
3040 }
3041
3042 bool AArch64InstructionSelector::selectConcatVectors(
3043     MachineInstr &I, MachineRegisterInfo &MRI) const {
3044   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3045          "Unexpected opcode");
3046   Register Dst = I.getOperand(0).getReg();
3047   Register Op1 = I.getOperand(1).getReg();
3048   Register Op2 = I.getOperand(2).getReg();
3049   MachineIRBuilder MIRBuilder(I);
3050   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3051   if (!ConcatMI)
3052     return false;
3053   I.eraseFromParent();
3054   return true;
3055 }
3056
3057 unsigned
3058 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3059                                                   MachineFunction &MF) const {
3060   Type *CPTy = CPVal->getType();
3061   unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3062   if (Align == 0)
3063     Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3064
3065   MachineConstantPool *MCP = MF.getConstantPool();
3066   return MCP->getConstantPoolIndex(CPVal, Align);
3067 }
3068
3069 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3070     Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3071   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3072
3073   auto Adrp =
3074       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3075           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3076
3077   MachineInstr *LoadMI = nullptr;
3078   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3079   case 16:
3080     LoadMI =
3081         &*MIRBuilder
3082               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3083               .addConstantPoolIndex(CPIdx, 0,
3084                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3085     break;
3086   case 8:
3087     LoadMI = &*MIRBuilder
3088                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3089                  .addConstantPoolIndex(
3090                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3091     break;
3092   default:
3093     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3094                       << *CPVal->getType());
3095     return nullptr;
3096   }
3097   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3098   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3099   return LoadMI;
3100 }
3101
3102 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
3103 /// size and RB.
3104 static std::pair<unsigned, unsigned>
3105 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3106   unsigned Opc, SubregIdx;
3107   if (RB.getID() == AArch64::GPRRegBankID) {
3108     if (EltSize == 32) {
3109       Opc = AArch64::INSvi32gpr;
3110       SubregIdx = AArch64::ssub;
3111     } else if (EltSize == 64) {
3112       Opc = AArch64::INSvi64gpr;
3113       SubregIdx = AArch64::dsub;
3114     } else {
3115       llvm_unreachable("invalid elt size!");
3116     }
3117   } else {
3118     if (EltSize == 8) {
3119       Opc = AArch64::INSvi8lane;
3120       SubregIdx = AArch64::bsub;
3121     } else if (EltSize == 16) {
3122       Opc = AArch64::INSvi16lane;
3123       SubregIdx = AArch64::hsub;
3124     } else if (EltSize == 32) {
3125       Opc = AArch64::INSvi32lane;
3126       SubregIdx = AArch64::ssub;
3127     } else if (EltSize == 64) {
3128       Opc = AArch64::INSvi64lane;
3129       SubregIdx = AArch64::dsub;
3130     } else {
3131       llvm_unreachable("invalid elt size!");
3132     }
3133   }
3134   return std::make_pair(Opc, SubregIdx);
3135 }
3136
3137 MachineInstr *
3138 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3139                                     MachineOperand &RHS,
3140                                     MachineIRBuilder &MIRBuilder) const {
3141   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3142   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3143   static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3144                                        {AArch64::ADDWrr, AArch64::ADDWri}};
3145   bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3146   auto ImmFns = selectArithImmed(RHS);
3147   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3148   auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3149
3150   // If we matched a valid constant immediate, add those operands.
3151   if (ImmFns) {
3152     for (auto &RenderFn : *ImmFns)
3153       RenderFn(AddMI);
3154   } else {
3155     AddMI.addUse(RHS.getReg());
3156   }
3157
3158   constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3159   return &*AddMI;
3160 }
3161
3162 MachineInstr *
3163 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3164                                     MachineIRBuilder &MIRBuilder) const {
3165   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3166   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3167   static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3168                                        {AArch64::ADDSWrr, AArch64::ADDSWri}};
3169   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3170   auto ImmFns = selectArithImmed(RHS);
3171   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3172   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3173
3174   auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3175
3176   // If we matched a valid constant immediate, add those operands.
3177   if (ImmFns) {
3178     for (auto &RenderFn : *ImmFns)
3179       RenderFn(CmpMI);
3180   } else {
3181     CmpMI.addUse(RHS.getReg());
3182   }
3183
3184   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3185   return &*CmpMI;
3186 }
3187
3188 MachineInstr *
3189 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3190                                     MachineIRBuilder &MIRBuilder) const {
3191   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3192   unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3193   bool Is32Bit = (RegSize == 32);
3194   static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3195                                        {AArch64::ANDSWrr, AArch64::ANDSWri}};
3196   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3197
3198   // We might be able to fold in an immediate into the TST. We need to make sure
3199   // it's a logical immediate though, since ANDS requires that.
3200   auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3201   bool IsImmForm = ValAndVReg.hasValue() &&
3202                    AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3203   unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3204   auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3205
3206   if (IsImmForm)
3207     TstMI.addImm(
3208         AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3209   else
3210     TstMI.addUse(RHS);
3211
3212   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3213   return &*TstMI;
3214 }
3215
3216 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3217     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3218     MachineIRBuilder &MIRBuilder) const {
3219   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3220   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3221
3222   // Fold the compare if possible.
3223   MachineInstr *FoldCmp =
3224       tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3225   if (FoldCmp)
3226     return FoldCmp;
3227
3228   // Can't fold into a CMN. Just emit a normal compare.
3229   unsigned CmpOpc = 0;
3230   Register ZReg;
3231
3232   LLT CmpTy = MRI.getType(LHS.getReg());
3233   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3234          "Expected scalar or pointer");
3235   if (CmpTy == LLT::scalar(32)) {
3236     CmpOpc = AArch64::SUBSWrr;
3237     ZReg = AArch64::WZR;
3238   } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3239     CmpOpc = AArch64::SUBSXrr;
3240     ZReg = AArch64::XZR;
3241   } else {
3242     return nullptr;
3243   }
3244
3245   // Try to match immediate forms.
3246   auto ImmFns = selectArithImmed(RHS);
3247   if (ImmFns)
3248     CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3249
3250   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3251   // If we matched a valid constant immediate, add those operands.
3252   if (ImmFns) {
3253     for (auto &RenderFn : *ImmFns)
3254       RenderFn(CmpMI);
3255   } else {
3256     CmpMI.addUse(RHS.getReg());
3257   }
3258
3259   // Make sure that we can constrain the compare that we emitted.
3260   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3261   return &*CmpMI;
3262 }
3263
3264 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3265     Optional<Register> Dst, Register Op1, Register Op2,
3266     MachineIRBuilder &MIRBuilder) const {
3267   // We implement a vector concat by:
3268   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3269   // 2. Insert the upper vector into the destination's upper element
3270   // TODO: some of this code is common with G_BUILD_VECTOR handling.
3271   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3272
3273   const LLT Op1Ty = MRI.getType(Op1);
3274   const LLT Op2Ty = MRI.getType(Op2);
3275
3276   if (Op1Ty != Op2Ty) {
3277     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3278     return nullptr;
3279   }
3280   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3281
3282   if (Op1Ty.getSizeInBits() >= 128) {
3283     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3284     return nullptr;
3285   }
3286
3287   // At the moment we just support 64 bit vector concats.
3288   if (Op1Ty.getSizeInBits() != 64) {
3289     LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3290     return nullptr;
3291   }
3292
3293   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3294   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3295   const TargetRegisterClass *DstRC =
3296       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3297
3298   MachineInstr *WidenedOp1 =
3299       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3300   MachineInstr *WidenedOp2 =
3301       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3302   if (!WidenedOp1 || !WidenedOp2) {
3303     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3304     return nullptr;
3305   }
3306
3307   // Now do the insert of the upper element.
3308   unsigned InsertOpc, InsSubRegIdx;
3309   std::tie(InsertOpc, InsSubRegIdx) =
3310       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3311
3312   if (!Dst)
3313     Dst = MRI.createVirtualRegister(DstRC);
3314   auto InsElt =
3315       MIRBuilder
3316           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3317           .addImm(1) /* Lane index */
3318           .addUse(WidenedOp2->getOperand(0).getReg())
3319           .addImm(0);
3320   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3321   return &*InsElt;
3322 }
3323
3324 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3325     MachineInstr &I, MachineRegisterInfo &MRI) const {
3326   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3327          "Expected a G_FCONSTANT!");
3328   MachineOperand &ImmOp = I.getOperand(1);
3329   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3330
3331   // Only handle 32 and 64 bit defs for now.
3332   if (DefSize != 32 && DefSize != 64)
3333     return nullptr;
3334
3335   // Don't handle null values using FMOV.
3336   if (ImmOp.getFPImm()->isNullValue())
3337     return nullptr;
3338
3339   // Get the immediate representation for the FMOV.
3340   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3341   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3342                           : AArch64_AM::getFP64Imm(ImmValAPF);
3343
3344   // If this is -1, it means the immediate can't be represented as the requested
3345   // floating point value. Bail.
3346   if (Imm == -1)
3347     return nullptr;
3348
3349   // Update MI to represent the new FMOV instruction, constrain it, and return.
3350   ImmOp.ChangeToImmediate(Imm);
3351   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3352   I.setDesc(TII.get(MovOpc));
3353   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3354   return &I;
3355 }
3356
3357 MachineInstr *
3358 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3359                                      MachineIRBuilder &MIRBuilder) const {
3360   // CSINC increments the result when the predicate is false. Invert it.
3361   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3362       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3363   auto I =
3364       MIRBuilder
3365     .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3366           .addImm(InvCC);
3367   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3368   return &*I;
3369 }
3370
3371 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3372   MachineIRBuilder MIB(I);
3373   MachineRegisterInfo &MRI = *MIB.getMRI();
3374   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3375
3376   // We want to recognize this pattern:
3377   //
3378   // $z = G_FCMP pred, $x, $y
3379   // ...
3380   // $w = G_SELECT $z, $a, $b
3381   //
3382   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3383   // some copies/truncs in between.)
3384   //
3385   // If we see this, then we can emit something like this:
3386   //
3387   // fcmp $x, $y
3388   // fcsel $w, $a, $b, pred
3389   //
3390   // Rather than emitting both of the rather long sequences in the standard
3391   // G_FCMP/G_SELECT select methods.
3392
3393   // First, check if the condition is defined by a compare.
3394   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3395   while (CondDef) {
3396     // We can only fold if all of the defs have one use.
3397     if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3398       return false;
3399
3400     // We can skip over G_TRUNC since the condition is 1-bit.
3401     // Truncating/extending can have no impact on the value.
3402     unsigned Opc = CondDef->getOpcode();
3403     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3404       break;
3405
3406     // Can't see past copies from physregs.
3407     if (Opc == TargetOpcode::COPY &&
3408         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3409       return false;
3410
3411     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3412   }
3413
3414   // Is the condition defined by a compare?
3415   if (!CondDef)
3416     return false;
3417
3418   unsigned CondOpc = CondDef->getOpcode();
3419   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3420     return false;
3421
3422   AArch64CC::CondCode CondCode;
3423   if (CondOpc == TargetOpcode::G_ICMP) {
3424     CondCode = changeICMPPredToAArch64CC(
3425         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3426     if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3427                             CondDef->getOperand(1), MIB)) {
3428       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3429       return false;
3430     }
3431   } else {
3432     // Get the condition code for the select.
3433     AArch64CC::CondCode CondCode2;
3434     changeFCMPPredToAArch64CC(
3435         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3436         CondCode2);
3437
3438     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3439     // instructions to emit the comparison.
3440     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3441     // unnecessary.
3442     if (CondCode2 != AArch64CC::AL)
3443       return false;
3444
3445     // Make sure we'll be able to select the compare.
3446     unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3447     if (!CmpOpc)
3448       return false;
3449
3450     // Emit a new compare.
3451     auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3452     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3453       Cmp.addUse(CondDef->getOperand(3).getReg());
3454     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3455   }
3456
3457   // Emit the select.
3458   unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3459   auto CSel =
3460       MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3461                      {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3462           .addImm(CondCode);
3463   constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3464   I.eraseFromParent();
3465   return true;
3466 }
3467
3468 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3469     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3470     MachineIRBuilder &MIRBuilder) const {
3471   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3472          "Unexpected MachineOperand");
3473   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3474   // We want to find this sort of thing:
3475   // x = G_SUB 0, y
3476   // G_ICMP z, x
3477   //
3478   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3479   // e.g:
3480   //
3481   // cmn z, y
3482
3483   // Helper lambda to detect the subtract followed by the compare.
3484   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3485   auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3486     if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3487       return false;
3488
3489     // Need to make sure NZCV is the same at the end of the transformation.
3490     if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3491       return false;
3492
3493     // We want to match against SUBs.
3494     if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3495       return false;
3496
3497     // Make sure that we're getting
3498     // x = G_SUB 0, y
3499     auto ValAndVReg =
3500         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3501     if (!ValAndVReg || ValAndVReg->Value != 0)
3502       return false;
3503
3504     // This can safely be represented as a CMN.
3505     return true;
3506   };
3507
3508   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3509   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3510   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3511   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3512   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3513
3514   // Given this:
3515   //
3516   // x = G_SUB 0, y
3517   // G_ICMP x, z
3518   //
3519   // Produce this:
3520   //
3521   // cmn y, z
3522   if (IsCMN(LHSDef, CC))
3523     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3524
3525   // Same idea here, but with the RHS of the compare instead:
3526   //
3527   // Given this:
3528   //
3529   // x = G_SUB 0, y
3530   // G_ICMP z, x
3531   //
3532   // Produce this:
3533   //
3534   // cmn z, y
3535   if (IsCMN(RHSDef, CC))
3536     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3537
3538   // Given this:
3539   //
3540   // z = G_AND x, y
3541   // G_ICMP z, 0
3542   //
3543   // Produce this if the compare is signed:
3544   //
3545   // tst x, y
3546   if (!isUnsignedICMPPred(P) && LHSDef &&
3547       LHSDef->getOpcode() == TargetOpcode::G_AND) {
3548     // Make sure that the RHS is 0.
3549     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3550     if (!ValAndVReg || ValAndVReg->Value != 0)
3551       return nullptr;
3552
3553     return emitTST(LHSDef->getOperand(1).getReg(),
3554                    LHSDef->getOperand(2).getReg(), MIRBuilder);
3555   }
3556
3557   return nullptr;
3558 }
3559
3560 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3561   // Try to match a vector splat operation into a dup instruction.
3562   // We're looking for this pattern:
3563   //    %scalar:gpr(s64) = COPY $x0
3564   //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3565   //    %cst0:gpr(s32) = G_CONSTANT i32 0
3566   //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3567   //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3568   //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3569   //                                             %zerovec(<2 x s32>)
3570   //
3571   // ...into:
3572   // %splat = DUP %scalar
3573   // We use the regbank of the scalar to determine which kind of dup to use.
3574   MachineIRBuilder MIB(I);
3575   MachineRegisterInfo &MRI = *MIB.getMRI();
3576   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3577   using namespace TargetOpcode;
3578   using namespace MIPatternMatch;
3579
3580   // Begin matching the insert.
3581   auto *InsMI =
3582       getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3583   if (!InsMI)
3584     return false;
3585   // Match the undef vector operand.
3586   auto *UndefMI =
3587       getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3588   if (!UndefMI)
3589     return false;
3590   // Match the scalar being splatted.
3591   Register ScalarReg = InsMI->getOperand(2).getReg();
3592   const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3593   // Match the index constant 0.
3594   int64_t Index = 0;
3595   if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3596     return false;
3597
3598   // The shuffle's second operand doesn't matter if the mask is all zero.
3599   const Constant *Mask = I.getOperand(3).getShuffleMask();
3600   if (!isa<ConstantAggregateZero>(Mask))
3601     return false;
3602
3603   // We're done, now find out what kind of splat we need.
3604   LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3605   LLT EltTy = VecTy.getElementType();
3606   if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3607     LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3608     return false;
3609   }
3610   bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3611   static const unsigned OpcTable[2][2] = {
3612       {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3613       {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3614   unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3615
3616   // For FP splats, we need to widen the scalar reg via undef too.
3617   if (IsFP) {
3618     MachineInstr *Widen = emitScalarToVector(
3619         EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3620     if (!Widen)
3621       return false;
3622     ScalarReg = Widen->getOperand(0).getReg();
3623   }
3624   auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3625   if (IsFP)
3626     Dup.addImm(0);
3627   constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3628   I.eraseFromParent();
3629   return true;
3630 }
3631
3632 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3633   if (TM.getOptLevel() == CodeGenOpt::None)
3634     return false;
3635   if (tryOptVectorDup(I))
3636     return true;
3637   return false;
3638 }
3639
3640 bool AArch64InstructionSelector::selectShuffleVector(
3641     MachineInstr &I, MachineRegisterInfo &MRI) const {
3642   if (tryOptVectorShuffle(I))
3643     return true;
3644   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3645   Register Src1Reg = I.getOperand(1).getReg();
3646   const LLT Src1Ty = MRI.getType(Src1Reg);
3647   Register Src2Reg = I.getOperand(2).getReg();
3648   const LLT Src2Ty = MRI.getType(Src2Reg);
3649   const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
3650
3651   MachineBasicBlock &MBB = *I.getParent();
3652   MachineFunction &MF = *MBB.getParent();
3653   LLVMContext &Ctx = MF.getFunction().getContext();
3654
3655   SmallVector<int, 8> Mask;
3656   ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
3657
3658   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3659   // it's originated from a <1 x T> type. Those should have been lowered into
3660   // G_BUILD_VECTOR earlier.
3661   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3662     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3663     return false;
3664   }
3665
3666   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3667
3668   SmallVector<Constant *, 64> CstIdxs;
3669   for (int Val : Mask) {
3670     // For now, any undef indexes we'll just assume to be 0. This should be
3671     // optimized in future, e.g. to select DUP etc.
3672     Val = Val < 0 ? 0 : Val;
3673     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3674       unsigned Offset = Byte + Val * BytesPerElt;
3675       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3676     }
3677   }
3678
3679   MachineIRBuilder MIRBuilder(I);
3680
3681   // Use a constant pool to load the index vector for TBL.
3682   Constant *CPVal = ConstantVector::get(CstIdxs);
3683   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3684   if (!IndexLoad) {
3685     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3686     return false;
3687   }
3688
3689   if (DstTy.getSizeInBits() != 128) {
3690     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3691     // This case can be done with TBL1.
3692     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3693     if (!Concat) {
3694       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3695       return false;
3696     }
3697
3698     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3699     IndexLoad =
3700         emitScalarToVector(64, &AArch64::FPR128RegClass,
3701                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
3702
3703     auto TBL1 = MIRBuilder.buildInstr(
3704         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3705         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3706     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3707
3708     auto Copy =
3709         MIRBuilder
3710             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3711             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3712     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3713     I.eraseFromParent();
3714     return true;
3715   }
3716
3717   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3718   // Q registers for regalloc.
3719   auto RegSeq = MIRBuilder
3720                     .buildInstr(TargetOpcode::REG_SEQUENCE,
3721                                 {&AArch64::QQRegClass}, {Src1Reg})
3722                     .addImm(AArch64::qsub0)
3723                     .addUse(Src2Reg)
3724                     .addImm(AArch64::qsub1);
3725
3726   auto TBL2 =
3727       MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3728                             {RegSeq, IndexLoad->getOperand(0).getReg()});
3729   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3730   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3731   I.eraseFromParent();
3732   return true;
3733 }
3734
3735 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3736     Optional<Register> DstReg, Register SrcReg, Register EltReg,
3737     unsigned LaneIdx, const RegisterBank &RB,
3738     MachineIRBuilder &MIRBuilder) const {
3739   MachineInstr *InsElt = nullptr;
3740   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3741   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3742
3743   // Create a register to define with the insert if one wasn't passed in.
3744   if (!DstReg)
3745     DstReg = MRI.createVirtualRegister(DstRC);
3746
3747   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3748   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3749
3750   if (RB.getID() == AArch64::FPRRegBankID) {
3751     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3752     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3753                  .addImm(LaneIdx)
3754                  .addUse(InsSub->getOperand(0).getReg())
3755                  .addImm(0);
3756   } else {
3757     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3758                  .addImm(LaneIdx)
3759                  .addUse(EltReg);
3760   }
3761
3762   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3763   return InsElt;
3764 }
3765
3766 bool AArch64InstructionSelector::selectInsertElt(
3767     MachineInstr &I, MachineRegisterInfo &MRI) const {
3768   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3769
3770   // Get information on the destination.
3771   Register DstReg = I.getOperand(0).getReg();
3772   const LLT DstTy = MRI.getType(DstReg);
3773   unsigned VecSize = DstTy.getSizeInBits();
3774
3775   // Get information on the element we want to insert into the destination.
3776   Register EltReg = I.getOperand(2).getReg();
3777   const LLT EltTy = MRI.getType(EltReg);
3778   unsigned EltSize = EltTy.getSizeInBits();
3779   if (EltSize < 16 || EltSize > 64)
3780     return false; // Don't support all element types yet.
3781
3782   // Find the definition of the index. Bail out if it's not defined by a
3783   // G_CONSTANT.
3784   Register IdxReg = I.getOperand(3).getReg();
3785   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3786   if (!VRegAndVal)
3787     return false;
3788   unsigned LaneIdx = VRegAndVal->Value;
3789
3790   // Perform the lane insert.
3791   Register SrcReg = I.getOperand(1).getReg();
3792   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3793   MachineIRBuilder MIRBuilder(I);
3794
3795   if (VecSize < 128) {
3796     // If the vector we're inserting into is smaller than 128 bits, widen it
3797     // to 128 to do the insert.
3798     MachineInstr *ScalarToVec = emitScalarToVector(
3799         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3800     if (!ScalarToVec)
3801       return false;
3802     SrcReg = ScalarToVec->getOperand(0).getReg();
3803   }
3804
3805   // Create an insert into a new FPR128 register.
3806   // Note that if our vector is already 128 bits, we end up emitting an extra
3807   // register.
3808   MachineInstr *InsMI =
3809       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3810
3811   if (VecSize < 128) {
3812     // If we had to widen to perform the insert, then we have to demote back to
3813     // the original size to get the result we want.
3814     Register DemoteVec = InsMI->getOperand(0).getReg();
3815     const TargetRegisterClass *RC =
3816         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3817     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3818       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3819       return false;
3820     }
3821     unsigned SubReg = 0;
3822     if (!getSubRegForClass(RC, TRI, SubReg))
3823       return false;
3824     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3825       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3826                         << "\n");
3827       return false;
3828     }
3829     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3830         .addReg(DemoteVec, 0, SubReg);
3831     RBI.constrainGenericRegister(DstReg, *RC, MRI);
3832   } else {
3833     // No widening needed.
3834     InsMI->getOperand(0).setReg(DstReg);
3835     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3836   }
3837
3838   I.eraseFromParent();
3839   return true;
3840 }
3841
3842 bool AArch64InstructionSelector::selectBuildVector(
3843     MachineInstr &I, MachineRegisterInfo &MRI) const {
3844   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3845   // Until we port more of the optimized selections, for now just use a vector
3846   // insert sequence.
3847   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3848   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3849   unsigned EltSize = EltTy.getSizeInBits();
3850   if (EltSize < 16 || EltSize > 64)
3851     return false; // Don't support all element types yet.
3852   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3853   MachineIRBuilder MIRBuilder(I);
3854
3855   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3856   MachineInstr *ScalarToVec =
3857       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3858                          I.getOperand(1).getReg(), MIRBuilder);
3859   if (!ScalarToVec)
3860     return false;
3861
3862   Register DstVec = ScalarToVec->getOperand(0).getReg();
3863   unsigned DstSize = DstTy.getSizeInBits();
3864
3865   // Keep track of the last MI we inserted. Later on, we might be able to save
3866   // a copy using it.
3867   MachineInstr *PrevMI = nullptr;
3868   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3869     // Note that if we don't do a subregister copy, we can end up making an
3870     // extra register.
3871     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3872                               MIRBuilder);
3873     DstVec = PrevMI->getOperand(0).getReg();
3874   }
3875
3876   // If DstTy's size in bits is less than 128, then emit a subregister copy
3877   // from DstVec to the last register we've defined.
3878   if (DstSize < 128) {
3879     // Force this to be FPR using the destination vector.
3880     const TargetRegisterClass *RC =
3881         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3882     if (!RC)
3883       return false;
3884     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3885       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3886       return false;
3887     }
3888
3889     unsigned SubReg = 0;
3890     if (!getSubRegForClass(RC, TRI, SubReg))
3891       return false;
3892     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3893       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3894                         << "\n");
3895       return false;
3896     }
3897
3898     Register Reg = MRI.createVirtualRegister(RC);
3899     Register DstReg = I.getOperand(0).getReg();
3900
3901     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3902         .addReg(DstVec, 0, SubReg);
3903     MachineOperand &RegOp = I.getOperand(1);
3904     RegOp.setReg(Reg);
3905     RBI.constrainGenericRegister(DstReg, *RC, MRI);
3906   } else {
3907     // We don't need a subregister copy. Save a copy by re-using the
3908     // destination register on the final insert.
3909     assert(PrevMI && "PrevMI was null?");
3910     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3911     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3912   }
3913
3914   I.eraseFromParent();
3915   return true;
3916 }
3917
3918 /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
3919 /// ID if it exists, and 0 otherwise.
3920 static unsigned findIntrinsicID(MachineInstr &I) {
3921   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3922     return Op.isIntrinsicID();
3923   });
3924   if (IntrinOp == I.operands_end())
3925     return 0;
3926   return IntrinOp->getIntrinsicID();
3927 }
3928
3929 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3930     MachineInstr &I, MachineRegisterInfo &MRI) const {
3931   // Find the intrinsic ID.
3932   unsigned IntrinID = findIntrinsicID(I);
3933   if (!IntrinID)
3934     return false;
3935   MachineIRBuilder MIRBuilder(I);
3936
3937   // Select the instruction.
3938   switch (IntrinID) {
3939   default:
3940     return false;
3941   case Intrinsic::trap:
3942     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3943     break;
3944   case Intrinsic::debugtrap:
3945     if (!STI.isTargetWindows())
3946       return false;
3947     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3948     break;
3949   }
3950
3951   I.eraseFromParent();
3952   return true;
3953 }
3954
3955 bool AArch64InstructionSelector::selectIntrinsic(
3956     MachineInstr &I, MachineRegisterInfo &MRI) const {
3957   unsigned IntrinID = findIntrinsicID(I);
3958   if (!IntrinID)
3959     return false;
3960   MachineIRBuilder MIRBuilder(I);
3961
3962   switch (IntrinID) {
3963   default:
3964     break;
3965   case Intrinsic::aarch64_crypto_sha1h:
3966     Register DstReg = I.getOperand(0).getReg();
3967     Register SrcReg = I.getOperand(2).getReg();
3968
3969     // FIXME: Should this be an assert?
3970     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3971         MRI.getType(SrcReg).getSizeInBits() != 32)
3972       return false;
3973
3974     // The operation has to happen on FPRs. Set up some new FPR registers for
3975     // the source and destination if they are on GPRs.
3976     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3977       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3978       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3979
3980       // Make sure the copy ends up getting constrained properly.
3981       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3982                                    AArch64::GPR32RegClass, MRI);
3983     }
3984
3985     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3986       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3987
3988     // Actually insert the instruction.
3989     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3990     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3991
3992     // Did we create a new register for the destination?
3993     if (DstReg != I.getOperand(0).getReg()) {
3994       // Yep. Copy the result of the instruction back into the original
3995       // destination.
3996       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3997       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3998                                    AArch64::GPR32RegClass, MRI);
3999     }
4000
4001     I.eraseFromParent();
4002     return true;
4003   }
4004   return false;
4005 }
4006
4007 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4008   auto &MI = *Root.getParent();
4009   auto &MBB = *MI.getParent();
4010   auto &MF = *MBB.getParent();
4011   auto &MRI = MF.getRegInfo();
4012   uint64_t Immed;
4013   if (Root.isImm())
4014     Immed = Root.getImm();
4015   else if (Root.isCImm())
4016     Immed = Root.getCImm()->getZExtValue();
4017   else if (Root.isReg()) {
4018     auto ValAndVReg =
4019         getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
4020     if (!ValAndVReg)
4021       return None;
4022     Immed = ValAndVReg->Value;
4023   } else
4024     return None;
4025   return Immed;
4026 }
4027
4028 InstructionSelector::ComplexRendererFns
4029 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4030   auto MaybeImmed = getImmedFromMO(Root);
4031   if (MaybeImmed == None || *MaybeImmed > 31)
4032     return None;
4033   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4034   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4035 }
4036
4037 InstructionSelector::ComplexRendererFns
4038 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4039   auto MaybeImmed = getImmedFromMO(Root);
4040   if (MaybeImmed == None || *MaybeImmed > 31)
4041     return None;
4042   uint64_t Enc = 31 - *MaybeImmed;
4043   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4044 }
4045
4046 InstructionSelector::ComplexRendererFns
4047 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4048   auto MaybeImmed = getImmedFromMO(Root);
4049   if (MaybeImmed == None || *MaybeImmed > 63)
4050     return None;
4051   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4052   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4053 }
4054
4055 InstructionSelector::ComplexRendererFns
4056 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4057   auto MaybeImmed = getImmedFromMO(Root);
4058   if (MaybeImmed == None || *MaybeImmed > 63)
4059     return None;
4060   uint64_t Enc = 63 - *MaybeImmed;
4061   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4062 }
4063
4064 /// Helper to select an immediate value that can be represented as a 12-bit
4065 /// value shifted left by either 0 or 12. If it is possible to do so, return
4066 /// the immediate and shift value. If not, return None.
4067 ///
4068 /// Used by selectArithImmed and selectNegArithImmed.
4069 InstructionSelector::ComplexRendererFns
4070 AArch64InstructionSelector::select12BitValueWithLeftShift(
4071     uint64_t Immed) const {
4072   unsigned ShiftAmt;
4073   if (Immed >> 12 == 0) {
4074     ShiftAmt = 0;
4075   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4076     ShiftAmt = 12;
4077     Immed = Immed >> 12;
4078   } else
4079     return None;
4080
4081   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4082   return {{
4083       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4084       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4085   }};
4086 }
4087
4088 /// SelectArithImmed - Select an immediate value that can be represented as
4089 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
4090 /// Val set to the 12-bit value and Shift set to the shifter operand.
4091 InstructionSelector::ComplexRendererFns
4092 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4093   // This function is called from the addsub_shifted_imm ComplexPattern,
4094   // which lists [imm] as the list of opcode it's interested in, however
4095   // we still need to check whether the operand is actually an immediate
4096   // here because the ComplexPattern opcode list is only used in
4097   // root-level opcode matching.
4098   auto MaybeImmed = getImmedFromMO(Root);
4099   if (MaybeImmed == None)
4100     return None;
4101   return select12BitValueWithLeftShift(*MaybeImmed);
4102 }
4103
4104 /// SelectNegArithImmed - As above, but negates the value before trying to
4105 /// select it.
4106 InstructionSelector::ComplexRendererFns
4107 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4108   // We need a register here, because we need to know if we have a 64 or 32
4109   // bit immediate.
4110   if (!Root.isReg())
4111     return None;
4112   auto MaybeImmed = getImmedFromMO(Root);
4113   if (MaybeImmed == None)
4114     return None;
4115   uint64_t Immed = *MaybeImmed;
4116
4117   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4118   // have the opposite effect on the C flag, so this pattern mustn't match under
4119   // those circumstances.
4120   if (Immed == 0)
4121     return None;
4122
4123   // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4124   // the root.
4125   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4126   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4127     Immed = ~((uint32_t)Immed) + 1;
4128   else
4129     Immed = ~Immed + 1ULL;
4130
4131   if (Immed & 0xFFFFFFFFFF000000ULL)
4132     return None;
4133
4134   Immed &= 0xFFFFFFULL;
4135   return select12BitValueWithLeftShift(Immed);
4136 }
4137
4138 /// Return true if it is worth folding MI into an extended register. That is,
4139 /// if it's safe to pull it into the addressing mode of a load or store as a
4140 /// shift.
4141 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4142     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4143   // Always fold if there is one use, or if we're optimizing for size.
4144   Register DefReg = MI.getOperand(0).getReg();
4145   if (MRI.hasOneUse(DefReg) ||
4146       MI.getParent()->getParent()->getFunction().hasMinSize())
4147     return true;
4148
4149   // It's better to avoid folding and recomputing shifts when we don't have a
4150   // fastpath.
4151   if (!STI.hasLSLFast())
4152     return false;
4153
4154   // We have a fastpath, so folding a shift in and potentially computing it
4155   // many times may be beneficial. Check if this is only used in memory ops.
4156   // If it is, then we should fold.
4157   return all_of(MRI.use_instructions(DefReg),
4158                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4159 }
4160
4161 /// This is used for computing addresses like this:
4162 ///
4163 /// ldr x1, [x2, x3, lsl #3]
4164 ///
4165 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4166 /// is a constant value specific to this load instruction. That is, we'll never
4167 /// see anything other than a 3 here (which corresponds to the size of the
4168 /// element being loaded.)
4169 InstructionSelector::ComplexRendererFns
4170 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4171     MachineOperand &Root, unsigned SizeInBytes) const {
4172   if (!Root.isReg())
4173     return None;
4174   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4175
4176   // Make sure that the memory op is a valid size.
4177   int64_t LegalShiftVal = Log2_32(SizeInBytes);
4178   if (LegalShiftVal == 0)
4179     return None;
4180
4181   // We want to find something like this:
4182   //
4183   // val = G_CONSTANT LegalShiftVal
4184   // shift = G_SHL off_reg val
4185   // ptr = G_GEP base_reg shift
4186   // x = G_LOAD ptr
4187   //
4188   // And fold it into this addressing mode:
4189   //
4190   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4191
4192   // Check if we can find the G_GEP.
4193   MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4194   if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4195     return None;
4196
4197   // Now, try to match an opcode which will match our specific offset.
4198   // We want a G_SHL or a G_MUL.
4199   MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4200   if (!OffsetInst)
4201     return None;
4202
4203   unsigned OffsetOpc = OffsetInst->getOpcode();
4204   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4205     return None;
4206
4207   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4208     return None;
4209
4210   // Now, try to find the specific G_CONSTANT. Start by assuming that the
4211   // register we will offset is the LHS, and the register containing the
4212   // constant is the RHS.
4213   Register OffsetReg = OffsetInst->getOperand(1).getReg();
4214   Register ConstantReg = OffsetInst->getOperand(2).getReg();
4215   auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4216   if (!ValAndVReg) {
4217     // We didn't get a constant on the RHS. If the opcode is a shift, then
4218     // we're done.
4219     if (OffsetOpc == TargetOpcode::G_SHL)
4220       return None;
4221
4222     // If we have a G_MUL, we can use either register. Try looking at the RHS.
4223     std::swap(OffsetReg, ConstantReg);
4224     ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4225     if (!ValAndVReg)
4226       return None;
4227   }
4228
4229   // The value must fit into 3 bits, and must be positive. Make sure that is
4230   // true.
4231   int64_t ImmVal = ValAndVReg->Value;
4232
4233   // Since we're going to pull this into a shift, the constant value must be
4234   // a power of 2. If we got a multiply, then we need to check this.
4235   if (OffsetOpc == TargetOpcode::G_MUL) {
4236     if (!isPowerOf2_32(ImmVal))
4237       return None;
4238
4239     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4240     ImmVal = Log2_32(ImmVal);
4241   }
4242
4243   if ((ImmVal & 0x7) != ImmVal)
4244     return None;
4245
4246   // We are only allowed to shift by LegalShiftVal. This shift value is built
4247   // into the instruction, so we can't just use whatever we want.
4248   if (ImmVal != LegalShiftVal)
4249     return None;
4250
4251   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4252   // offset. Signify that we are shifting by setting the shift flag to 1.
4253   return {{[=](MachineInstrBuilder &MIB) {
4254              MIB.addUse(Gep->getOperand(1).getReg());
4255            },
4256            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4257            [=](MachineInstrBuilder &MIB) {
4258              // Need to add both immediates here to make sure that they are both
4259              // added to the instruction.
4260              MIB.addImm(0);
4261              MIB.addImm(1);
4262            }}};
4263 }
4264
4265 /// This is used for computing addresses like this:
4266 ///
4267 /// ldr x1, [x2, x3]
4268 ///
4269 /// Where x2 is the base register, and x3 is an offset register.
4270 ///
4271 /// When possible (or profitable) to fold a G_GEP into the address calculation,
4272 /// this will do so. Otherwise, it will return None.
4273 InstructionSelector::ComplexRendererFns
4274 AArch64InstructionSelector::selectAddrModeRegisterOffset(
4275     MachineOperand &Root) const {
4276   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4277
4278   // We need a GEP.
4279   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4280   if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4281     return None;
4282
4283   // If this is used more than once, let's not bother folding.
4284   // TODO: Check if they are memory ops. If they are, then we can still fold
4285   // without having to recompute anything.
4286   if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4287     return None;
4288
4289   // Base is the GEP's LHS, offset is its RHS.
4290   return {{[=](MachineInstrBuilder &MIB) {
4291              MIB.addUse(Gep->getOperand(1).getReg());
4292            },
4293            [=](MachineInstrBuilder &MIB) {
4294              MIB.addUse(Gep->getOperand(2).getReg());
4295            },
4296            [=](MachineInstrBuilder &MIB) {
4297              // Need to add both immediates here to make sure that they are both
4298              // added to the instruction.
4299              MIB.addImm(0);
4300              MIB.addImm(0);
4301            }}};
4302 }
4303
4304 /// This is intended to be equivalent to selectAddrModeXRO in
4305 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4306 InstructionSelector::ComplexRendererFns
4307 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4308                                               unsigned SizeInBytes) const {
4309   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4310
4311   // If we have a constant offset, then we probably don't want to match a
4312   // register offset.
4313   if (isBaseWithConstantOffset(Root, MRI))
4314     return None;
4315
4316   // Try to fold shifts into the addressing mode.
4317   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4318   if (AddrModeFns)
4319     return AddrModeFns;
4320
4321   // If that doesn't work, see if it's possible to fold in registers from
4322   // a GEP.
4323   return selectAddrModeRegisterOffset(Root);
4324 }
4325
4326 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
4327 /// should only match when there is an offset that is not valid for a scaled
4328 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
4329 /// memory reference, which is needed here to know what is valid for a scaled
4330 /// immediate.
4331 InstructionSelector::ComplexRendererFns
4332 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4333                                                    unsigned Size) const {
4334   MachineRegisterInfo &MRI =
4335       Root.getParent()->getParent()->getParent()->getRegInfo();
4336
4337   if (!Root.isReg())
4338     return None;
4339
4340   if (!isBaseWithConstantOffset(Root, MRI))
4341     return None;
4342
4343   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4344   if (!RootDef)
4345     return None;
4346
4347   MachineOperand &OffImm = RootDef->getOperand(2);
4348   if (!OffImm.isReg())
4349     return None;
4350   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4351   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4352     return None;
4353   int64_t RHSC;
4354   MachineOperand &RHSOp1 = RHS->getOperand(1);
4355   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4356     return None;
4357   RHSC = RHSOp1.getCImm()->getSExtValue();
4358
4359   // If the offset is valid as a scaled immediate, don't match here.
4360   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4361     return None;
4362   if (RHSC >= -256 && RHSC < 256) {
4363     MachineOperand &Base = RootDef->getOperand(1);
4364     return {{
4365         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4366         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4367     }};
4368   }
4369   return None;
4370 }
4371
4372 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
4373 /// "Size" argument is the size in bytes of the memory reference, which
4374 /// determines the scale.
4375 InstructionSelector::ComplexRendererFns
4376 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4377                                                   unsigned Size) const {
4378   MachineRegisterInfo &MRI =
4379       Root.getParent()->getParent()->getParent()->getRegInfo();
4380
4381   if (!Root.isReg())
4382     return None;
4383
4384   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4385   if (!RootDef)
4386     return None;
4387
4388   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4389     return {{
4390         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4391         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4392     }};
4393   }
4394
4395   if (isBaseWithConstantOffset(Root, MRI)) {
4396     MachineOperand &LHS = RootDef->getOperand(1);
4397     MachineOperand &RHS = RootDef->getOperand(2);
4398     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4399     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4400     if (LHSDef && RHSDef) {
4401       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4402       unsigned Scale = Log2_32(Size);
4403       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4404         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4405           return {{
4406               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4407               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4408           }};
4409
4410         return {{
4411             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4412             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4413         }};
4414       }
4415     }
4416   }
4417
4418   // Before falling back to our general case, check if the unscaled
4419   // instructions can handle this. If so, that's preferable.
4420   if (selectAddrModeUnscaled(Root, Size).hasValue())
4421     return None;
4422
4423   return {{
4424       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4425       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4426   }};
4427 }
4428
4429 /// Given a shift instruction, return the correct shift type for that
4430 /// instruction.
4431 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
4432   // TODO: Handle AArch64_AM::ROR
4433   switch (MI.getOpcode()) {
4434   default:
4435     return AArch64_AM::InvalidShiftExtend;
4436   case TargetOpcode::G_SHL:
4437     return AArch64_AM::LSL;
4438   case TargetOpcode::G_LSHR:
4439     return AArch64_AM::LSR;
4440   case TargetOpcode::G_ASHR:
4441     return AArch64_AM::ASR;
4442   }
4443 }
4444
4445 /// Select a "shifted register" operand. If the value is not shifted, set the
4446 /// shift operand to a default value of "lsl 0".
4447 ///
4448 /// TODO: Allow shifted register to be rotated in logical instructions.
4449 InstructionSelector::ComplexRendererFns
4450 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4451   if (!Root.isReg())
4452     return None;
4453   MachineRegisterInfo &MRI =
4454       Root.getParent()->getParent()->getParent()->getRegInfo();
4455
4456   // Check if the operand is defined by an instruction which corresponds to
4457   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
4458   //
4459   // TODO: Handle AArch64_AM::ROR for logical instructions.
4460   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
4461   if (!ShiftInst)
4462     return None;
4463   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
4464   if (ShType == AArch64_AM::InvalidShiftExtend)
4465     return None;
4466   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
4467     return None;
4468
4469   // Need an immediate on the RHS.
4470   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
4471   auto Immed = getImmedFromMO(ShiftRHS);
4472   if (!Immed)
4473     return None;
4474
4475   // We have something that we can fold. Fold in the shift's LHS and RHS into
4476   // the instruction.
4477   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
4478   Register ShiftReg = ShiftLHS.getReg();
4479
4480   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
4481   unsigned Val = *Immed & (NumBits - 1);
4482   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
4483
4484   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
4485            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
4486 }
4487
4488 /// Get the correct ShiftExtendType for an extend instruction.
4489 static AArch64_AM::ShiftExtendType
4490 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
4491   unsigned Opc = MI.getOpcode();
4492
4493   // Handle explicit extend instructions first.
4494   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
4495     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4496     assert(Size != 64 && "Extend from 64 bits?");
4497     switch (Size) {
4498     case 8:
4499       return AArch64_AM::SXTB;
4500     case 16:
4501       return AArch64_AM::SXTH;
4502     case 32:
4503       return AArch64_AM::SXTW;
4504     default:
4505       return AArch64_AM::InvalidShiftExtend;
4506     }
4507   }
4508
4509   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
4510     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4511     assert(Size != 64 && "Extend from 64 bits?");
4512     switch (Size) {
4513     case 8:
4514       return AArch64_AM::UXTB;
4515     case 16:
4516       return AArch64_AM::UXTH;
4517     case 32:
4518       return AArch64_AM::UXTW;
4519     default:
4520       return AArch64_AM::InvalidShiftExtend;
4521     }
4522   }
4523
4524   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
4525   // on the RHS.
4526   if (Opc != TargetOpcode::G_AND)
4527     return AArch64_AM::InvalidShiftExtend;
4528
4529   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
4530   if (!MaybeAndMask)
4531     return AArch64_AM::InvalidShiftExtend;
4532   uint64_t AndMask = *MaybeAndMask;
4533   switch (AndMask) {
4534   default:
4535     return AArch64_AM::InvalidShiftExtend;
4536   case 0xFF:
4537     return AArch64_AM::UXTB;
4538   case 0xFFFF:
4539     return AArch64_AM::UXTH;
4540   case 0xFFFFFFFF:
4541     return AArch64_AM::UXTW;
4542   }
4543 }
4544
4545 Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
4546     Register ExtReg, MachineIRBuilder &MIB) const {
4547   MachineRegisterInfo &MRI = *MIB.getMRI();
4548   if (MRI.getType(ExtReg).getSizeInBits() == 32)
4549     return ExtReg;
4550
4551   // Insert a copy to move ExtReg to GPR32.
4552   Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4553   auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
4554
4555   // Select the copy into a subregister copy.
4556   selectCopy(*Copy, TII, MRI, TRI, RBI);
4557   return Copy.getReg(0);
4558 }
4559
4560 /// Select an "extended register" operand. This operand folds in an extend
4561 /// followed by an optional left shift.
4562 InstructionSelector::ComplexRendererFns
4563 AArch64InstructionSelector::selectArithExtendedRegister(
4564     MachineOperand &Root) const {
4565   if (!Root.isReg())
4566     return None;
4567   MachineRegisterInfo &MRI =
4568       Root.getParent()->getParent()->getParent()->getRegInfo();
4569
4570   uint64_t ShiftVal = 0;
4571   Register ExtReg;
4572   AArch64_AM::ShiftExtendType Ext;
4573   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
4574   if (!RootDef)
4575     return None;
4576
4577   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
4578     return None;
4579
4580   // Check if we can fold a shift and an extend.
4581   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
4582     // Look for a constant on the RHS of the shift.
4583     MachineOperand &RHS = RootDef->getOperand(2);
4584     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
4585     if (!MaybeShiftVal)
4586       return None;
4587     ShiftVal = *MaybeShiftVal;
4588     if (ShiftVal > 4)
4589       return None;
4590     // Look for a valid extend instruction on the LHS of the shift.
4591     MachineOperand &LHS = RootDef->getOperand(1);
4592     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4593     if (!ExtDef)
4594       return None;
4595     Ext = getExtendTypeForInst(*ExtDef, MRI);
4596     if (Ext == AArch64_AM::InvalidShiftExtend)
4597       return None;
4598     ExtReg = ExtDef->getOperand(1).getReg();
4599   } else {
4600     // Didn't get a shift. Try just folding an extend.
4601     Ext = getExtendTypeForInst(*RootDef, MRI);
4602     if (Ext == AArch64_AM::InvalidShiftExtend)
4603       return None;
4604     ExtReg = RootDef->getOperand(1).getReg();
4605
4606     // If we have a 32 bit instruction which zeroes out the high half of a
4607     // register, we get an implicit zero extend for free. Check if we have one.
4608     // FIXME: We actually emit the extend right now even though we don't have
4609     // to.
4610     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
4611       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
4612       if (ExtInst && isDef32(*ExtInst))
4613         return None;
4614     }
4615   }
4616
4617   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
4618   // copy.
4619   MachineIRBuilder MIB(*RootDef);
4620   ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
4621
4622   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
4623            [=](MachineInstrBuilder &MIB) {
4624              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
4625            }}};
4626 }
4627
4628 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4629                                                 const MachineInstr &MI) const {
4630   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4631   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4632   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4633   assert(CstVal && "Expected constant value");
4634   MIB.addImm(CstVal.getValue());
4635 }
4636
4637 void AArch64InstructionSelector::renderLogicalImm32(
4638     MachineInstrBuilder &MIB, const MachineInstr &I) const {
4639   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4640   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4641   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
4642   MIB.addImm(Enc);
4643 }
4644
4645 void AArch64InstructionSelector::renderLogicalImm64(
4646     MachineInstrBuilder &MIB, const MachineInstr &I) const {
4647   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4648   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4649   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
4650   MIB.addImm(Enc);
4651 }
4652
4653 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
4654     const MachineInstr &MI, unsigned NumBytes) const {
4655   if (!MI.mayLoadOrStore())
4656     return false;
4657   assert(MI.hasOneMemOperand() &&
4658          "Expected load/store to have only one mem op!");
4659   return (*MI.memoperands_begin())->getSize() == NumBytes;
4660 }
4661
4662 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
4663   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4664   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
4665     return false;
4666
4667   // Only return true if we know the operation will zero-out the high half of
4668   // the 64-bit register. Truncates can be subregister copies, which don't
4669   // zero out the high bits. Copies and other copy-like instructions can be
4670   // fed by truncates, or could be lowered as subregister copies.
4671   switch (MI.getOpcode()) {
4672   default:
4673     return true;
4674   case TargetOpcode::COPY:
4675   case TargetOpcode::G_BITCAST:
4676   case TargetOpcode::G_TRUNC:
4677   case TargetOpcode::G_PHI:
4678     return false;
4679   }
4680 }
4681
4682 namespace llvm {
4683 InstructionSelector *
4684 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4685                                  AArch64Subtarget &Subtarget,
4686                                  AArch64RegisterBankInfo &RBI) {
4687   return new AArch64InstructionSelector(TM, Subtarget, RBI);
4688 }
4689 }