lib/Target/AArch64/AArch64InstructionSelector.cpp

   1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 /// \file
   9 /// This file implements the targeting of the InstructionSelector class for
  10 /// AArch64.
  11 /// \todo This should be generated by TableGen.
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AArch64InstrInfo.h"
  15 #include "AArch64MachineFunctionInfo.h"
  16 #include "AArch64RegisterBankInfo.h"
  17 #include "AArch64RegisterInfo.h"
  18 #include "AArch64Subtarget.h"
  19 #include "AArch64TargetMachine.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "llvm/ADT/Optional.h"
  22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
  24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  26 #include "llvm/CodeGen/GlobalISel/Utils.h"
  27 #include "llvm/CodeGen/MachineBasicBlock.h"
  28 #include "llvm/CodeGen/MachineConstantPool.h"
  29 #include "llvm/CodeGen/MachineFunction.h"
  30 #include "llvm/CodeGen/MachineInstr.h"
  31 #include "llvm/CodeGen/MachineInstrBuilder.h"
  32 #include "llvm/CodeGen/MachineOperand.h"
  33 #include "llvm/CodeGen/MachineRegisterInfo.h"
  34 #include "llvm/IR/Type.h"
  35 #include "llvm/Support/Debug.h"
  36 #include "llvm/Support/raw_ostream.h"
  37
  38 #define DEBUG_TYPE "aarch64-isel"
  39
  40 using namespace llvm;
  41
  42 namespace {
  43
  44 #define GET_GLOBALISEL_PREDICATE_BITSET
  45 #include "AArch64GenGlobalISel.inc"
  46 #undef GET_GLOBALISEL_PREDICATE_BITSET
  47
/// AArch64 implementation of the GlobalISel InstructionSelector interface.
///
/// The class combines the tblgen-erated selectImpl() (pulled in from
/// AArch64GenGlobalISel.inc) with hand-written C++ helpers: pre-selection
/// lowering, per-opcode selection routines, instruction emission helpers and
/// the ComplexRendererFns callbacks for complex operands.
class AArch64InstructionSelector : public InstructionSelector {
public:
  /// The definition appears later in this file; it keeps references to all
  /// three arguments, so they must outlive the selector.
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  /// Overrides InstructionSelector::select().
  bool select(MachineInstr &I) override;
  /// Returns this file's DEBUG_TYPE string.
  static const char *getName() { return DEBUG_TYPE; }

  /// Per-function setup: forwards to the base class and then refreshes the
  /// cached ProduceNonFlagSettingCondBr flag for \p MF.
  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  /// A lowering phase that runs before any selection attempts.
  void preISelLower(MachineInstr &I) const;

  /// An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  void contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  // Hand-written selection routines. Their definitions are not part of this
  // declaration; NOTE(review): presumably each returns true when \p I was
  // selected successfully -- confirm against the definitions.
  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register.
  // NOTE(review): an earlier revision of this comment said the register is
  // "returned via 'Dst'", but there is no such parameter; the result is
  // presumably obtained from the returned MachineInstr -- confirm.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Constant pool helpers.
  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  // Emission helpers for compares and arithmetic. NOTE(review): judging by
  // the return type, each hands back the emitted instruction -- the
  // definitions are not visible here, so confirm failure behaviour there.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  // Arithmetic immediate operand matchers.
  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  // Unscaled addressing mode matcher. The fixed-size wrappers below forward
  // the access size to it: 1, 2, 4, 8 and 16 for the 8-, 16-, 32-, 64- and
  // 128-bit variants respectively.
  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  // Indexed addressing mode matcher. The templated overload takes the width
  // as a template argument and forwards Width / 8 as the size.
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  // Register-offset (XRO) addressing mode matchers and their helpers. As
  // above, the templated overload forwards Width / 8 as SizeInBytes.
  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  // Shifted-register operand matcher; the arithmetic and logical variants
  // below currently both forward to it unchanged.
  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }

  /// Instructions that accept extend modifiers like UXTW expect the register
  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
  /// subregister copy if necessary. Return either ExtReg, or the result of the
  /// new copy.
  Register narrowExtendRegIfNeeded(Register ExtReg,
                                             MachineIRBuilder &MIB) const;
  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  // Custom operand renderers: each adds operand(s) to \p MIB based on the
  // given instruction. NOTE(review): presumably referenced by the generated
  // selector tables -- confirm in AArch64GenGlobalISel.inc.
  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  // Target objects captured at construction time. They are held by reference
  // and initialized in this order by the constructor.
  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  // Recomputed by setupMF() for every function: true when the function does
  // NOT carry the SpeculativeLoadHardening attribute.
  bool ProduceNonFlagSettingCondBr = false;

// Predicate declarations generated by TableGen.
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
 280
 281 } // end anonymous namespace
 282
 283 #define GET_GLOBALISEL_IMPL
 284 #include "AArch64GenGlobalISel.inc"
 285 #undef GET_GLOBALISEL_IMPL
 286
/// Construct the selector for one subtarget.
///
/// Binds the target machine, subtarget and register bank info by reference,
/// and takes the instruction info and register info from \p STI. The rest of
/// the member-initializer list (predicate and temporary members) comes from
/// the TableGen-generated AArch64GenGlobalISel.inc fragments included below,
/// so the textual order of the initializers and includes must be preserved.
AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
// Generated initializers for the predicate members.
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
// Generated initializers for the selectImpl() temporaries.
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
 300
 301 // FIXME: This should be target-independent, inferred from the types declared
 302 // for each class in the bank.
 303 static const TargetRegisterClass *
 304 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
 305                          const RegisterBankInfo &RBI,
 306                          bool GetAllRegSet = false) {
 307   if (RB.getID() == AArch64::GPRRegBankID) {
 308     if (Ty.getSizeInBits() <= 32)
 309       return GetAllRegSet ? &AArch64::GPR32allRegClass
 310                           : &AArch64::GPR32RegClass;
 311     if (Ty.getSizeInBits() == 64)
 312       return GetAllRegSet ? &AArch64::GPR64allRegClass
 313                           : &AArch64::GPR64RegClass;
 314     return nullptr;
 315   }
 316
 317   if (RB.getID() == AArch64::FPRRegBankID) {
 318     if (Ty.getSizeInBits() <= 16)
 319       return &AArch64::FPR16RegClass;
 320     if (Ty.getSizeInBits() == 32)
 321       return &AArch64::FPR32RegClass;
 322     if (Ty.getSizeInBits() == 64)
 323       return &AArch64::FPR64RegClass;
 324     if (Ty.getSizeInBits() == 128)
 325       return &AArch64::FPR128RegClass;
 326     return nullptr;
 327   }
 328
 329   return nullptr;
 330 }
 331
 332 /// Given a register bank, and size in bits, return the smallest register class
 333 /// that can represent that combination.
 334 static const TargetRegisterClass *
 335 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
 336                       bool GetAllRegSet = false) {
 337   unsigned RegBankID = RB.getID();
 338
 339   if (RegBankID == AArch64::GPRRegBankID) {
 340     if (SizeInBits <= 32)
 341       return GetAllRegSet ? &AArch64::GPR32allRegClass
 342                           : &AArch64::GPR32RegClass;
 343     if (SizeInBits == 64)
 344       return GetAllRegSet ? &AArch64::GPR64allRegClass
 345                           : &AArch64::GPR64RegClass;
 346   }
 347
 348   if (RegBankID == AArch64::FPRRegBankID) {
 349     switch (SizeInBits) {
 350     default:
 351       return nullptr;
 352     case 8:
 353       return &AArch64::FPR8RegClass;
 354     case 16:
 355       return &AArch64::FPR16RegClass;
 356     case 32:
 357       return &AArch64::FPR32RegClass;
 358     case 64:
 359       return &AArch64::FPR64RegClass;
 360     case 128:
 361       return &AArch64::FPR128RegClass;
 362     }
 363   }
 364
 365   return nullptr;
 366 }
 367
 368 /// Returns the correct subregister to use for a given register class.
 369 static bool getSubRegForClass(const TargetRegisterClass *RC,
 370                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
 371   switch (TRI.getRegSizeInBits(*RC)) {
 372   case 8:
 373     SubReg = AArch64::bsub;
 374     break;
 375   case 16:
 376     SubReg = AArch64::hsub;
 377     break;
 378   case 32:
 379     if (RC != &AArch64::FPR32RegClass)
 380       SubReg = AArch64::sub_32;
 381     else
 382       SubReg = AArch64::ssub;
 383     break;
 384   case 64:
 385     SubReg = AArch64::dsub;
 386     break;
 387   default:
 388     LLVM_DEBUG(
 389         dbgs() << "Couldn't find appropriate subregister for register class.");
 390     return false;
 391   }
 392
 393   return true;
 394 }
 395
 396 /// Check whether \p I is a currently unsupported binary operation:
 397 /// - it has an unsized type
 398 /// - an operand is not a vreg
 399 /// - all operands are not in the same bank
 400 /// These are checks that should someday live in the verifier, but right now,
 401 /// these are mostly limitations of the aarch64 selector.
 402 static bool unsupportedBinOp(const MachineInstr &I,
 403                              const AArch64RegisterBankInfo &RBI,
 404                              const MachineRegisterInfo &MRI,
 405                              const AArch64RegisterInfo &TRI) {
 406   LLT Ty = MRI.getType(I.getOperand(0).getReg());
 407   if (!Ty.isValid()) {
 408     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
 409     return true;
 410   }
 411
 412   const RegisterBank *PrevOpBank = nullptr;
 413   for (auto &MO : I.operands()) {
 414     // FIXME: Support non-register operands.
 415     if (!MO.isReg()) {
 416       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
 417       return true;
 418     }
 419
 420     // FIXME: Can generic operations have physical registers operands? If
 421     // so, this will need to be taught about that, and we'll need to get the
 422     // bank out of the minimal class for the register.
 423     // Either way, this needs to be documented (and possibly verified).
 424     if (!Register::isVirtualRegister(MO.getReg())) {
 425       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
 426       return true;
 427     }
 428
 429     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
 430     if (!OpBank) {
 431       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
 432       return true;
 433     }
 434
 435     if (PrevOpBank && OpBank != PrevOpBank) {
 436       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
 437       return true;
 438     }
 439     PrevOpBank = OpBank;
 440   }
 441   return false;
 442 }
 443
 444 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
 445 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
 446 /// and of size \p OpSize.
 447 /// \returns \p GenericOpc if the combination is unsupported.
 448 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
 449                                unsigned OpSize) {
 450   switch (RegBankID) {
 451   case AArch64::GPRRegBankID:
 452     if (OpSize == 32) {
 453       switch (GenericOpc) {
 454       case TargetOpcode::G_SHL:
 455         return AArch64::LSLVWr;
 456       case TargetOpcode::G_LSHR:
 457         return AArch64::LSRVWr;
 458       case TargetOpcode::G_ASHR:
 459         return AArch64::ASRVWr;
 460       default:
 461         return GenericOpc;
 462       }
 463     } else if (OpSize == 64) {
 464       switch (GenericOpc) {
 465       case TargetOpcode::G_GEP:
 466         return AArch64::ADDXrr;
 467       case TargetOpcode::G_SHL:
 468         return AArch64::LSLVXr;
 469       case TargetOpcode::G_LSHR:
 470         return AArch64::LSRVXr;
 471       case TargetOpcode::G_ASHR:
 472         return AArch64::ASRVXr;
 473       default:
 474         return GenericOpc;
 475       }
 476     }
 477     break;
 478   case AArch64::FPRRegBankID:
 479     switch (OpSize) {
 480     case 32:
 481       switch (GenericOpc) {
 482       case TargetOpcode::G_FADD:
 483         return AArch64::FADDSrr;
 484       case TargetOpcode::G_FSUB:
 485         return AArch64::FSUBSrr;
 486       case TargetOpcode::G_FMUL:
 487         return AArch64::FMULSrr;
 488       case TargetOpcode::G_FDIV:
 489         return AArch64::FDIVSrr;
 490       default:
 491         return GenericOpc;
 492       }
 493     case 64:
 494       switch (GenericOpc) {
 495       case TargetOpcode::G_FADD:
 496         return AArch64::FADDDrr;
 497       case TargetOpcode::G_FSUB:
 498         return AArch64::FSUBDrr;
 499       case TargetOpcode::G_FMUL:
 500         return AArch64::FMULDrr;
 501       case TargetOpcode::G_FDIV:
 502         return AArch64::FDIVDrr;
 503       case TargetOpcode::G_OR:
 504         return AArch64::ORRv8i8;
 505       default:
 506         return GenericOpc;
 507       }
 508     }
 509     break;
 510   }
 511   return GenericOpc;
 512 }
 513
 514 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
 515 /// appropriate for the (value) register bank \p RegBankID and of memory access
 516 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
 517 /// addressing mode (e.g., LDRXui).
 518 /// \returns \p GenericOpc if the combination is unsupported.
 519 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
 520                                     unsigned OpSize) {
 521   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
 522   switch (RegBankID) {
 523   case AArch64::GPRRegBankID:
 524     switch (OpSize) {
 525     case 8:
 526       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
 527     case 16:
 528       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
 529     case 32:
 530       return isStore ? AArch64::STRWui : AArch64::LDRWui;
 531     case 64:
 532       return isStore ? AArch64::STRXui : AArch64::LDRXui;
 533     }
 534     break;
 535   case AArch64::FPRRegBankID:
 536     switch (OpSize) {
 537     case 8:
 538       return isStore ? AArch64::STRBui : AArch64::LDRBui;
 539     case 16:
 540       return isStore ? AArch64::STRHui : AArch64::LDRHui;
 541     case 32:
 542       return isStore ? AArch64::STRSui : AArch64::LDRSui;
 543     case 64:
 544       return isStore ? AArch64::STRDui : AArch64::LDRDui;
 545     }
 546     break;
 547   }
 548   return GenericOpc;
 549 }
 550
 551 #ifndef NDEBUG
 552 /// Helper function that verifies that we have a valid copy at the end of
 553 /// selectCopy. Verifies that the source and dest have the expected sizes and
 554 /// then returns true.
 555 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
 556                         const MachineRegisterInfo &MRI,
 557                         const TargetRegisterInfo &TRI,
 558                         const RegisterBankInfo &RBI) {
 559   const Register DstReg = I.getOperand(0).getReg();
 560   const Register SrcReg = I.getOperand(1).getReg();
 561   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
 562   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
 563
 564   // Make sure the size of the source and dest line up.
 565   assert(
 566       (DstSize == SrcSize ||
 567        // Copies are a mean to setup initial types, the number of
 568        // bits may not exactly match.
 569        (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
 570        // Copies are a mean to copy bits around, as long as we are
 571        // on the same register class, that's fine. Otherwise, that
 572        // means we need some SUBREG_TO_REG or AND & co.
 573        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
 574       "Copy with different width?!");
 575
 576   // Check the size of the destination.
 577   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
 578          "GPRs cannot get more than 64-bit width values");
 579
 580   return true;
 581 }
 582 #endif
 583
 584 /// Helper function for selectCopy. Inserts a subregister copy from
 585 /// \p *From to \p *To, linking it up to \p I.
 586 ///
 587 /// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
 588 ///
 589 /// CopyReg (From class) = COPY SrcReg
 590 /// SubRegCopy (To class) = COPY CopyReg:SubReg
 591 /// Dst = COPY SubRegCopy
 592 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
 593                                   const RegisterBankInfo &RBI, Register SrcReg,
 594                                   const TargetRegisterClass *From,
 595                                   const TargetRegisterClass *To,
 596                                   unsigned SubReg) {
 597   MachineIRBuilder MIB(I);
 598   auto Copy = MIB.buildCopy({From}, {SrcReg});
 599   auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
 600                         .addReg(Copy.getReg(0), 0, SubReg);
 601   MachineOperand &RegOp = I.getOperand(1);
 602   RegOp.setReg(SubRegCopy.getReg(0));
 603
 604   // It's possible that the destination register won't be constrained. Make
 605   // sure that happens.
 606   if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
 607     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
 608
 609   return true;
 610 }
 611
 612 /// Helper function to get the source and destination register classes for a
 613 /// copy. Returns a std::pair containing the source register class for the
 614 /// copy, and the destination register class for the copy. If a register class
 615 /// cannot be determined, then it will be nullptr.
 616 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
 617 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
 618                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
 619                      const RegisterBankInfo &RBI) {
 620   Register DstReg = I.getOperand(0).getReg();
 621   Register SrcReg = I.getOperand(1).getReg();
 622   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
 623   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
 624   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
 625   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
 626
 627   // Special casing for cross-bank copies of s1s. We can technically represent
 628   // a 1-bit value with any size of register. The minimum size for a GPR is 32
 629   // bits. So, we need to put the FPR on 32 bits as well.
 630   //
 631   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
 632   // then we can pull it into the helpers that get the appropriate class for a
 633   // register bank. Or make a new helper that carries along some constraint
 634   // information.
 635   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
 636     SrcSize = DstSize = 32;
 637
 638   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
 639           getMinClassForRegBank(DstRegBank, DstSize, true)};
 640 }
 641
 642 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
 643                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
 644                        const RegisterBankInfo &RBI) {
 645
 646   Register DstReg = I.getOperand(0).getReg();
 647   Register SrcReg = I.getOperand(1).getReg();
 648   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
 649   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
 650
 651   // Find the correct register classes for the source and destination registers.
 652   const TargetRegisterClass *SrcRC;
 653   const TargetRegisterClass *DstRC;
 654   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
 655
 656   if (!DstRC) {
 657     LLVM_DEBUG(dbgs() << "Unexpected dest size "
 658                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
 659     return false;
 660   }
 661
 662   // A couple helpers below, for making sure that the copy we produce is valid.
 663
 664   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
 665   // to verify that the src and dst are the same size, since that's handled by
 666   // the SUBREG_TO_REG.
 667   bool KnownValid = false;
 668
 669   // Returns true, or asserts if something we don't expect happens. Instead of
 670   // returning true, we return isValidCopy() to ensure that we verify the
 671   // result.
 672   auto CheckCopy = [&]() {
 673     // If we have a bitcast or something, we can't have physical registers.
 674     assert((I.isCopy() ||
 675             (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
 676              !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
 677            "No phys reg on generic operator!");
 678     assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
 679     (void)KnownValid;
 680     return true;
 681   };
 682
 683   // Is this a copy? If so, then we may need to insert a subregister copy, or
 684   // a SUBREG_TO_REG.
 685   if (I.isCopy()) {
 686     // Yes. Check if there's anything to fix up.
 687     if (!SrcRC) {
 688       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
 689       return false;
 690     }
 691
 692     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
 693     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
 694
 695     // If we're doing a cross-bank copy on different-sized registers, we need
 696     // to do a bit more work.
 697     if (SrcSize > DstSize) {
 698       // We're doing a cross-bank copy into a smaller register. We need a
 699       // subregister copy. First, get a register class that's on the same bank
 700       // as the destination, but the same size as the source.
 701       const TargetRegisterClass *SubregRC =
 702           getMinClassForRegBank(DstRegBank, SrcSize, true);
 703       assert(SubregRC && "Didn't get a register class for subreg?");
 704
 705       // Get the appropriate subregister for the destination.
 706       unsigned SubReg = 0;
 707       if (!getSubRegForClass(DstRC, TRI, SubReg)) {
 708         LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
 709         return false;
 710       }
 711
 712       // Now, insert a subregister copy using the new register class.
 713       selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
 714       return CheckCopy();
 715     }
 716
 717     // Is this a cross-bank copy?
 718     if (DstRegBank.getID() != SrcRegBank.getID()) {
 719       if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
 720           SrcSize == 16) {
 721         // Special case for FPR16 to GPR32.
 722         // FIXME: This can probably be generalized like the above case.
 723         Register PromoteReg =
 724             MRI.createVirtualRegister(&AArch64::FPR32RegClass);
 725         BuildMI(*I.getParent(), I, I.getDebugLoc(),
 726                 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
 727             .addImm(0)
 728             .addUse(SrcReg)
 729             .addImm(AArch64::hsub);
 730         MachineOperand &RegOp = I.getOperand(1);
 731         RegOp.setReg(PromoteReg);
 732
 733         // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
 734         KnownValid = true;
 735       }
 736     }
 737
 738     // If the destination is a physical register, then there's nothing to
 739     // change, so we're done.
 740     if (Register::isPhysicalRegister(DstReg))
 741       return CheckCopy();
 742   }
 743
 744   // No need to constrain SrcReg. It will get constrained when we hit another
 745   // of its use or its defs. Copies do not have constraints.
 746   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
 747     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
 748                       << " operand\n");
 749     return false;
 750   }
 751   I.setDesc(TII.get(AArch64::COPY));
 752   return CheckCopy();
 753 }
 754
 755 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
 756   if (!DstTy.isScalar() || !SrcTy.isScalar())
 757     return GenericOpc;
 758
 759   const unsigned DstSize = DstTy.getSizeInBits();
 760   const unsigned SrcSize = SrcTy.getSizeInBits();
 761
 762   switch (DstSize) {
 763   case 32:
 764     switch (SrcSize) {
 765     case 32:
 766       switch (GenericOpc) {
 767       case TargetOpcode::G_SITOFP:
 768         return AArch64::SCVTFUWSri;
 769       case TargetOpcode::G_UITOFP:
 770         return AArch64::UCVTFUWSri;
 771       case TargetOpcode::G_FPTOSI:
 772         return AArch64::FCVTZSUWSr;
 773       case TargetOpcode::G_FPTOUI:
 774         return AArch64::FCVTZUUWSr;
 775       default:
 776         return GenericOpc;
 777       }
 778     case 64:
 779       switch (GenericOpc) {
 780       case TargetOpcode::G_SITOFP:
 781         return AArch64::SCVTFUXSri;
 782       case TargetOpcode::G_UITOFP:
 783         return AArch64::UCVTFUXSri;
 784       case TargetOpcode::G_FPTOSI:
 785         return AArch64::FCVTZSUWDr;
 786       case TargetOpcode::G_FPTOUI:
 787         return AArch64::FCVTZUUWDr;
 788       default:
 789         return GenericOpc;
 790       }
 791     default:
 792       return GenericOpc;
 793     }
 794   case 64:
 795     switch (SrcSize) {
 796     case 32:
 797       switch (GenericOpc) {
 798       case TargetOpcode::G_SITOFP:
 799         return AArch64::SCVTFUWDri;
 800       case TargetOpcode::G_UITOFP:
 801         return AArch64::UCVTFUWDri;
 802       case TargetOpcode::G_FPTOSI:
 803         return AArch64::FCVTZSUXSr;
 804       case TargetOpcode::G_FPTOUI:
 805         return AArch64::FCVTZUUXSr;
 806       default:
 807         return GenericOpc;
 808       }
 809     case 64:
 810       switch (GenericOpc) {
 811       case TargetOpcode::G_SITOFP:
 812         return AArch64::SCVTFUXDri;
 813       case TargetOpcode::G_UITOFP:
 814         return AArch64::UCVTFUXDri;
 815       case TargetOpcode::G_FPTOSI:
 816         return AArch64::FCVTZSUXDr;
 817       case TargetOpcode::G_FPTOUI:
 818         return AArch64::FCVTZUUXDr;
 819       default:
 820         return GenericOpc;
 821       }
 822     default:
 823       return GenericOpc;
 824     }
 825   default:
 826     return GenericOpc;
 827   };
 828   return GenericOpc;
 829 }
 830
 831 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
 832                                 const RegisterBankInfo &RBI) {
 833   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
 834   bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
 835                AArch64::GPRRegBankID);
 836   LLT Ty = MRI.getType(I.getOperand(0).getReg());
 837   if (Ty == LLT::scalar(32))
 838     return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
 839   else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
 840     return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
 841   return 0;
 842 }
 843
 844 /// Helper function to select the opcode for a G_FCMP.
 845 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
 846   // If this is a compare against +0.0, then we don't have to explicitly
 847   // materialize a constant.
 848   const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
 849   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
 850   unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
 851   if (OpSize != 32 && OpSize != 64)
 852     return 0;
 853   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
 854                               {AArch64::FCMPSri, AArch64::FCMPDri}};
 855   return CmpOpcTbl[ShouldUseImm][OpSize == 64];
 856 }
 857
 858 /// Returns true if \p P is an unsigned integer comparison predicate.
 859 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
 860   switch (P) {
 861   default:
 862     return false;
 863   case CmpInst::ICMP_UGT:
 864   case CmpInst::ICMP_UGE:
 865   case CmpInst::ICMP_ULT:
 866   case CmpInst::ICMP_ULE:
 867     return true;
 868   }
 869 }
 870
 871 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
 872   switch (P) {
 873   default:
 874     llvm_unreachable("Unknown condition code!");
 875   case CmpInst::ICMP_NE:
 876     return AArch64CC::NE;
 877   case CmpInst::ICMP_EQ:
 878     return AArch64CC::EQ;
 879   case CmpInst::ICMP_SGT:
 880     return AArch64CC::GT;
 881   case CmpInst::ICMP_SGE:
 882     return AArch64CC::GE;
 883   case CmpInst::ICMP_SLT:
 884     return AArch64CC::LT;
 885   case CmpInst::ICMP_SLE:
 886     return AArch64CC::LE;
 887   case CmpInst::ICMP_UGT:
 888     return AArch64CC::HI;
 889   case CmpInst::ICMP_UGE:
 890     return AArch64CC::HS;
 891   case CmpInst::ICMP_ULT:
 892     return AArch64CC::LO;
 893   case CmpInst::ICMP_ULE:
 894     return AArch64CC::LS;
 895   }
 896 }
 897
 898 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
 899                                       AArch64CC::CondCode &CondCode,
 900                                       AArch64CC::CondCode &CondCode2) {
 901   CondCode2 = AArch64CC::AL;
 902   switch (P) {
 903   default:
 904     llvm_unreachable("Unknown FP condition!");
 905   case CmpInst::FCMP_OEQ:
 906     CondCode = AArch64CC::EQ;
 907     break;
 908   case CmpInst::FCMP_OGT:
 909     CondCode = AArch64CC::GT;
 910     break;
 911   case CmpInst::FCMP_OGE:
 912     CondCode = AArch64CC::GE;
 913     break;
 914   case CmpInst::FCMP_OLT:
 915     CondCode = AArch64CC::MI;
 916     break;
 917   case CmpInst::FCMP_OLE:
 918     CondCode = AArch64CC::LS;
 919     break;
 920   case CmpInst::FCMP_ONE:
 921     CondCode = AArch64CC::MI;
 922     CondCode2 = AArch64CC::GT;
 923     break;
 924   case CmpInst::FCMP_ORD:
 925     CondCode = AArch64CC::VC;
 926     break;
 927   case CmpInst::FCMP_UNO:
 928     CondCode = AArch64CC::VS;
 929     break;
 930   case CmpInst::FCMP_UEQ:
 931     CondCode = AArch64CC::EQ;
 932     CondCode2 = AArch64CC::VS;
 933     break;
 934   case CmpInst::FCMP_UGT:
 935     CondCode = AArch64CC::HI;
 936     break;
 937   case CmpInst::FCMP_UGE:
 938     CondCode = AArch64CC::PL;
 939     break;
 940   case CmpInst::FCMP_ULT:
 941     CondCode = AArch64CC::LT;
 942     break;
 943   case CmpInst::FCMP_ULE:
 944     CondCode = AArch64CC::LE;
 945     break;
 946   case CmpInst::FCMP_UNE:
 947     CondCode = AArch64CC::NE;
 948     break;
 949   }
 950 }
 951
 952 bool AArch64InstructionSelector::selectCompareBranch(
 953     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 954
 955   const Register CondReg = I.getOperand(0).getReg();
 956   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
 957   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
 958   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
 959     CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
 960   if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
 961     return false;
 962
 963   Register LHS = CCMI->getOperand(2).getReg();
 964   Register RHS = CCMI->getOperand(3).getReg();
 965   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
 966   if (!VRegAndVal)
 967     std::swap(RHS, LHS);
 968
 969   VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
 970   if (!VRegAndVal || VRegAndVal->Value != 0) {
 971     MachineIRBuilder MIB(I);
 972     // If we can't select a CBZ then emit a cmp + Bcc.
 973     if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
 974                             CCMI->getOperand(1), MIB))
 975       return false;
 976     const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
 977         (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
 978     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
 979     I.eraseFromParent();
 980     return true;
 981   }
 982
 983   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
 984   if (RB.getID() != AArch64::GPRRegBankID)
 985     return false;
 986
 987   const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
 988   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
 989     return false;
 990
 991   const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
 992   unsigned CBOpc = 0;
 993   if (CmpWidth <= 32)
 994     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
 995   else if (CmpWidth == 64)
 996     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
 997   else
 998     return false;
 999
1000   BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1001       .addUse(LHS)
1002       .addMBB(DestMBB)
1003       .constrainAllUses(TII, TRI, RBI);
1004
1005   I.eraseFromParent();
1006   return true;
1007 }
1008
1009 bool AArch64InstructionSelector::selectVectorSHL(
1010     MachineInstr &I, MachineRegisterInfo &MRI) const {
1011   assert(I.getOpcode() == TargetOpcode::G_SHL);
1012   Register DstReg = I.getOperand(0).getReg();
1013   const LLT Ty = MRI.getType(DstReg);
1014   Register Src1Reg = I.getOperand(1).getReg();
1015   Register Src2Reg = I.getOperand(2).getReg();
1016
1017   if (!Ty.isVector())
1018     return false;
1019
1020   unsigned Opc = 0;
1021   if (Ty == LLT::vector(2, 64)) {
1022     Opc = AArch64::USHLv2i64;
1023   } else if (Ty == LLT::vector(4, 32)) {
1024     Opc = AArch64::USHLv4i32;
1025   } else if (Ty == LLT::vector(2, 32)) {
1026     Opc = AArch64::USHLv2i32;
1027   } else {
1028     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1029     return false;
1030   }
1031
1032   MachineIRBuilder MIB(I);
1033   auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
1034   constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
1035   I.eraseFromParent();
1036   return true;
1037 }
1038
1039 bool AArch64InstructionSelector::selectVectorASHR(
1040     MachineInstr &I, MachineRegisterInfo &MRI) const {
1041   assert(I.getOpcode() == TargetOpcode::G_ASHR);
1042   Register DstReg = I.getOperand(0).getReg();
1043   const LLT Ty = MRI.getType(DstReg);
1044   Register Src1Reg = I.getOperand(1).getReg();
1045   Register Src2Reg = I.getOperand(2).getReg();
1046
1047   if (!Ty.isVector())
1048     return false;
1049
1050   // There is not a shift right register instruction, but the shift left
1051   // register instruction takes a signed value, where negative numbers specify a
1052   // right shift.
1053
1054   unsigned Opc = 0;
1055   unsigned NegOpc = 0;
1056   const TargetRegisterClass *RC = nullptr;
1057   if (Ty == LLT::vector(2, 64)) {
1058     Opc = AArch64::SSHLv2i64;
1059     NegOpc = AArch64::NEGv2i64;
1060     RC = &AArch64::FPR128RegClass;
1061   } else if (Ty == LLT::vector(4, 32)) {
1062     Opc = AArch64::SSHLv4i32;
1063     NegOpc = AArch64::NEGv4i32;
1064     RC = &AArch64::FPR128RegClass;
1065   } else if (Ty == LLT::vector(2, 32)) {
1066     Opc = AArch64::SSHLv2i32;
1067     NegOpc = AArch64::NEGv2i32;
1068     RC = &AArch64::FPR64RegClass;
1069   } else {
1070     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1071     return false;
1072   }
1073
1074   MachineIRBuilder MIB(I);
1075   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1076   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1077   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1078   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1079   I.eraseFromParent();
1080   return true;
1081 }
1082
1083 bool AArch64InstructionSelector::selectVaStartAAPCS(
1084     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1085   return false;
1086 }
1087
1088 bool AArch64InstructionSelector::selectVaStartDarwin(
1089     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1090   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1091   Register ListReg = I.getOperand(0).getReg();
1092
1093   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1094
1095   auto MIB =
1096       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1097           .addDef(ArgsAddrReg)
1098           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1099           .addImm(0)
1100           .addImm(0);
1101
1102   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1103
1104   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1105             .addUse(ArgsAddrReg)
1106             .addUse(ListReg)
1107             .addImm(0)
1108             .addMemOperand(*I.memoperands_begin());
1109
1110   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1111   I.eraseFromParent();
1112   return true;
1113 }
1114
1115 void AArch64InstructionSelector::materializeLargeCMVal(
1116     MachineInstr &I, const Value *V, unsigned OpFlags) const {
1117   MachineBasicBlock &MBB = *I.getParent();
1118   MachineFunction &MF = *MBB.getParent();
1119   MachineRegisterInfo &MRI = MF.getRegInfo();
1120   MachineIRBuilder MIB(I);
1121
1122   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1123   MovZ->addOperand(MF, I.getOperand(1));
1124   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1125                                      AArch64II::MO_NC);
1126   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1127   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1128
1129   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1130                        Register ForceDstReg) {
1131     Register DstReg = ForceDstReg
1132                           ? ForceDstReg
1133                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1134     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1135     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1136       MovI->addOperand(MF, MachineOperand::CreateGA(
1137                                GV, MovZ->getOperand(1).getOffset(), Flags));
1138     } else {
1139       MovI->addOperand(
1140           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1141                                        MovZ->getOperand(1).getOffset(), Flags));
1142     }
1143     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1144     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1145     return DstReg;
1146   };
1147   Register DstReg = BuildMovK(MovZ.getReg(0),
1148                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1149   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1150   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1151   return;
1152 }
1153
1154 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1155   MachineBasicBlock &MBB = *I.getParent();
1156   MachineFunction &MF = *MBB.getParent();
1157   MachineRegisterInfo &MRI = MF.getRegInfo();
1158
1159   switch (I.getOpcode()) {
1160   case TargetOpcode::G_SHL:
1161   case TargetOpcode::G_ASHR:
1162   case TargetOpcode::G_LSHR: {
1163     // These shifts are legalized to have 64 bit shift amounts because we want
1164     // to take advantage of the existing imported selection patterns that assume
1165     // the immediates are s64s. However, if the shifted type is 32 bits and for
1166     // some reason we receive input GMIR that has an s64 shift amount that's not
1167     // a G_CONSTANT, insert a truncate so that we can still select the s32
1168     // register-register variant.
1169     Register SrcReg = I.getOperand(1).getReg();
1170     Register ShiftReg = I.getOperand(2).getReg();
1171     const LLT ShiftTy = MRI.getType(ShiftReg);
1172     const LLT SrcTy = MRI.getType(SrcReg);
1173     if (SrcTy.isVector())
1174       return;
1175     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1176     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1177       return;
1178     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1179     assert(AmtMI && "could not find a vreg definition for shift amount");
1180     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1181       // Insert a subregister copy to implement a 64->32 trunc
1182       MachineIRBuilder MIB(I);
1183       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1184                        .addReg(ShiftReg, 0, AArch64::sub_32);
1185       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1186       I.getOperand(2).setReg(Trunc.getReg(0));
1187     }
1188     return;
1189   }
1190   case TargetOpcode::G_STORE:
1191     contractCrossBankCopyIntoStore(I, MRI);
1192     return;
1193   default:
1194     return;
1195   }
1196 }
1197
1198 bool AArch64InstructionSelector::earlySelectSHL(
1199     MachineInstr &I, MachineRegisterInfo &MRI) const {
1200   // We try to match the immediate variant of LSL, which is actually an alias
1201   // for a special case of UBFM. Otherwise, we fall back to the imported
1202   // selector which will match the register variant.
1203   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1204   const auto &MO = I.getOperand(2);
1205   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1206   if (!VRegAndVal)
1207     return false;
1208
1209   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1210   if (DstTy.isVector())
1211     return false;
1212   bool Is64Bit = DstTy.getSizeInBits() == 64;
1213   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1214   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1215   MachineIRBuilder MIB(I);
1216
1217   if (!Imm1Fn || !Imm2Fn)
1218     return false;
1219
1220   auto NewI =
1221       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1222                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1223
1224   for (auto &RenderFn : *Imm1Fn)
1225     RenderFn(NewI);
1226   for (auto &RenderFn : *Imm2Fn)
1227     RenderFn(NewI);
1228
1229   I.eraseFromParent();
1230   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1231 }
1232
1233 void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1234     MachineInstr &I, MachineRegisterInfo &MRI) const {
1235   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1236   // If we're storing a scalar, it doesn't matter what register bank that
1237   // scalar is on. All that matters is the size.
1238   //
1239   // So, if we see something like this (with a 32-bit scalar as an example):
1240   //
1241   // %x:gpr(s32) = ... something ...
1242   // %y:fpr(s32) = COPY %x:gpr(s32)
1243   // G_STORE %y:fpr(s32)
1244   //
1245   // We can fix this up into something like this:
1246   //
1247   // G_STORE %x:gpr(s32)
1248   //
1249   // And then continue the selection process normally.
1250   MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1251   if (!Def)
1252     return;
1253   Register DefDstReg = Def->getOperand(0).getReg();
1254   LLT DefDstTy = MRI.getType(DefDstReg);
1255   Register StoreSrcReg = I.getOperand(0).getReg();
1256   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1257
1258   // If we get something strange like a physical register, then we shouldn't
1259   // go any further.
1260   if (!DefDstTy.isValid())
1261     return;
1262
1263   // Are the source and dst types the same size?
1264   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1265     return;
1266
1267   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1268       RBI.getRegBank(DefDstReg, MRI, TRI))
1269     return;
1270
1271   // We have a cross-bank copy, which is entering a store. Let's fold it.
1272   I.getOperand(0).setReg(DefDstReg);
1273 }
1274
1275 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1276   assert(I.getParent() && "Instruction should be in a basic block!");
1277   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1278
1279   MachineBasicBlock &MBB = *I.getParent();
1280   MachineFunction &MF = *MBB.getParent();
1281   MachineRegisterInfo &MRI = MF.getRegInfo();
1282
1283   switch (I.getOpcode()) {
1284   case TargetOpcode::G_SHL:
1285     return earlySelectSHL(I, MRI);
1286   case TargetOpcode::G_CONSTANT: {
1287     bool IsZero = false;
1288     if (I.getOperand(1).isCImm())
1289       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1290     else if (I.getOperand(1).isImm())
1291       IsZero = I.getOperand(1).getImm() == 0;
1292
1293     if (!IsZero)
1294       return false;
1295
1296     Register DefReg = I.getOperand(0).getReg();
1297     LLT Ty = MRI.getType(DefReg);
1298     if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1299       return false;
1300
1301     if (Ty == LLT::scalar(64)) {
1302       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1303       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1304     } else {
1305       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1306       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1307     }
1308     I.setDesc(TII.get(TargetOpcode::COPY));
1309     return true;
1310   }
1311   default:
1312     return false;
1313   }
1314 }
1315
1316 bool AArch64InstructionSelector::select(MachineInstr &I) {
1317   assert(I.getParent() && "Instruction should be in a basic block!");
1318   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1319
1320   MachineBasicBlock &MBB = *I.getParent();
1321   MachineFunction &MF = *MBB.getParent();
1322   MachineRegisterInfo &MRI = MF.getRegInfo();
1323
1324   unsigned Opcode = I.getOpcode();
1325   // G_PHI requires same handling as PHI
1326   if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1327     // Certain non-generic instructions also need some special handling.
1328
1329     if (Opcode ==  TargetOpcode::LOAD_STACK_GUARD)
1330       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1331
1332     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1333       const Register DefReg = I.getOperand(0).getReg();
1334       const LLT DefTy = MRI.getType(DefReg);
1335
1336       const RegClassOrRegBank &RegClassOrBank =
1337         MRI.getRegClassOrRegBank(DefReg);
1338
1339       const TargetRegisterClass *DefRC
1340         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1341       if (!DefRC) {
1342         if (!DefTy.isValid()) {
1343           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1344           return false;
1345         }
1346         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1347         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1348         if (!DefRC) {
1349           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1350           return false;
1351         }
1352       }
1353
1354       I.setDesc(TII.get(TargetOpcode::PHI));
1355
1356       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1357     }
1358
1359     if (I.isCopy())
1360       return selectCopy(I, TII, MRI, TRI, RBI);
1361
1362     return true;
1363   }
1364
1365
1366   if (I.getNumOperands() != I.getNumExplicitOperands()) {
1367     LLVM_DEBUG(
1368         dbgs() << "Generic instruction has unexpected implicit operands\n");
1369     return false;
1370   }
1371
1372   // Try to do some lowering before we start instruction selecting. These
1373   // lowerings are purely transformations on the input G_MIR and so selection
1374   // must continue after any modification of the instruction.
1375   preISelLower(I);
1376
1377   // There may be patterns where the importer can't deal with them optimally,
1378   // but does select it to a suboptimal sequence so our custom C++ selection
1379   // code later never has a chance to work on it. Therefore, we have an early
1380   // selection attempt here to give priority to certain selection routines
1381   // over the imported ones.
1382   if (earlySelect(I))
1383     return true;
1384
1385   if (selectImpl(I, *CoverageInfo))
1386     return true;
1387
1388   LLT Ty =
1389       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1390
1391   MachineIRBuilder MIB(I);
1392
1393   switch (Opcode) {
1394   case TargetOpcode::G_BRCOND: {
1395     if (Ty.getSizeInBits() > 32) {
1396       // We shouldn't need this on AArch64, but it would be implemented as an
1397       // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1398       // bit being tested is < 32.
1399       LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1400                         << ", expected at most 32-bits");
1401       return false;
1402     }
1403
1404     const Register CondReg = I.getOperand(0).getReg();
1405     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1406
1407     // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1408     // instructions will not be produced, as they are conditional branch
1409     // instructions that do not set flags.
1410     bool ProduceNonFlagSettingCondBr =
1411         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1412     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1413       return true;
1414
1415     if (ProduceNonFlagSettingCondBr) {
1416       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1417                      .addUse(CondReg)
1418                      .addImm(/*bit offset=*/0)
1419                      .addMBB(DestMBB);
1420
1421       I.eraseFromParent();
1422       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1423     } else {
1424       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1425                      .addDef(AArch64::WZR)
1426                      .addUse(CondReg)
1427                      .addImm(1);
1428       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1429       auto Bcc =
1430           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1431               .addImm(AArch64CC::EQ)
1432               .addMBB(DestMBB);
1433
1434       I.eraseFromParent();
1435       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1436     }
1437   }
1438
1439   case TargetOpcode::G_BRINDIRECT: {
1440     I.setDesc(TII.get(AArch64::BR));
1441     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1442   }
1443
1444   case TargetOpcode::G_BRJT:
1445     return selectBrJT(I, MRI);
1446
1447   case TargetOpcode::G_BSWAP: {
1448     // Handle vector types for G_BSWAP directly.
1449     Register DstReg = I.getOperand(0).getReg();
1450     LLT DstTy = MRI.getType(DstReg);
1451
1452     // We should only get vector types here; everything else is handled by the
1453     // importer right now.
1454     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1455       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1456       return false;
1457     }
1458
1459     // Only handle 4 and 2 element vectors for now.
1460     // TODO: 16-bit elements.
1461     unsigned NumElts = DstTy.getNumElements();
1462     if (NumElts != 4 && NumElts != 2) {
1463       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1464       return false;
1465     }
1466
1467     // Choose the correct opcode for the supported types. Right now, that's
1468     // v2s32, v4s32, and v2s64.
1469     unsigned Opc = 0;
1470     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1471     if (EltSize == 32)
1472       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1473                                           : AArch64::REV32v16i8;
1474     else if (EltSize == 64)
1475       Opc = AArch64::REV64v16i8;
1476
1477     // We should always get something by the time we get here...
1478     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1479
1480     I.setDesc(TII.get(Opc));
1481     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1482   }
1483
1484   case TargetOpcode::G_FCONSTANT:
1485   case TargetOpcode::G_CONSTANT: {
1486     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1487
1488     const LLT s8 = LLT::scalar(8);
1489     const LLT s16 = LLT::scalar(16);
1490     const LLT s32 = LLT::scalar(32);
1491     const LLT s64 = LLT::scalar(64);
1492     const LLT p0 = LLT::pointer(0, 64);
1493
1494     const Register DefReg = I.getOperand(0).getReg();
1495     const LLT DefTy = MRI.getType(DefReg);
1496     const unsigned DefSize = DefTy.getSizeInBits();
1497     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1498
1499     // FIXME: Redundant check, but even less readable when factored out.
1500     if (isFP) {
1501       if (Ty != s32 && Ty != s64) {
1502         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1503                           << " constant, expected: " << s32 << " or " << s64
1504                           << '\n');
1505         return false;
1506       }
1507
1508       if (RB.getID() != AArch64::FPRRegBankID) {
1509         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1510                           << " constant on bank: " << RB
1511                           << ", expected: FPR\n");
1512         return false;
1513       }
1514
1515       // The case when we have 0.0 is covered by tablegen. Reject it here so we
1516       // can be sure tablegen works correctly and isn't rescued by this code.
1517       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1518         return false;
1519     } else {
1520       // s32 and s64 are covered by tablegen.
1521       if (Ty != p0 && Ty != s8 && Ty != s16) {
1522         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1523                           << " constant, expected: " << s32 << ", " << s64
1524                           << ", or " << p0 << '\n');
1525         return false;
1526       }
1527
1528       if (RB.getID() != AArch64::GPRRegBankID) {
1529         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1530                           << " constant on bank: " << RB
1531                           << ", expected: GPR\n");
1532         return false;
1533       }
1534     }
1535
1536     // We allow G_CONSTANT of types < 32b.
1537     const unsigned MovOpc =
1538         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1539
1540     if (isFP) {
1541       // Either emit a FMOV, or emit a copy to emit a normal mov.
1542       const TargetRegisterClass &GPRRC =
1543           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1544       const TargetRegisterClass &FPRRC =
1545           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1546
1547       // Can we use a FMOV instruction to represent the immediate?
1548       if (emitFMovForFConstant(I, MRI))
1549         return true;
1550
1551       // Nope. Emit a copy and use a normal mov instead.
1552       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1553       MachineOperand &RegOp = I.getOperand(0);
1554       RegOp.setReg(DefGPRReg);
1555       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1556       MIB.buildCopy({DefReg}, {DefGPRReg});
1557
1558       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1559         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1560         return false;
1561       }
1562
1563       MachineOperand &ImmOp = I.getOperand(1);
1564       // FIXME: Is going through int64_t always correct?
1565       ImmOp.ChangeToImmediate(
1566           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1567     } else if (I.getOperand(1).isCImm()) {
1568       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1569       I.getOperand(1).ChangeToImmediate(Val);
1570     } else if (I.getOperand(1).isImm()) {
1571       uint64_t Val = I.getOperand(1).getImm();
1572       I.getOperand(1).ChangeToImmediate(Val);
1573     }
1574
1575     I.setDesc(TII.get(MovOpc));
1576     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1577     return true;
1578   }
1579   case TargetOpcode::G_EXTRACT: {
1580     Register DstReg = I.getOperand(0).getReg();
1581     Register SrcReg = I.getOperand(1).getReg();
1582     LLT SrcTy = MRI.getType(SrcReg);
1583     LLT DstTy = MRI.getType(DstReg);
1584     (void)DstTy;
1585     unsigned SrcSize = SrcTy.getSizeInBits();
1586
1587     if (SrcTy.getSizeInBits() > 64) {
1588       // This should be an extract of an s128, which is like a vector extract.
1589       if (SrcTy.getSizeInBits() != 128)
1590         return false;
1591       // Only support extracting 64 bits from an s128 at the moment.
1592       if (DstTy.getSizeInBits() != 64)
1593         return false;
1594
1595       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1596       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1597       // Check we have the right regbank always.
1598       assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1599              DstRB.getID() == AArch64::FPRRegBankID &&
1600              "Wrong extract regbank!");
1601       (void)SrcRB;
1602
1603       // Emit the same code as a vector extract.
1604       // Offset must be a multiple of 64.
1605       unsigned Offset = I.getOperand(2).getImm();
1606       if (Offset % 64 != 0)
1607         return false;
1608       unsigned LaneIdx = Offset / 64;
1609       MachineIRBuilder MIB(I);
1610       MachineInstr *Extract = emitExtractVectorElt(
1611           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1612       if (!Extract)
1613         return false;
1614       I.eraseFromParent();
1615       return true;
1616     }
1617
1618     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1619     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1620                                       Ty.getSizeInBits() - 1);
1621
1622     if (SrcSize < 64) {
1623       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1624              "unexpected G_EXTRACT types");
1625       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1626     }
1627
1628     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1629     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1630     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1631         .addReg(DstReg, 0, AArch64::sub_32);
1632     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1633                                  AArch64::GPR32RegClass, MRI);
1634     I.getOperand(0).setReg(DstReg);
1635
1636     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1637   }
1638
1639   case TargetOpcode::G_INSERT: {
1640     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1641     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1642     unsigned DstSize = DstTy.getSizeInBits();
1643     // Larger inserts are vectors, same-size ones should be something else by
1644     // now (split up or turned into COPYs).
1645     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1646       return false;
1647
1648     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1649     unsigned LSB = I.getOperand(3).getImm();
1650     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1651     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1652     MachineInstrBuilder(MF, I).addImm(Width - 1);
1653
1654     if (DstSize < 64) {
1655       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1656              "unexpected G_INSERT types");
1657       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1658     }
1659
1660     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1661     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1662             TII.get(AArch64::SUBREG_TO_REG))
1663         .addDef(SrcReg)
1664         .addImm(0)
1665         .addUse(I.getOperand(2).getReg())
1666         .addImm(AArch64::sub_32);
1667     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1668                                  AArch64::GPR32RegClass, MRI);
1669     I.getOperand(2).setReg(SrcReg);
1670
1671     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1672   }
1673   case TargetOpcode::G_FRAME_INDEX: {
1674     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1675     if (Ty != LLT::pointer(0, 64)) {
1676       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1677                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1678       return false;
1679     }
1680     I.setDesc(TII.get(AArch64::ADDXri));
1681
1682     // MOs for a #0 shifted immediate.
1683     I.addOperand(MachineOperand::CreateImm(0));
1684     I.addOperand(MachineOperand::CreateImm(0));
1685
1686     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1687   }
1688
1689   case TargetOpcode::G_GLOBAL_VALUE: {
1690     auto GV = I.getOperand(1).getGlobal();
1691     if (GV->isThreadLocal())
1692       return selectTLSGlobalValue(I, MRI);
1693
1694     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
1695     if (OpFlags & AArch64II::MO_GOT) {
1696       I.setDesc(TII.get(AArch64::LOADgot));
1697       I.getOperand(1).setTargetFlags(OpFlags);
1698     } else if (TM.getCodeModel() == CodeModel::Large) {
1699       // Materialize the global using movz/movk instructions.
1700       materializeLargeCMVal(I, GV, OpFlags);
1701       I.eraseFromParent();
1702       return true;
1703     } else if (TM.getCodeModel() == CodeModel::Tiny) {
1704       I.setDesc(TII.get(AArch64::ADR));
1705       I.getOperand(1).setTargetFlags(OpFlags);
1706     } else {
1707       I.setDesc(TII.get(AArch64::MOVaddr));
1708       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1709       MachineInstrBuilder MIB(MF, I);
1710       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1711                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1712     }
1713     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1714   }
1715
1716   case TargetOpcode::G_ZEXTLOAD:
1717   case TargetOpcode::G_LOAD:
1718   case TargetOpcode::G_STORE: {
1719     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1720     MachineIRBuilder MIB(I);
1721
1722     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1723
1724     if (PtrTy != LLT::pointer(0, 64)) {
1725       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1726                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1727       return false;
1728     }
1729
1730     auto &MemOp = **I.memoperands_begin();
1731     if (MemOp.isAtomic()) {
1732       // For now we just support s8 acquire loads to be able to compile stack
1733       // protector code.
1734       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1735           MemOp.getSize() == 1) {
1736         I.setDesc(TII.get(AArch64::LDARB));
1737         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1738       }
1739       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
1740       return false;
1741     }
1742     unsigned MemSizeInBits = MemOp.getSize() * 8;
1743
1744     const Register PtrReg = I.getOperand(1).getReg();
1745 #ifndef NDEBUG
1746     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1747     // Sanity-check the pointer register.
1748     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1749            "Load/Store pointer operand isn't a GPR");
1750     assert(MRI.getType(PtrReg).isPointer() &&
1751            "Load/Store pointer operand isn't a pointer");
1752 #endif
1753
1754     const Register ValReg = I.getOperand(0).getReg();
1755     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1756
1757     const unsigned NewOpc =
1758         selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1759     if (NewOpc == I.getOpcode())
1760       return false;
1761
1762     I.setDesc(TII.get(NewOpc));
1763
1764     uint64_t Offset = 0;
1765     auto *PtrMI = MRI.getVRegDef(PtrReg);
1766
1767     // Try to fold a GEP into our unsigned immediate addressing mode.
1768     if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1769       if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1770         int64_t Imm = *COff;
1771         const unsigned Size = MemSizeInBits / 8;
1772         const unsigned Scale = Log2_32(Size);
1773         if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1774           Register Ptr2Reg = PtrMI->getOperand(1).getReg();
1775           I.getOperand(1).setReg(Ptr2Reg);
1776           PtrMI = MRI.getVRegDef(Ptr2Reg);
1777           Offset = Imm / Size;
1778         }
1779       }
1780     }
1781
1782     // If we haven't folded anything into our addressing mode yet, try to fold
1783     // a frame index into the base+offset.
1784     if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1785       I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1786
1787     I.addOperand(MachineOperand::CreateImm(Offset));
1788
1789     // If we're storing a 0, use WZR/XZR.
1790     if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1791       if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1792         if (I.getOpcode() == AArch64::STRWui)
1793           I.getOperand(0).setReg(AArch64::WZR);
1794         else if (I.getOpcode() == AArch64::STRXui)
1795           I.getOperand(0).setReg(AArch64::XZR);
1796       }
1797     }
1798
1799     if (IsZExtLoad) {
1800       // The zextload from a smaller type to i32 should be handled by the importer.
1801       if (MRI.getType(ValReg).getSizeInBits() != 64)
1802         return false;
1803       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1804       //and zero_extend with SUBREG_TO_REG.
1805       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1806       Register DstReg = I.getOperand(0).getReg();
1807       I.getOperand(0).setReg(LdReg);
1808
1809       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1810       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1811           .addImm(0)
1812           .addUse(LdReg)
1813           .addImm(AArch64::sub_32);
1814       constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1815       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1816                                           MRI);
1817     }
1818     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1819   }
1820
1821   case TargetOpcode::G_SMULH:
1822   case TargetOpcode::G_UMULH: {
1823     // Reject the various things we don't support yet.
1824     if (unsupportedBinOp(I, RBI, MRI, TRI))
1825       return false;
1826
1827     const Register DefReg = I.getOperand(0).getReg();
1828     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1829
1830     if (RB.getID() != AArch64::GPRRegBankID) {
1831       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1832       return false;
1833     }
1834
1835     if (Ty != LLT::scalar(64)) {
1836       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1837                         << ", expected: " << LLT::scalar(64) << '\n');
1838       return false;
1839     }
1840
1841     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1842                                                              : AArch64::UMULHrr;
1843     I.setDesc(TII.get(NewOpc));
1844
1845     // Now that we selected an opcode, we need to constrain the register
1846     // operands to use appropriate classes.
1847     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1848   }
1849   case TargetOpcode::G_FADD:
1850   case TargetOpcode::G_FSUB:
1851   case TargetOpcode::G_FMUL:
1852   case TargetOpcode::G_FDIV:
1853
1854   case TargetOpcode::G_ASHR:
1855     if (MRI.getType(I.getOperand(0).getReg()).isVector())
1856       return selectVectorASHR(I, MRI);
1857     LLVM_FALLTHROUGH;
1858   case TargetOpcode::G_SHL:
1859     if (Opcode == TargetOpcode::G_SHL &&
1860         MRI.getType(I.getOperand(0).getReg()).isVector())
1861       return selectVectorSHL(I, MRI);
1862     LLVM_FALLTHROUGH;
1863   case TargetOpcode::G_OR:
1864   case TargetOpcode::G_LSHR: {
1865     // Reject the various things we don't support yet.
1866     if (unsupportedBinOp(I, RBI, MRI, TRI))
1867       return false;
1868
1869     const unsigned OpSize = Ty.getSizeInBits();
1870
1871     const Register DefReg = I.getOperand(0).getReg();
1872     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1873
1874     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1875     if (NewOpc == I.getOpcode())
1876       return false;
1877
1878     I.setDesc(TII.get(NewOpc));
1879     // FIXME: Should the type be always reset in setDesc?
1880
1881     // Now that we selected an opcode, we need to constrain the register
1882     // operands to use appropriate classes.
1883     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1884   }
1885
1886   case TargetOpcode::G_GEP: {
1887     MachineIRBuilder MIRBuilder(I);
1888     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1889             MIRBuilder);
1890     I.eraseFromParent();
1891     return true;
1892   }
1893   case TargetOpcode::G_UADDO: {
1894     // TODO: Support other types.
1895     unsigned OpSize = Ty.getSizeInBits();
1896     if (OpSize != 32 && OpSize != 64) {
1897       LLVM_DEBUG(
1898           dbgs()
1899           << "G_UADDO currently only supported for 32 and 64 b types.\n");
1900       return false;
1901     }
1902
1903     // TODO: Support vectors.
1904     if (Ty.isVector()) {
1905       LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1906       return false;
1907     }
1908
1909     // Add and set the set condition flag.
1910     unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1911     MachineIRBuilder MIRBuilder(I);
1912     auto AddsMI = MIRBuilder.buildInstr(
1913         AddsOpc, {I.getOperand(0).getReg()},
1914         {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1915     constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1916
1917     // Now, put the overflow result in the register given by the first operand
1918     // to the G_UADDO. CSINC increments the result when the predicate is false,
1919     // so to get the increment when it's true, we need to use the inverse. In
1920     // this case, we want to increment when carry is set.
1921     auto CsetMI = MIRBuilder
1922                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1923                                   {Register(AArch64::WZR), Register(AArch64::WZR)})
1924                       .addImm(getInvertedCondCode(AArch64CC::HS));
1925     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1926     I.eraseFromParent();
1927     return true;
1928   }
1929
1930   case TargetOpcode::G_PTR_MASK: {
1931     uint64_t Align = I.getOperand(2).getImm();
1932     if (Align >= 64 || Align == 0)
1933       return false;
1934
1935     uint64_t Mask = ~((1ULL << Align) - 1);
1936     I.setDesc(TII.get(AArch64::ANDXri));
1937     I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1938
1939     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1940   }
1941   case TargetOpcode::G_PTRTOINT:
1942   case TargetOpcode::G_TRUNC: {
1943     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1944     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1945
1946     const Register DstReg = I.getOperand(0).getReg();
1947     const Register SrcReg = I.getOperand(1).getReg();
1948
1949     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1950     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1951
1952     if (DstRB.getID() != SrcRB.getID()) {
1953       LLVM_DEBUG(
1954           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1955       return false;
1956     }
1957
1958     if (DstRB.getID() == AArch64::GPRRegBankID) {
1959       const TargetRegisterClass *DstRC =
1960           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1961       if (!DstRC)
1962         return false;
1963
1964       const TargetRegisterClass *SrcRC =
1965           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1966       if (!SrcRC)
1967         return false;
1968
1969       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1970           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1971         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1972         return false;
1973       }
1974
1975       if (DstRC == SrcRC) {
1976         // Nothing to be done
1977       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1978                  SrcTy == LLT::scalar(64)) {
1979         llvm_unreachable("TableGen can import this case");
1980         return false;
1981       } else if (DstRC == &AArch64::GPR32RegClass &&
1982                  SrcRC == &AArch64::GPR64RegClass) {
1983         I.getOperand(1).setSubReg(AArch64::sub_32);
1984       } else {
1985         LLVM_DEBUG(
1986             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1987         return false;
1988       }
1989
1990       I.setDesc(TII.get(TargetOpcode::COPY));
1991       return true;
1992     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1993       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1994         I.setDesc(TII.get(AArch64::XTNv4i16));
1995         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1996         return true;
1997       }
1998
1999       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2000         MachineIRBuilder MIB(I);
2001         MachineInstr *Extract = emitExtractVectorElt(
2002             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2003         if (!Extract)
2004           return false;
2005         I.eraseFromParent();
2006         return true;
2007       }
2008     }
2009
2010     return false;
2011   }
2012
2013   case TargetOpcode::G_ANYEXT: {
2014     const Register DstReg = I.getOperand(0).getReg();
2015     const Register SrcReg = I.getOperand(1).getReg();
2016
2017     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2018     if (RBDst.getID() != AArch64::GPRRegBankID) {
2019       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2020                         << ", expected: GPR\n");
2021       return false;
2022     }
2023
2024     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2025     if (RBSrc.getID() != AArch64::GPRRegBankID) {
2026       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2027                         << ", expected: GPR\n");
2028       return false;
2029     }
2030
2031     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2032
2033     if (DstSize == 0) {
2034       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2035       return false;
2036     }
2037
2038     if (DstSize != 64 && DstSize > 32) {
2039       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2040                         << ", expected: 32 or 64\n");
2041       return false;
2042     }
2043     // At this point G_ANYEXT is just like a plain COPY, but we need
2044     // to explicitly form the 64-bit value if any.
2045     if (DstSize > 32) {
2046       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2047       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2048           .addDef(ExtSrc)
2049           .addImm(0)
2050           .addUse(SrcReg)
2051           .addImm(AArch64::sub_32);
2052       I.getOperand(1).setReg(ExtSrc);
2053     }
2054     return selectCopy(I, TII, MRI, TRI, RBI);
2055   }
2056
2057   case TargetOpcode::G_ZEXT:
2058   case TargetOpcode::G_SEXT: {
2059     unsigned Opcode = I.getOpcode();
2060     const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2061     const Register DefReg = I.getOperand(0).getReg();
2062     const Register SrcReg = I.getOperand(1).getReg();
2063     const LLT DstTy = MRI.getType(DefReg);
2064     const LLT SrcTy = MRI.getType(SrcReg);
2065     unsigned DstSize = DstTy.getSizeInBits();
2066     unsigned SrcSize = SrcTy.getSizeInBits();
2067
2068     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2069                AArch64::GPRRegBankID &&
2070            "Unexpected ext regbank");
2071
2072     MachineIRBuilder MIB(I);
2073     MachineInstr *ExtI;
2074     if (DstTy.isVector())
2075       return false; // Should be handled by imported patterns.
2076
2077     // First check if we're extending the result of a load which has a dest type
2078     // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2079     // GPR register on AArch64 and all loads which are smaller automatically
2080     // zero-extend the upper bits. E.g.
2081     // %v(s8) = G_LOAD %p, :: (load 1)
2082     // %v2(s32) = G_ZEXT %v(s8)
2083     if (!IsSigned) {
2084       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2085       if (LoadMI &&
2086           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2087         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2088         unsigned BytesLoaded = MemOp->getSize();
2089         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2090           return selectCopy(I, TII, MRI, TRI, RBI);
2091       }
2092     }
2093
2094     if (DstSize == 64) {
2095       // FIXME: Can we avoid manually doing this?
2096       if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2097         LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2098                           << " operand\n");
2099         return false;
2100       }
2101
2102       auto SubregToReg =
2103           MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2104               .addImm(0)
2105               .addUse(SrcReg)
2106               .addImm(AArch64::sub_32);
2107
2108       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2109                              {DefReg}, {SubregToReg})
2110                   .addImm(0)
2111                   .addImm(SrcSize - 1);
2112     } else if (DstSize <= 32) {
2113       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2114                              {DefReg}, {SrcReg})
2115                   .addImm(0)
2116                   .addImm(SrcSize - 1);
2117     } else {
2118       return false;
2119     }
2120
2121     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2122     I.eraseFromParent();
2123     return true;
2124   }
2125
2126   case TargetOpcode::G_SITOFP:
2127   case TargetOpcode::G_UITOFP:
2128   case TargetOpcode::G_FPTOSI:
2129   case TargetOpcode::G_FPTOUI: {
2130     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2131               SrcTy = MRI.getType(I.getOperand(1).getReg());
2132     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2133     if (NewOpc == Opcode)
2134       return false;
2135
2136     I.setDesc(TII.get(NewOpc));
2137     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2138
2139     return true;
2140   }
2141
2142
2143   case TargetOpcode::G_INTTOPTR:
2144     // The importer is currently unable to import pointer types since they
2145     // didn't exist in SelectionDAG.
2146     return selectCopy(I, TII, MRI, TRI, RBI);
2147
2148   case TargetOpcode::G_BITCAST:
2149     // Imported SelectionDAG rules can handle every bitcast except those that
2150     // bitcast from a type to the same type. Ideally, these shouldn't occur
2151     // but we might not run an optimizer that deletes them. The other exception
2152     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2153     // of them.
2154     return selectCopy(I, TII, MRI, TRI, RBI);
2155
2156   case TargetOpcode::G_SELECT: {
2157     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2158       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2159                         << ", expected: " << LLT::scalar(1) << '\n');
2160       return false;
2161     }
2162
2163     const Register CondReg = I.getOperand(1).getReg();
2164     const Register TReg = I.getOperand(2).getReg();
2165     const Register FReg = I.getOperand(3).getReg();
2166
2167     if (tryOptSelect(I))
2168       return true;
2169
2170     Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2171     MachineInstr &TstMI =
2172         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2173              .addDef(AArch64::WZR)
2174              .addUse(CondReg)
2175              .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2176
2177     MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2178                                 .addDef(I.getOperand(0).getReg())
2179                                 .addUse(TReg)
2180                                 .addUse(FReg)
2181                                 .addImm(AArch64CC::NE);
2182
2183     constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2184     constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2185
2186     I.eraseFromParent();
2187     return true;
2188   }
2189   case TargetOpcode::G_ICMP: {
2190     if (Ty.isVector())
2191       return selectVectorICmp(I, MRI);
2192
2193     if (Ty != LLT::scalar(32)) {
2194       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2195                         << ", expected: " << LLT::scalar(32) << '\n');
2196       return false;
2197     }
2198
2199     MachineIRBuilder MIRBuilder(I);
2200     if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2201                             MIRBuilder))
2202       return false;
2203     emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2204                     MIRBuilder);
2205     I.eraseFromParent();
2206     return true;
2207   }
2208
2209   case TargetOpcode::G_FCMP: {
2210     if (Ty != LLT::scalar(32)) {
2211       LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2212                         << ", expected: " << LLT::scalar(32) << '\n');
2213       return false;
2214     }
2215
2216     unsigned CmpOpc = selectFCMPOpc(I, MRI);
2217     if (!CmpOpc)
2218       return false;
2219
2220     // FIXME: regbank
2221
2222     AArch64CC::CondCode CC1, CC2;
2223     changeFCMPPredToAArch64CC(
2224         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2225
2226     // Partially build the compare. Decide if we need to add a use for the
2227     // third operand based off whether or not we're comparing against 0.0.
2228     auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2229                      .addUse(I.getOperand(2).getReg());
2230
2231     // If we don't have an immediate compare, then we need to add a use of the
2232     // register which wasn't used for the immediate.
2233     // Note that the immediate will always be the last operand.
2234     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2235       CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2236
2237     const Register DefReg = I.getOperand(0).getReg();
2238     Register Def1Reg = DefReg;
2239     if (CC2 != AArch64CC::AL)
2240       Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2241
2242     MachineInstr &CSetMI =
2243         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2244              .addDef(Def1Reg)
2245              .addUse(AArch64::WZR)
2246              .addUse(AArch64::WZR)
2247              .addImm(getInvertedCondCode(CC1));
2248
2249     if (CC2 != AArch64CC::AL) {
2250       Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2251       MachineInstr &CSet2MI =
2252           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2253                .addDef(Def2Reg)
2254                .addUse(AArch64::WZR)
2255                .addUse(AArch64::WZR)
2256                .addImm(getInvertedCondCode(CC2));
2257       MachineInstr &OrMI =
2258           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2259                .addDef(DefReg)
2260                .addUse(Def1Reg)
2261                .addUse(Def2Reg);
2262       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2263       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2264     }
2265     constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2266     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2267
2268     I.eraseFromParent();
2269     return true;
2270   }
2271   case TargetOpcode::G_VASTART:
2272     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2273                                 : selectVaStartAAPCS(I, MF, MRI);
2274   case TargetOpcode::G_INTRINSIC:
2275     return selectIntrinsic(I, MRI);
2276   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2277     return selectIntrinsicWithSideEffects(I, MRI);
2278   case TargetOpcode::G_IMPLICIT_DEF: {
2279     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2280     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2281     const Register DstReg = I.getOperand(0).getReg();
2282     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2283     const TargetRegisterClass *DstRC =
2284         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2285     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2286     return true;
2287   }
2288   case TargetOpcode::G_BLOCK_ADDR: {
2289     if (TM.getCodeModel() == CodeModel::Large) {
2290       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2291       I.eraseFromParent();
2292       return true;
2293     } else {
2294       I.setDesc(TII.get(AArch64::MOVaddrBA));
2295       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2296                            I.getOperand(0).getReg())
2297                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
2298                                         /* Offset */ 0, AArch64II::MO_PAGE)
2299                        .addBlockAddress(
2300                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2301                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2302       I.eraseFromParent();
2303       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2304     }
2305   }
2306   case TargetOpcode::G_INTRINSIC_TRUNC:
2307     return selectIntrinsicTrunc(I, MRI);
2308   case TargetOpcode::G_INTRINSIC_ROUND:
2309     return selectIntrinsicRound(I, MRI);
2310   case TargetOpcode::G_BUILD_VECTOR:
2311     return selectBuildVector(I, MRI);
2312   case TargetOpcode::G_MERGE_VALUES:
2313     return selectMergeValues(I, MRI);
2314   case TargetOpcode::G_UNMERGE_VALUES:
2315     return selectUnmergeValues(I, MRI);
2316   case TargetOpcode::G_SHUFFLE_VECTOR:
2317     return selectShuffleVector(I, MRI);
2318   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2319     return selectExtractElt(I, MRI);
2320   case TargetOpcode::G_INSERT_VECTOR_ELT:
2321     return selectInsertElt(I, MRI);
2322   case TargetOpcode::G_CONCAT_VECTORS:
2323     return selectConcatVectors(I, MRI);
2324   case TargetOpcode::G_JUMP_TABLE:
2325     return selectJumpTable(I, MRI);
2326   }
2327
2328   return false;
2329 }
2330
2331 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2332                                             MachineRegisterInfo &MRI) const {
2333   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2334   Register JTAddr = I.getOperand(0).getReg();
2335   unsigned JTI = I.getOperand(1).getIndex();
2336   Register Index = I.getOperand(2).getReg();
2337   MachineIRBuilder MIB(I);
2338
2339   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2340   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2341   MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2342                  {JTAddr, Index})
2343       .addJumpTableIndex(JTI);
2344
2345   // Build the indirect branch.
2346   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2347   I.eraseFromParent();
2348   return true;
2349 }
2350
2351 bool AArch64InstructionSelector::selectJumpTable(
2352     MachineInstr &I, MachineRegisterInfo &MRI) const {
2353   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2354   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2355
2356   Register DstReg = I.getOperand(0).getReg();
2357   unsigned JTI = I.getOperand(1).getIndex();
2358   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2359   MachineIRBuilder MIB(I);
2360   auto MovMI =
2361     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2362           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2363           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2364   I.eraseFromParent();
2365   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2366 }
2367
2368 bool AArch64InstructionSelector::selectTLSGlobalValue(
2369     MachineInstr &I, MachineRegisterInfo &MRI) const {
2370   if (!STI.isTargetMachO())
2371     return false;
2372   MachineFunction &MF = *I.getParent()->getParent();
2373   MF.getFrameInfo().setAdjustsStack(true);
2374
2375   const GlobalValue &GV = *I.getOperand(1).getGlobal();
2376   MachineIRBuilder MIB(I);
2377
2378   MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2379       .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2380
2381   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2382                              {Register(AArch64::X0)})
2383                   .addImm(0);
2384
2385   // TLS calls preserve all registers except those that absolutely must be
2386   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2387   // silly).
2388   MIB.buildInstr(AArch64::BLR, {}, {Load})
2389       .addDef(AArch64::X0, RegState::Implicit)
2390       .addRegMask(TRI.getTLSCallPreservedMask());
2391
2392   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2393   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2394                                MRI);
2395   I.eraseFromParent();
2396   return true;
2397 }
2398
2399 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2400     MachineInstr &I, MachineRegisterInfo &MRI) const {
2401   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2402
2403   // Select the correct opcode.
2404   unsigned Opc = 0;
2405   if (!SrcTy.isVector()) {
2406     switch (SrcTy.getSizeInBits()) {
2407     default:
2408     case 16:
2409       Opc = AArch64::FRINTZHr;
2410       break;
2411     case 32:
2412       Opc = AArch64::FRINTZSr;
2413       break;
2414     case 64:
2415       Opc = AArch64::FRINTZDr;
2416       break;
2417     }
2418   } else {
2419     unsigned NumElts = SrcTy.getNumElements();
2420     switch (SrcTy.getElementType().getSizeInBits()) {
2421     default:
2422       break;
2423     case 16:
2424       if (NumElts == 4)
2425         Opc = AArch64::FRINTZv4f16;
2426       else if (NumElts == 8)
2427         Opc = AArch64::FRINTZv8f16;
2428       break;
2429     case 32:
2430       if (NumElts == 2)
2431         Opc = AArch64::FRINTZv2f32;
2432       else if (NumElts == 4)
2433         Opc = AArch64::FRINTZv4f32;
2434       break;
2435     case 64:
2436       if (NumElts == 2)
2437         Opc = AArch64::FRINTZv2f64;
2438       break;
2439     }
2440   }
2441
2442   if (!Opc) {
2443     // Didn't get an opcode above, bail.
2444     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2445     return false;
2446   }
2447
2448   // Legalization would have set us up perfectly for this; we just need to
2449   // set the opcode and move on.
2450   I.setDesc(TII.get(Opc));
2451   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2452 }
2453
2454 bool AArch64InstructionSelector::selectIntrinsicRound(
2455     MachineInstr &I, MachineRegisterInfo &MRI) const {
2456   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2457
2458   // Select the correct opcode.
2459   unsigned Opc = 0;
2460   if (!SrcTy.isVector()) {
2461     switch (SrcTy.getSizeInBits()) {
2462     default:
2463     case 16:
2464       Opc = AArch64::FRINTAHr;
2465       break;
2466     case 32:
2467       Opc = AArch64::FRINTASr;
2468       break;
2469     case 64:
2470       Opc = AArch64::FRINTADr;
2471       break;
2472     }
2473   } else {
2474     unsigned NumElts = SrcTy.getNumElements();
2475     switch (SrcTy.getElementType().getSizeInBits()) {
2476     default:
2477       break;
2478     case 16:
2479       if (NumElts == 4)
2480         Opc = AArch64::FRINTAv4f16;
2481       else if (NumElts == 8)
2482         Opc = AArch64::FRINTAv8f16;
2483       break;
2484     case 32:
2485       if (NumElts == 2)
2486         Opc = AArch64::FRINTAv2f32;
2487       else if (NumElts == 4)
2488         Opc = AArch64::FRINTAv4f32;
2489       break;
2490     case 64:
2491       if (NumElts == 2)
2492         Opc = AArch64::FRINTAv2f64;
2493       break;
2494     }
2495   }
2496
2497   if (!Opc) {
2498     // Didn't get an opcode above, bail.
2499     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2500     return false;
2501   }
2502
2503   // Legalization would have set us up perfectly for this; we just need to
2504   // set the opcode and move on.
2505   I.setDesc(TII.get(Opc));
2506   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2507 }
2508
2509 bool AArch64InstructionSelector::selectVectorICmp(
2510     MachineInstr &I, MachineRegisterInfo &MRI) const {
2511   Register DstReg = I.getOperand(0).getReg();
2512   LLT DstTy = MRI.getType(DstReg);
2513   Register SrcReg = I.getOperand(2).getReg();
2514   Register Src2Reg = I.getOperand(3).getReg();
2515   LLT SrcTy = MRI.getType(SrcReg);
2516
2517   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2518   unsigned NumElts = DstTy.getNumElements();
2519
2520   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2521   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2522   // Third index is cc opcode:
2523   // 0 == eq
2524   // 1 == ugt
2525   // 2 == uge
2526   // 3 == ult
2527   // 4 == ule
2528   // 5 == sgt
2529   // 6 == sge
2530   // 7 == slt
2531   // 8 == sle
2532   // ne is done by negating 'eq' result.
2533
2534   // This table below assumes that for some comparisons the operands will be
2535   // commuted.
2536   // ult op == commute + ugt op
2537   // ule op == commute + uge op
2538   // slt op == commute + sgt op
2539   // sle op == commute + sge op
2540   unsigned PredIdx = 0;
2541   bool SwapOperands = false;
2542   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2543   switch (Pred) {
2544   case CmpInst::ICMP_NE:
2545   case CmpInst::ICMP_EQ:
2546     PredIdx = 0;
2547     break;
2548   case CmpInst::ICMP_UGT:
2549     PredIdx = 1;
2550     break;
2551   case CmpInst::ICMP_UGE:
2552     PredIdx = 2;
2553     break;
2554   case CmpInst::ICMP_ULT:
2555     PredIdx = 3;
2556     SwapOperands = true;
2557     break;
2558   case CmpInst::ICMP_ULE:
2559     PredIdx = 4;
2560     SwapOperands = true;
2561     break;
2562   case CmpInst::ICMP_SGT:
2563     PredIdx = 5;
2564     break;
2565   case CmpInst::ICMP_SGE:
2566     PredIdx = 6;
2567     break;
2568   case CmpInst::ICMP_SLT:
2569     PredIdx = 7;
2570     SwapOperands = true;
2571     break;
2572   case CmpInst::ICMP_SLE:
2573     PredIdx = 8;
2574     SwapOperands = true;
2575     break;
2576   default:
2577     llvm_unreachable("Unhandled icmp predicate");
2578     return false;
2579   }
2580
2581   // This table obviously should be tablegen'd when we have our GISel native
2582   // tablegen selector.
2583
2584   static const unsigned OpcTable[4][4][9] = {
2585       {
2586           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2587            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2588            0 /* invalid */},
2589           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2590            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2591            0 /* invalid */},
2592           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2593            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2594            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2595           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2596            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2597            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2598       },
2599       {
2600           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2601            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2602            0 /* invalid */},
2603           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2604            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2605            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2606           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2607            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2608            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2609           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2610            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2611            0 /* invalid */}
2612       },
2613       {
2614           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2615            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2616            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2617           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2618            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2619            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2620           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2621            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2622            0 /* invalid */},
2623           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2624            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2625            0 /* invalid */}
2626       },
2627       {
2628           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2629            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2630            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2631           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2632            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2633            0 /* invalid */},
2634           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2635            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2636            0 /* invalid */},
2637           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2638            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2639            0 /* invalid */}
2640       },
2641   };
2642   unsigned EltIdx = Log2_32(SrcEltSize / 8);
2643   unsigned NumEltsIdx = Log2_32(NumElts / 2);
2644   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2645   if (!Opc) {
2646     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2647     return false;
2648   }
2649
2650   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2651   const TargetRegisterClass *SrcRC =
2652       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2653   if (!SrcRC) {
2654     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2655     return false;
2656   }
2657
2658   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2659   if (SrcTy.getSizeInBits() == 128)
2660     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2661
2662   if (SwapOperands)
2663     std::swap(SrcReg, Src2Reg);
2664
2665   MachineIRBuilder MIB(I);
2666   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2667   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2668
2669   // Invert if we had a 'ne' cc.
2670   if (NotOpc) {
2671     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2672     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2673   } else {
2674     MIB.buildCopy(DstReg, Cmp.getReg(0));
2675   }
2676   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2677   I.eraseFromParent();
2678   return true;
2679 }
2680
2681 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2682     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2683     MachineIRBuilder &MIRBuilder) const {
2684   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2685
2686   auto BuildFn = [&](unsigned SubregIndex) {
2687     auto Ins =
2688         MIRBuilder
2689             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2690             .addImm(SubregIndex);
2691     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2692     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2693     return &*Ins;
2694   };
2695
2696   switch (EltSize) {
2697   case 16:
2698     return BuildFn(AArch64::hsub);
2699   case 32:
2700     return BuildFn(AArch64::ssub);
2701   case 64:
2702     return BuildFn(AArch64::dsub);
2703   default:
2704     return nullptr;
2705   }
2706 }
2707
2708 bool AArch64InstructionSelector::selectMergeValues(
2709     MachineInstr &I, MachineRegisterInfo &MRI) const {
2710   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2711   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2712   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2713   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2714   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2715
2716   if (I.getNumOperands() != 3)
2717     return false;
2718
2719   // Merging 2 s64s into an s128.
2720   if (DstTy == LLT::scalar(128)) {
2721     if (SrcTy.getSizeInBits() != 64)
2722       return false;
2723     MachineIRBuilder MIB(I);
2724     Register DstReg = I.getOperand(0).getReg();
2725     Register Src1Reg = I.getOperand(1).getReg();
2726     Register Src2Reg = I.getOperand(2).getReg();
2727     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2728     MachineInstr *InsMI =
2729         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2730     if (!InsMI)
2731       return false;
2732     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2733                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
2734     if (!Ins2MI)
2735       return false;
2736     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2737     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2738     I.eraseFromParent();
2739     return true;
2740   }
2741
2742   if (RB.getID() != AArch64::GPRRegBankID)
2743     return false;
2744
2745   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2746     return false;
2747
2748   auto *DstRC = &AArch64::GPR64RegClass;
2749   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2750   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2751                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2752                                 .addDef(SubToRegDef)
2753                                 .addImm(0)
2754                                 .addUse(I.getOperand(1).getReg())
2755                                 .addImm(AArch64::sub_32);
2756   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2757   // Need to anyext the second scalar before we can use bfm
2758   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2759                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2760                                 .addDef(SubToRegDef2)
2761                                 .addImm(0)
2762                                 .addUse(I.getOperand(2).getReg())
2763                                 .addImm(AArch64::sub_32);
2764   MachineInstr &BFM =
2765       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2766            .addDef(I.getOperand(0).getReg())
2767            .addUse(SubToRegDef)
2768            .addUse(SubToRegDef2)
2769            .addImm(32)
2770            .addImm(31);
2771   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2772   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2773   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2774   I.eraseFromParent();
2775   return true;
2776 }
2777
2778 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2779                               const unsigned EltSize) {
2780   // Choose a lane copy opcode and subregister based off of the size of the
2781   // vector's elements.
2782   switch (EltSize) {
2783   case 16:
2784     CopyOpc = AArch64::CPYi16;
2785     ExtractSubReg = AArch64::hsub;
2786     break;
2787   case 32:
2788     CopyOpc = AArch64::CPYi32;
2789     ExtractSubReg = AArch64::ssub;
2790     break;
2791   case 64:
2792     CopyOpc = AArch64::CPYi64;
2793     ExtractSubReg = AArch64::dsub;
2794     break;
2795   default:
2796     // Unknown size, bail out.
2797     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2798     return false;
2799   }
2800   return true;
2801 }
2802
2803 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2804     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2805     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2806   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2807   unsigned CopyOpc = 0;
2808   unsigned ExtractSubReg = 0;
2809   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2810     LLVM_DEBUG(
2811         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2812     return nullptr;
2813   }
2814
2815   const TargetRegisterClass *DstRC =
2816       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2817   if (!DstRC) {
2818     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2819     return nullptr;
2820   }
2821
2822   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2823   const LLT &VecTy = MRI.getType(VecReg);
2824   const TargetRegisterClass *VecRC =
2825       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2826   if (!VecRC) {
2827     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2828     return nullptr;
2829   }
2830
2831   // The register that we're going to copy into.
2832   Register InsertReg = VecReg;
2833   if (!DstReg)
2834     DstReg = MRI.createVirtualRegister(DstRC);
2835   // If the lane index is 0, we just use a subregister COPY.
2836   if (LaneIdx == 0) {
2837     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2838                     .addReg(VecReg, 0, ExtractSubReg);
2839     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2840     return &*Copy;
2841   }
2842
2843   // Lane copies require 128-bit wide registers. If we're dealing with an
2844   // unpacked vector, then we need to move up to that width. Insert an implicit
2845   // def and a subregister insert to get us there.
2846   if (VecTy.getSizeInBits() != 128) {
2847     MachineInstr *ScalarToVector = emitScalarToVector(
2848         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2849     if (!ScalarToVector)
2850       return nullptr;
2851     InsertReg = ScalarToVector->getOperand(0).getReg();
2852   }
2853
2854   MachineInstr *LaneCopyMI =
2855       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2856   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2857
2858   // Make sure that we actually constrain the initial copy.
2859   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2860   return LaneCopyMI;
2861 }
2862
2863 bool AArch64InstructionSelector::selectExtractElt(
2864     MachineInstr &I, MachineRegisterInfo &MRI) const {
2865   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2866          "unexpected opcode!");
2867   Register DstReg = I.getOperand(0).getReg();
2868   const LLT NarrowTy = MRI.getType(DstReg);
2869   const Register SrcReg = I.getOperand(1).getReg();
2870   const LLT WideTy = MRI.getType(SrcReg);
2871   (void)WideTy;
2872   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2873          "source register size too small!");
2874   assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2875
2876   // Need the lane index to determine the correct copy opcode.
2877   MachineOperand &LaneIdxOp = I.getOperand(2);
2878   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2879
2880   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2881     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2882     return false;
2883   }
2884
2885   // Find the index to extract from.
2886   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2887   if (!VRegAndVal)
2888     return false;
2889   unsigned LaneIdx = VRegAndVal->Value;
2890
2891   MachineIRBuilder MIRBuilder(I);
2892
2893   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2894   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2895                                                LaneIdx, MIRBuilder);
2896   if (!Extract)
2897     return false;
2898
2899   I.eraseFromParent();
2900   return true;
2901 }
2902
2903 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2904     MachineInstr &I, MachineRegisterInfo &MRI) const {
2905   unsigned NumElts = I.getNumOperands() - 1;
2906   Register SrcReg = I.getOperand(NumElts).getReg();
2907   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2908   const LLT SrcTy = MRI.getType(SrcReg);
2909
2910   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2911   if (SrcTy.getSizeInBits() > 128) {
2912     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2913     return false;
2914   }
2915
2916   MachineIRBuilder MIB(I);
2917
2918   // We implement a split vector operation by treating the sub-vectors as
2919   // scalars and extracting them.
2920   const RegisterBank &DstRB =
2921       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2922   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2923     Register Dst = I.getOperand(OpIdx).getReg();
2924     MachineInstr *Extract =
2925         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2926     if (!Extract)
2927       return false;
2928   }
2929   I.eraseFromParent();
2930   return true;
2931 }
2932
2933 bool AArch64InstructionSelector::selectUnmergeValues(
2934     MachineInstr &I, MachineRegisterInfo &MRI) const {
2935   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2936          "unexpected opcode");
2937
2938   // TODO: Handle unmerging into GPRs and from scalars to scalars.
2939   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2940           AArch64::FPRRegBankID ||
2941       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2942           AArch64::FPRRegBankID) {
2943     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2944                          "currently unsupported.\n");
2945     return false;
2946   }
2947
2948   // The last operand is the vector source register, and every other operand is
2949   // a register to unpack into.
2950   unsigned NumElts = I.getNumOperands() - 1;
2951   Register SrcReg = I.getOperand(NumElts).getReg();
2952   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2953   const LLT WideTy = MRI.getType(SrcReg);
2954   (void)WideTy;
2955   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
2956          "can only unmerge from vector or s128 types!");
2957   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2958          "source register size too small!");
2959
2960   if (!NarrowTy.isScalar())
2961     return selectSplitVectorUnmerge(I, MRI);
2962
2963   MachineIRBuilder MIB(I);
2964
2965   // Choose a lane copy opcode and subregister based off of the size of the
2966   // vector's elements.
2967   unsigned CopyOpc = 0;
2968   unsigned ExtractSubReg = 0;
2969   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2970     return false;
2971
2972   // Set up for the lane copies.
2973   MachineBasicBlock &MBB = *I.getParent();
2974
2975   // Stores the registers we'll be copying from.
2976   SmallVector<Register, 4> InsertRegs;
2977
2978   // We'll use the first register twice, so we only need NumElts-1 registers.
2979   unsigned NumInsertRegs = NumElts - 1;
2980
2981   // If our elements fit into exactly 128 bits, then we can copy from the source
2982   // directly. Otherwise, we need to do a bit of setup with some subregister
2983   // inserts.
2984   if (NarrowTy.getSizeInBits() * NumElts == 128) {
2985     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2986   } else {
2987     // No. We have to perform subregister inserts. For each insert, create an
2988     // implicit def and a subregister insert, and save the register we create.
2989     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2990       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2991       MachineInstr &ImpDefMI =
2992           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2993                    ImpDefReg);
2994
2995       // Now, create the subregister insert from SrcReg.
2996       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2997       MachineInstr &InsMI =
2998           *BuildMI(MBB, I, I.getDebugLoc(),
2999                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3000                .addUse(ImpDefReg)
3001                .addUse(SrcReg)
3002                .addImm(AArch64::dsub);
3003
3004       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3005       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3006
3007       // Save the register so that we can copy from it after.
3008       InsertRegs.push_back(InsertReg);
3009     }
3010   }
3011
3012   // Now that we've created any necessary subregister inserts, we can
3013   // create the copies.
3014   //
3015   // Perform the first copy separately as a subregister copy.
3016   Register CopyTo = I.getOperand(0).getReg();
3017   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3018                        .addReg(InsertRegs[0], 0, ExtractSubReg);
3019   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3020
3021   // Now, perform the remaining copies as vector lane copies.
3022   unsigned LaneIdx = 1;
3023   for (Register InsReg : InsertRegs) {
3024     Register CopyTo = I.getOperand(LaneIdx).getReg();
3025     MachineInstr &CopyInst =
3026         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3027              .addUse(InsReg)
3028              .addImm(LaneIdx);
3029     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3030     ++LaneIdx;
3031   }
3032
3033   // Separately constrain the first copy's destination. Because of the
3034   // limitation in constrainOperandRegClass, we can't guarantee that this will
3035   // actually be constrained. So, do it ourselves using the second operand.
3036   const TargetRegisterClass *RC =
3037       MRI.getRegClassOrNull(I.getOperand(1).getReg());
3038   if (!RC) {
3039     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3040     return false;
3041   }
3042
3043   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3044   I.eraseFromParent();
3045   return true;
3046 }
3047
3048 bool AArch64InstructionSelector::selectConcatVectors(
3049     MachineInstr &I, MachineRegisterInfo &MRI) const {
3050   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3051          "Unexpected opcode");
3052   Register Dst = I.getOperand(0).getReg();
3053   Register Op1 = I.getOperand(1).getReg();
3054   Register Op2 = I.getOperand(2).getReg();
3055   MachineIRBuilder MIRBuilder(I);
3056   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3057   if (!ConcatMI)
3058     return false;
3059   I.eraseFromParent();
3060   return true;
3061 }
3062
3063 unsigned
3064 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3065                                                   MachineFunction &MF) const {
3066   Type *CPTy = CPVal->getType();
3067   unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3068   if (Align == 0)
3069     Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3070
3071   MachineConstantPool *MCP = MF.getConstantPool();
3072   return MCP->getConstantPoolIndex(CPVal, Align);
3073 }
3074
3075 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3076     Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3077   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3078
3079   auto Adrp =
3080       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3081           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3082
3083   MachineInstr *LoadMI = nullptr;
3084   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3085   case 16:
3086     LoadMI =
3087         &*MIRBuilder
3088               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3089               .addConstantPoolIndex(CPIdx, 0,
3090                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3091     break;
3092   case 8:
3093     LoadMI = &*MIRBuilder
3094                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3095                  .addConstantPoolIndex(
3096                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3097     break;
3098   default:
3099     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3100                       << *CPVal->getType());
3101     return nullptr;
3102   }
3103   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3104   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3105   return LoadMI;
3106 }
3107
3108 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
3109 /// size and RB.
3110 static std::pair<unsigned, unsigned>
3111 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3112   unsigned Opc, SubregIdx;
3113   if (RB.getID() == AArch64::GPRRegBankID) {
3114     if (EltSize == 32) {
3115       Opc = AArch64::INSvi32gpr;
3116       SubregIdx = AArch64::ssub;
3117     } else if (EltSize == 64) {
3118       Opc = AArch64::INSvi64gpr;
3119       SubregIdx = AArch64::dsub;
3120     } else {
3121       llvm_unreachable("invalid elt size!");
3122     }
3123   } else {
3124     if (EltSize == 8) {
3125       Opc = AArch64::INSvi8lane;
3126       SubregIdx = AArch64::bsub;
3127     } else if (EltSize == 16) {
3128       Opc = AArch64::INSvi16lane;
3129       SubregIdx = AArch64::hsub;
3130     } else if (EltSize == 32) {
3131       Opc = AArch64::INSvi32lane;
3132       SubregIdx = AArch64::ssub;
3133     } else if (EltSize == 64) {
3134       Opc = AArch64::INSvi64lane;
3135       SubregIdx = AArch64::dsub;
3136     } else {
3137       llvm_unreachable("invalid elt size!");
3138     }
3139   }
3140   return std::make_pair(Opc, SubregIdx);
3141 }
3142
3143 MachineInstr *
3144 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3145                                     MachineOperand &RHS,
3146                                     MachineIRBuilder &MIRBuilder) const {
3147   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3148   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3149   static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3150                                        {AArch64::ADDWrr, AArch64::ADDWri}};
3151   bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3152   auto ImmFns = selectArithImmed(RHS);
3153   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3154   auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3155
3156   // If we matched a valid constant immediate, add those operands.
3157   if (ImmFns) {
3158     for (auto &RenderFn : *ImmFns)
3159       RenderFn(AddMI);
3160   } else {
3161     AddMI.addUse(RHS.getReg());
3162   }
3163
3164   constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3165   return &*AddMI;
3166 }
3167
3168 MachineInstr *
3169 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3170                                     MachineIRBuilder &MIRBuilder) const {
3171   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3172   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3173   static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3174                                        {AArch64::ADDSWrr, AArch64::ADDSWri}};
3175   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3176   auto ImmFns = selectArithImmed(RHS);
3177   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3178   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3179
3180   auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3181
3182   // If we matched a valid constant immediate, add those operands.
3183   if (ImmFns) {
3184     for (auto &RenderFn : *ImmFns)
3185       RenderFn(CmpMI);
3186   } else {
3187     CmpMI.addUse(RHS.getReg());
3188   }
3189
3190   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3191   return &*CmpMI;
3192 }
3193
3194 MachineInstr *
3195 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3196                                     MachineIRBuilder &MIRBuilder) const {
3197   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3198   unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3199   bool Is32Bit = (RegSize == 32);
3200   static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3201                                        {AArch64::ANDSWrr, AArch64::ANDSWri}};
3202   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3203
3204   // We might be able to fold in an immediate into the TST. We need to make sure
3205   // it's a logical immediate though, since ANDS requires that.
3206   auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3207   bool IsImmForm = ValAndVReg.hasValue() &&
3208                    AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3209   unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3210   auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3211
3212   if (IsImmForm)
3213     TstMI.addImm(
3214         AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3215   else
3216     TstMI.addUse(RHS);
3217
3218   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3219   return &*TstMI;
3220 }
3221
3222 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3223     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3224     MachineIRBuilder &MIRBuilder) const {
3225   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3226   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3227
3228   // Fold the compare if possible.
3229   MachineInstr *FoldCmp =
3230       tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3231   if (FoldCmp)
3232     return FoldCmp;
3233
3234   // Can't fold into a CMN. Just emit a normal compare.
3235   unsigned CmpOpc = 0;
3236   Register ZReg;
3237
3238   LLT CmpTy = MRI.getType(LHS.getReg());
3239   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3240          "Expected scalar or pointer");
3241   if (CmpTy == LLT::scalar(32)) {
3242     CmpOpc = AArch64::SUBSWrr;
3243     ZReg = AArch64::WZR;
3244   } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3245     CmpOpc = AArch64::SUBSXrr;
3246     ZReg = AArch64::XZR;
3247   } else {
3248     return nullptr;
3249   }
3250
3251   // Try to match immediate forms.
3252   auto ImmFns = selectArithImmed(RHS);
3253   if (ImmFns)
3254     CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3255
3256   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3257   // If we matched a valid constant immediate, add those operands.
3258   if (ImmFns) {
3259     for (auto &RenderFn : *ImmFns)
3260       RenderFn(CmpMI);
3261   } else {
3262     CmpMI.addUse(RHS.getReg());
3263   }
3264
3265   // Make sure that we can constrain the compare that we emitted.
3266   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3267   return &*CmpMI;
3268 }
3269
3270 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3271     Optional<Register> Dst, Register Op1, Register Op2,
3272     MachineIRBuilder &MIRBuilder) const {
3273   // We implement a vector concat by:
3274   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3275   // 2. Insert the upper vector into the destination's upper element
3276   // TODO: some of this code is common with G_BUILD_VECTOR handling.
3277   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3278
3279   const LLT Op1Ty = MRI.getType(Op1);
3280   const LLT Op2Ty = MRI.getType(Op2);
3281
3282   if (Op1Ty != Op2Ty) {
3283     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3284     return nullptr;
3285   }
3286   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3287
3288   if (Op1Ty.getSizeInBits() >= 128) {
3289     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3290     return nullptr;
3291   }
3292
3293   // At the moment we just support 64 bit vector concats.
3294   if (Op1Ty.getSizeInBits() != 64) {
3295     LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3296     return nullptr;
3297   }
3298
3299   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3300   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3301   const TargetRegisterClass *DstRC =
3302       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3303
3304   MachineInstr *WidenedOp1 =
3305       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3306   MachineInstr *WidenedOp2 =
3307       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3308   if (!WidenedOp1 || !WidenedOp2) {
3309     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3310     return nullptr;
3311   }
3312
3313   // Now do the insert of the upper element.
3314   unsigned InsertOpc, InsSubRegIdx;
3315   std::tie(InsertOpc, InsSubRegIdx) =
3316       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3317
3318   if (!Dst)
3319     Dst = MRI.createVirtualRegister(DstRC);
3320   auto InsElt =
3321       MIRBuilder
3322           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3323           .addImm(1) /* Lane index */
3324           .addUse(WidenedOp2->getOperand(0).getReg())
3325           .addImm(0);
3326   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3327   return &*InsElt;
3328 }
3329
3330 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3331     MachineInstr &I, MachineRegisterInfo &MRI) const {
3332   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3333          "Expected a G_FCONSTANT!");
3334   MachineOperand &ImmOp = I.getOperand(1);
3335   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3336
3337   // Only handle 32 and 64 bit defs for now.
3338   if (DefSize != 32 && DefSize != 64)
3339     return nullptr;
3340
3341   // Don't handle null values using FMOV.
3342   if (ImmOp.getFPImm()->isNullValue())
3343     return nullptr;
3344
3345   // Get the immediate representation for the FMOV.
3346   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3347   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3348                           : AArch64_AM::getFP64Imm(ImmValAPF);
3349
3350   // If this is -1, it means the immediate can't be represented as the requested
3351   // floating point value. Bail.
3352   if (Imm == -1)
3353     return nullptr;
3354
3355   // Update MI to represent the new FMOV instruction, constrain it, and return.
3356   ImmOp.ChangeToImmediate(Imm);
3357   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3358   I.setDesc(TII.get(MovOpc));
3359   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3360   return &I;
3361 }
3362
3363 MachineInstr *
3364 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3365                                      MachineIRBuilder &MIRBuilder) const {
3366   // CSINC increments the result when the predicate is false. Invert it.
3367   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3368       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3369   auto I =
3370       MIRBuilder
3371     .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3372           .addImm(InvCC);
3373   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3374   return &*I;
3375 }
3376
3377 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3378   MachineIRBuilder MIB(I);
3379   MachineRegisterInfo &MRI = *MIB.getMRI();
3380   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3381
3382   // We want to recognize this pattern:
3383   //
3384   // $z = G_FCMP pred, $x, $y
3385   // ...
3386   // $w = G_SELECT $z, $a, $b
3387   //
3388   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3389   // some copies/truncs in between.)
3390   //
3391   // If we see this, then we can emit something like this:
3392   //
3393   // fcmp $x, $y
3394   // fcsel $w, $a, $b, pred
3395   //
3396   // Rather than emitting both of the rather long sequences in the standard
3397   // G_FCMP/G_SELECT select methods.
3398
3399   // First, check if the condition is defined by a compare.
3400   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3401   while (CondDef) {
3402     // We can only fold if all of the defs have one use.
3403     if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3404       return false;
3405
3406     // We can skip over G_TRUNC since the condition is 1-bit.
3407     // Truncating/extending can have no impact on the value.
3408     unsigned Opc = CondDef->getOpcode();
3409     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3410       break;
3411
3412     // Can't see past copies from physregs.
3413     if (Opc == TargetOpcode::COPY &&
3414         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3415       return false;
3416
3417     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3418   }
3419
3420   // Is the condition defined by a compare?
3421   if (!CondDef)
3422     return false;
3423
3424   unsigned CondOpc = CondDef->getOpcode();
3425   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3426     return false;
3427
3428   AArch64CC::CondCode CondCode;
3429   if (CondOpc == TargetOpcode::G_ICMP) {
3430     CondCode = changeICMPPredToAArch64CC(
3431         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3432     if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3433                             CondDef->getOperand(1), MIB)) {
3434       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3435       return false;
3436     }
3437   } else {
3438     // Get the condition code for the select.
3439     AArch64CC::CondCode CondCode2;
3440     changeFCMPPredToAArch64CC(
3441         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3442         CondCode2);
3443
3444     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3445     // instructions to emit the comparison.
3446     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3447     // unnecessary.
3448     if (CondCode2 != AArch64CC::AL)
3449       return false;
3450
3451     // Make sure we'll be able to select the compare.
3452     unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3453     if (!CmpOpc)
3454       return false;
3455
3456     // Emit a new compare.
3457     auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3458     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3459       Cmp.addUse(CondDef->getOperand(3).getReg());
3460     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3461   }
3462
3463   // Emit the select.
3464   unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3465   auto CSel =
3466       MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3467                      {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3468           .addImm(CondCode);
3469   constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3470   I.eraseFromParent();
3471   return true;
3472 }
3473
3474 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3475     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3476     MachineIRBuilder &MIRBuilder) const {
3477   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3478          "Unexpected MachineOperand");
3479   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3480   // We want to find this sort of thing:
3481   // x = G_SUB 0, y
3482   // G_ICMP z, x
3483   //
3484   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3485   // e.g:
3486   //
3487   // cmn z, y
3488
3489   // Helper lambda to detect the subtract followed by the compare.
3490   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3491   auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3492     if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3493       return false;
3494
3495     // Need to make sure NZCV is the same at the end of the transformation.
3496     if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3497       return false;
3498
3499     // We want to match against SUBs.
3500     if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3501       return false;
3502
3503     // Make sure that we're getting
3504     // x = G_SUB 0, y
3505     auto ValAndVReg =
3506         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3507     if (!ValAndVReg || ValAndVReg->Value != 0)
3508       return false;
3509
3510     // This can safely be represented as a CMN.
3511     return true;
3512   };
3513
3514   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3515   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3516   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3517   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3518   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3519
3520   // Given this:
3521   //
3522   // x = G_SUB 0, y
3523   // G_ICMP x, z
3524   //
3525   // Produce this:
3526   //
3527   // cmn y, z
3528   if (IsCMN(LHSDef, CC))
3529     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3530
3531   // Same idea here, but with the RHS of the compare instead:
3532   //
3533   // Given this:
3534   //
3535   // x = G_SUB 0, y
3536   // G_ICMP z, x
3537   //
3538   // Produce this:
3539   //
3540   // cmn z, y
3541   if (IsCMN(RHSDef, CC))
3542     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3543
3544   // Given this:
3545   //
3546   // z = G_AND x, y
3547   // G_ICMP z, 0
3548   //
3549   // Produce this if the compare is signed:
3550   //
3551   // tst x, y
3552   if (!isUnsignedICMPPred(P) && LHSDef &&
3553       LHSDef->getOpcode() == TargetOpcode::G_AND) {
3554     // Make sure that the RHS is 0.
3555     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3556     if (!ValAndVReg || ValAndVReg->Value != 0)
3557       return nullptr;
3558
3559     return emitTST(LHSDef->getOperand(1).getReg(),
3560                    LHSDef->getOperand(2).getReg(), MIRBuilder);
3561   }
3562
3563   return nullptr;
3564 }
3565
3566 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3567   // Try to match a vector splat operation into a dup instruction.
3568   // We're looking for this pattern:
3569   //    %scalar:gpr(s64) = COPY $x0
3570   //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3571   //    %cst0:gpr(s32) = G_CONSTANT i32 0
3572   //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3573   //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3574   //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3575   //                                             %zerovec(<2 x s32>)
3576   //
3577   // ...into:
3578   // %splat = DUP %scalar
3579   // We use the regbank of the scalar to determine which kind of dup to use.
3580   MachineIRBuilder MIB(I);
3581   MachineRegisterInfo &MRI = *MIB.getMRI();
3582   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3583   using namespace TargetOpcode;
3584   using namespace MIPatternMatch;
3585
3586   // Begin matching the insert.
3587   auto *InsMI =
3588       getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3589   if (!InsMI)
3590     return false;
3591   // Match the undef vector operand.
3592   auto *UndefMI =
3593       getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3594   if (!UndefMI)
3595     return false;
3596   // Match the scalar being splatted.
3597   Register ScalarReg = InsMI->getOperand(2).getReg();
3598   const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3599   // Match the index constant 0.
3600   int64_t Index = 0;
3601   if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3602     return false;
3603
3604   // The shuffle's second operand doesn't matter if the mask is all zero.
3605   const Constant *Mask = I.getOperand(3).getShuffleMask();
3606   if (!isa<ConstantAggregateZero>(Mask))
3607     return false;
3608
3609   // We're done, now find out what kind of splat we need.
3610   LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3611   LLT EltTy = VecTy.getElementType();
3612   if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3613     LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3614     return false;
3615   }
3616   bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3617   static const unsigned OpcTable[2][2] = {
3618       {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3619       {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3620   unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3621
3622   // For FP splats, we need to widen the scalar reg via undef too.
3623   if (IsFP) {
3624     MachineInstr *Widen = emitScalarToVector(
3625         EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3626     if (!Widen)
3627       return false;
3628     ScalarReg = Widen->getOperand(0).getReg();
3629   }
3630   auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3631   if (IsFP)
3632     Dup.addImm(0);
3633   constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3634   I.eraseFromParent();
3635   return true;
3636 }
3637
3638 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3639   if (TM.getOptLevel() == CodeGenOpt::None)
3640     return false;
3641   if (tryOptVectorDup(I))
3642     return true;
3643   return false;
3644 }
3645
3646 bool AArch64InstructionSelector::selectShuffleVector(
3647     MachineInstr &I, MachineRegisterInfo &MRI) const {
3648   if (tryOptVectorShuffle(I))
3649     return true;
3650   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3651   Register Src1Reg = I.getOperand(1).getReg();
3652   const LLT Src1Ty = MRI.getType(Src1Reg);
3653   Register Src2Reg = I.getOperand(2).getReg();
3654   const LLT Src2Ty = MRI.getType(Src2Reg);
3655   const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
3656
3657   MachineBasicBlock &MBB = *I.getParent();
3658   MachineFunction &MF = *MBB.getParent();
3659   LLVMContext &Ctx = MF.getFunction().getContext();
3660
3661   SmallVector<int, 8> Mask;
3662   ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
3663
3664   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3665   // it's originated from a <1 x T> type. Those should have been lowered into
3666   // G_BUILD_VECTOR earlier.
3667   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3668     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3669     return false;
3670   }
3671
3672   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3673
3674   SmallVector<Constant *, 64> CstIdxs;
3675   for (int Val : Mask) {
3676     // For now, any undef indexes we'll just assume to be 0. This should be
3677     // optimized in future, e.g. to select DUP etc.
3678     Val = Val < 0 ? 0 : Val;
3679     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3680       unsigned Offset = Byte + Val * BytesPerElt;
3681       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3682     }
3683   }
3684
3685   MachineIRBuilder MIRBuilder(I);
3686
3687   // Use a constant pool to load the index vector for TBL.
3688   Constant *CPVal = ConstantVector::get(CstIdxs);
3689   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3690   if (!IndexLoad) {
3691     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3692     return false;
3693   }
3694
3695   if (DstTy.getSizeInBits() != 128) {
3696     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3697     // This case can be done with TBL1.
3698     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3699     if (!Concat) {
3700       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3701       return false;
3702     }
3703
3704     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3705     IndexLoad =
3706         emitScalarToVector(64, &AArch64::FPR128RegClass,
3707                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
3708
3709     auto TBL1 = MIRBuilder.buildInstr(
3710         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3711         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3712     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3713
3714     auto Copy =
3715         MIRBuilder
3716             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3717             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3718     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3719     I.eraseFromParent();
3720     return true;
3721   }
3722
3723   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3724   // Q registers for regalloc.
3725   auto RegSeq = MIRBuilder
3726                     .buildInstr(TargetOpcode::REG_SEQUENCE,
3727                                 {&AArch64::QQRegClass}, {Src1Reg})
3728                     .addImm(AArch64::qsub0)
3729                     .addUse(Src2Reg)
3730                     .addImm(AArch64::qsub1);
3731
3732   auto TBL2 =
3733       MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3734                             {RegSeq, IndexLoad->getOperand(0).getReg()});
3735   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3736   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3737   I.eraseFromParent();
3738   return true;
3739 }
3740
3741 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3742     Optional<Register> DstReg, Register SrcReg, Register EltReg,
3743     unsigned LaneIdx, const RegisterBank &RB,
3744     MachineIRBuilder &MIRBuilder) const {
3745   MachineInstr *InsElt = nullptr;
3746   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3747   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3748
3749   // Create a register to define with the insert if one wasn't passed in.
3750   if (!DstReg)
3751     DstReg = MRI.createVirtualRegister(DstRC);
3752
3753   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3754   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3755
3756   if (RB.getID() == AArch64::FPRRegBankID) {
3757     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3758     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3759                  .addImm(LaneIdx)
3760                  .addUse(InsSub->getOperand(0).getReg())
3761                  .addImm(0);
3762   } else {
3763     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3764                  .addImm(LaneIdx)
3765                  .addUse(EltReg);
3766   }
3767
3768   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3769   return InsElt;
3770 }
3771
3772 bool AArch64InstructionSelector::selectInsertElt(
3773     MachineInstr &I, MachineRegisterInfo &MRI) const {
3774   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3775
3776   // Get information on the destination.
3777   Register DstReg = I.getOperand(0).getReg();
3778   const LLT DstTy = MRI.getType(DstReg);
3779   unsigned VecSize = DstTy.getSizeInBits();
3780
3781   // Get information on the element we want to insert into the destination.
3782   Register EltReg = I.getOperand(2).getReg();
3783   const LLT EltTy = MRI.getType(EltReg);
3784   unsigned EltSize = EltTy.getSizeInBits();
3785   if (EltSize < 16 || EltSize > 64)
3786     return false; // Don't support all element types yet.
3787
3788   // Find the definition of the index. Bail out if it's not defined by a
3789   // G_CONSTANT.
3790   Register IdxReg = I.getOperand(3).getReg();
3791   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3792   if (!VRegAndVal)
3793     return false;
3794   unsigned LaneIdx = VRegAndVal->Value;
3795
3796   // Perform the lane insert.
3797   Register SrcReg = I.getOperand(1).getReg();
3798   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3799   MachineIRBuilder MIRBuilder(I);
3800
3801   if (VecSize < 128) {
3802     // If the vector we're inserting into is smaller than 128 bits, widen it
3803     // to 128 to do the insert.
3804     MachineInstr *ScalarToVec = emitScalarToVector(
3805         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3806     if (!ScalarToVec)
3807       return false;
3808     SrcReg = ScalarToVec->getOperand(0).getReg();
3809   }
3810
3811   // Create an insert into a new FPR128 register.
3812   // Note that if our vector is already 128 bits, we end up emitting an extra
3813   // register.
3814   MachineInstr *InsMI =
3815       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3816
3817   if (VecSize < 128) {
3818     // If we had to widen to perform the insert, then we have to demote back to
3819     // the original size to get the result we want.
3820     Register DemoteVec = InsMI->getOperand(0).getReg();
3821     const TargetRegisterClass *RC =
3822         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3823     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3824       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3825       return false;
3826     }
3827     unsigned SubReg = 0;
3828     if (!getSubRegForClass(RC, TRI, SubReg))
3829       return false;
3830     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3831       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3832                         << "\n");
3833       return false;
3834     }
3835     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3836         .addReg(DemoteVec, 0, SubReg);
3837     RBI.constrainGenericRegister(DstReg, *RC, MRI);
3838   } else {
3839     // No widening needed.
3840     InsMI->getOperand(0).setReg(DstReg);
3841     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3842   }
3843
3844   I.eraseFromParent();
3845   return true;
3846 }
3847
3848 bool AArch64InstructionSelector::selectBuildVector(
3849     MachineInstr &I, MachineRegisterInfo &MRI) const {
3850   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3851   // Until we port more of the optimized selections, for now just use a vector
3852   // insert sequence.
3853   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3854   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3855   unsigned EltSize = EltTy.getSizeInBits();
3856   if (EltSize < 16 || EltSize > 64)
3857     return false; // Don't support all element types yet.
3858   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3859   MachineIRBuilder MIRBuilder(I);
3860
3861   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3862   MachineInstr *ScalarToVec =
3863       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3864                          I.getOperand(1).getReg(), MIRBuilder);
3865   if (!ScalarToVec)
3866     return false;
3867
3868   Register DstVec = ScalarToVec->getOperand(0).getReg();
3869   unsigned DstSize = DstTy.getSizeInBits();
3870
3871   // Keep track of the last MI we inserted. Later on, we might be able to save
3872   // a copy using it.
3873   MachineInstr *PrevMI = nullptr;
3874   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3875     // Note that if we don't do a subregister copy, we can end up making an
3876     // extra register.
3877     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3878                               MIRBuilder);
3879     DstVec = PrevMI->getOperand(0).getReg();
3880   }
3881
3882   // If DstTy's size in bits is less than 128, then emit a subregister copy
3883   // from DstVec to the last register we've defined.
3884   if (DstSize < 128) {
3885     // Force this to be FPR using the destination vector.
3886     const TargetRegisterClass *RC =
3887         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3888     if (!RC)
3889       return false;
3890     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3891       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3892       return false;
3893     }
3894
3895     unsigned SubReg = 0;
3896     if (!getSubRegForClass(RC, TRI, SubReg))
3897       return false;
3898     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3899       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3900                         << "\n");
3901       return false;
3902     }
3903
3904     Register Reg = MRI.createVirtualRegister(RC);
3905     Register DstReg = I.getOperand(0).getReg();
3906
3907     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3908         .addReg(DstVec, 0, SubReg);
3909     MachineOperand &RegOp = I.getOperand(1);
3910     RegOp.setReg(Reg);
3911     RBI.constrainGenericRegister(DstReg, *RC, MRI);
3912   } else {
3913     // We don't need a subregister copy. Save a copy by re-using the
3914     // destination register on the final insert.
3915     assert(PrevMI && "PrevMI was null?");
3916     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3917     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3918   }
3919
3920   I.eraseFromParent();
3921   return true;
3922 }
3923
3924 /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
3925 /// ID if it exists, and 0 otherwise.
3926 static unsigned findIntrinsicID(MachineInstr &I) {
3927   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3928     return Op.isIntrinsicID();
3929   });
3930   if (IntrinOp == I.operands_end())
3931     return 0;
3932   return IntrinOp->getIntrinsicID();
3933 }
3934
3935 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3936     MachineInstr &I, MachineRegisterInfo &MRI) const {
3937   // Find the intrinsic ID.
3938   unsigned IntrinID = findIntrinsicID(I);
3939   if (!IntrinID)
3940     return false;
3941   MachineIRBuilder MIRBuilder(I);
3942
3943   // Select the instruction.
3944   switch (IntrinID) {
3945   default:
3946     return false;
3947   case Intrinsic::trap:
3948     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3949     break;
3950   case Intrinsic::debugtrap:
3951     if (!STI.isTargetWindows())
3952       return false;
3953     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3954     break;
3955   }
3956
3957   I.eraseFromParent();
3958   return true;
3959 }
3960
3961 bool AArch64InstructionSelector::selectIntrinsic(
3962     MachineInstr &I, MachineRegisterInfo &MRI) const {
3963   unsigned IntrinID = findIntrinsicID(I);
3964   if (!IntrinID)
3965     return false;
3966   MachineIRBuilder MIRBuilder(I);
3967
3968   switch (IntrinID) {
3969   default:
3970     break;
3971   case Intrinsic::aarch64_crypto_sha1h:
3972     Register DstReg = I.getOperand(0).getReg();
3973     Register SrcReg = I.getOperand(2).getReg();
3974
3975     // FIXME: Should this be an assert?
3976     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3977         MRI.getType(SrcReg).getSizeInBits() != 32)
3978       return false;
3979
3980     // The operation has to happen on FPRs. Set up some new FPR registers for
3981     // the source and destination if they are on GPRs.
3982     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3983       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3984       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3985
3986       // Make sure the copy ends up getting constrained properly.
3987       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3988                                    AArch64::GPR32RegClass, MRI);
3989     }
3990
3991     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3992       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3993
3994     // Actually insert the instruction.
3995     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3996     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3997
3998     // Did we create a new register for the destination?
3999     if (DstReg != I.getOperand(0).getReg()) {
4000       // Yep. Copy the result of the instruction back into the original
4001       // destination.
4002       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4003       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4004                                    AArch64::GPR32RegClass, MRI);
4005     }
4006
4007     I.eraseFromParent();
4008     return true;
4009   }
4010   return false;
4011 }
4012
4013 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4014   auto &MI = *Root.getParent();
4015   auto &MBB = *MI.getParent();
4016   auto &MF = *MBB.getParent();
4017   auto &MRI = MF.getRegInfo();
4018   uint64_t Immed;
4019   if (Root.isImm())
4020     Immed = Root.getImm();
4021   else if (Root.isCImm())
4022     Immed = Root.getCImm()->getZExtValue();
4023   else if (Root.isReg()) {
4024     auto ValAndVReg =
4025         getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
4026     if (!ValAndVReg)
4027       return None;
4028     Immed = ValAndVReg->Value;
4029   } else
4030     return None;
4031   return Immed;
4032 }
4033
4034 InstructionSelector::ComplexRendererFns
4035 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4036   auto MaybeImmed = getImmedFromMO(Root);
4037   if (MaybeImmed == None || *MaybeImmed > 31)
4038     return None;
4039   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4040   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4041 }
4042
4043 InstructionSelector::ComplexRendererFns
4044 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4045   auto MaybeImmed = getImmedFromMO(Root);
4046   if (MaybeImmed == None || *MaybeImmed > 31)
4047     return None;
4048   uint64_t Enc = 31 - *MaybeImmed;
4049   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4050 }
4051
4052 InstructionSelector::ComplexRendererFns
4053 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4054   auto MaybeImmed = getImmedFromMO(Root);
4055   if (MaybeImmed == None || *MaybeImmed > 63)
4056     return None;
4057   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4058   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4059 }
4060
4061 InstructionSelector::ComplexRendererFns
4062 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4063   auto MaybeImmed = getImmedFromMO(Root);
4064   if (MaybeImmed == None || *MaybeImmed > 63)
4065     return None;
4066   uint64_t Enc = 63 - *MaybeImmed;
4067   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4068 }
4069
4070 /// Helper to select an immediate value that can be represented as a 12-bit
4071 /// value shifted left by either 0 or 12. If it is possible to do so, return
4072 /// the immediate and shift value. If not, return None.
4073 ///
4074 /// Used by selectArithImmed and selectNegArithImmed.
4075 InstructionSelector::ComplexRendererFns
4076 AArch64InstructionSelector::select12BitValueWithLeftShift(
4077     uint64_t Immed) const {
4078   unsigned ShiftAmt;
4079   if (Immed >> 12 == 0) {
4080     ShiftAmt = 0;
4081   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4082     ShiftAmt = 12;
4083     Immed = Immed >> 12;
4084   } else
4085     return None;
4086
4087   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4088   return {{
4089       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4090       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4091   }};
4092 }
4093
4094 /// SelectArithImmed - Select an immediate value that can be represented as
4095 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
4096 /// Val set to the 12-bit value and Shift set to the shifter operand.
4097 InstructionSelector::ComplexRendererFns
4098 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4099   // This function is called from the addsub_shifted_imm ComplexPattern,
4100   // which lists [imm] as the list of opcode it's interested in, however
4101   // we still need to check whether the operand is actually an immediate
4102   // here because the ComplexPattern opcode list is only used in
4103   // root-level opcode matching.
4104   auto MaybeImmed = getImmedFromMO(Root);
4105   if (MaybeImmed == None)
4106     return None;
4107   return select12BitValueWithLeftShift(*MaybeImmed);
4108 }
4109
4110 /// SelectNegArithImmed - As above, but negates the value before trying to
4111 /// select it.
4112 InstructionSelector::ComplexRendererFns
4113 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4114   // We need a register here, because we need to know if we have a 64 or 32
4115   // bit immediate.
4116   if (!Root.isReg())
4117     return None;
4118   auto MaybeImmed = getImmedFromMO(Root);
4119   if (MaybeImmed == None)
4120     return None;
4121   uint64_t Immed = *MaybeImmed;
4122
4123   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4124   // have the opposite effect on the C flag, so this pattern mustn't match under
4125   // those circumstances.
4126   if (Immed == 0)
4127     return None;
4128
4129   // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4130   // the root.
4131   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4132   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4133     Immed = ~((uint32_t)Immed) + 1;
4134   else
4135     Immed = ~Immed + 1ULL;
4136
4137   if (Immed & 0xFFFFFFFFFF000000ULL)
4138     return None;
4139
4140   Immed &= 0xFFFFFFULL;
4141   return select12BitValueWithLeftShift(Immed);
4142 }
4143
4144 /// Return true if it is worth folding MI into an extended register. That is,
4145 /// if it's safe to pull it into the addressing mode of a load or store as a
4146 /// shift.
4147 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4148     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4149   // Always fold if there is one use, or if we're optimizing for size.
4150   Register DefReg = MI.getOperand(0).getReg();
4151   if (MRI.hasOneUse(DefReg) ||
4152       MI.getParent()->getParent()->getFunction().hasMinSize())
4153     return true;
4154
4155   // It's better to avoid folding and recomputing shifts when we don't have a
4156   // fastpath.
4157   if (!STI.hasLSLFast())
4158     return false;
4159
4160   // We have a fastpath, so folding a shift in and potentially computing it
4161   // many times may be beneficial. Check if this is only used in memory ops.
4162   // If it is, then we should fold.
4163   return all_of(MRI.use_instructions(DefReg),
4164                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4165 }
4166
4167 /// This is used for computing addresses like this:
4168 ///
4169 /// ldr x1, [x2, x3, lsl #3]
4170 ///
4171 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4172 /// is a constant value specific to this load instruction. That is, we'll never
4173 /// see anything other than a 3 here (which corresponds to the size of the
4174 /// element being loaded.)
4175 InstructionSelector::ComplexRendererFns
4176 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4177     MachineOperand &Root, unsigned SizeInBytes) const {
4178   if (!Root.isReg())
4179     return None;
4180   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4181
4182   // Make sure that the memory op is a valid size.
4183   int64_t LegalShiftVal = Log2_32(SizeInBytes);
4184   if (LegalShiftVal == 0)
4185     return None;
4186
4187   // We want to find something like this:
4188   //
4189   // val = G_CONSTANT LegalShiftVal
4190   // shift = G_SHL off_reg val
4191   // ptr = G_GEP base_reg shift
4192   // x = G_LOAD ptr
4193   //
4194   // And fold it into this addressing mode:
4195   //
4196   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4197
4198   // Check if we can find the G_GEP.
4199   MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4200   if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4201     return None;
4202
4203   // Now, try to match an opcode which will match our specific offset.
4204   // We want a G_SHL or a G_MUL.
4205   MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4206   if (!OffsetInst)
4207     return None;
4208
4209   unsigned OffsetOpc = OffsetInst->getOpcode();
4210   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4211     return None;
4212
4213   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4214     return None;
4215
4216   // Now, try to find the specific G_CONSTANT. Start by assuming that the
4217   // register we will offset is the LHS, and the register containing the
4218   // constant is the RHS.
4219   Register OffsetReg = OffsetInst->getOperand(1).getReg();
4220   Register ConstantReg = OffsetInst->getOperand(2).getReg();
4221   auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4222   if (!ValAndVReg) {
4223     // We didn't get a constant on the RHS. If the opcode is a shift, then
4224     // we're done.
4225     if (OffsetOpc == TargetOpcode::G_SHL)
4226       return None;
4227
4228     // If we have a G_MUL, we can use either register. Try looking at the RHS.
4229     std::swap(OffsetReg, ConstantReg);
4230     ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4231     if (!ValAndVReg)
4232       return None;
4233   }
4234
4235   // The value must fit into 3 bits, and must be positive. Make sure that is
4236   // true.
4237   int64_t ImmVal = ValAndVReg->Value;
4238
4239   // Since we're going to pull this into a shift, the constant value must be
4240   // a power of 2. If we got a multiply, then we need to check this.
4241   if (OffsetOpc == TargetOpcode::G_MUL) {
4242     if (!isPowerOf2_32(ImmVal))
4243       return None;
4244
4245     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4246     ImmVal = Log2_32(ImmVal);
4247   }
4248
4249   if ((ImmVal & 0x7) != ImmVal)
4250     return None;
4251
4252   // We are only allowed to shift by LegalShiftVal. This shift value is built
4253   // into the instruction, so we can't just use whatever we want.
4254   if (ImmVal != LegalShiftVal)
4255     return None;
4256
4257   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4258   // offset. Signify that we are shifting by setting the shift flag to 1.
4259   return {{[=](MachineInstrBuilder &MIB) {
4260              MIB.addUse(Gep->getOperand(1).getReg());
4261            },
4262            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4263            [=](MachineInstrBuilder &MIB) {
4264              // Need to add both immediates here to make sure that they are both
4265              // added to the instruction.
4266              MIB.addImm(0);
4267              MIB.addImm(1);
4268            }}};
4269 }
4270
4271 /// This is used for computing addresses like this:
4272 ///
4273 /// ldr x1, [x2, x3]
4274 ///
4275 /// Where x2 is the base register, and x3 is an offset register.
4276 ///
4277 /// When possible (or profitable) to fold a G_GEP into the address calculation,
4278 /// this will do so. Otherwise, it will return None.
4279 InstructionSelector::ComplexRendererFns
4280 AArch64InstructionSelector::selectAddrModeRegisterOffset(
4281     MachineOperand &Root) const {
4282   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4283
4284   // We need a GEP.
4285   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4286   if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4287     return None;
4288
4289   // If this is used more than once, let's not bother folding.
4290   // TODO: Check if they are memory ops. If they are, then we can still fold
4291   // without having to recompute anything.
4292   if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4293     return None;
4294
4295   // Base is the GEP's LHS, offset is its RHS.
4296   return {{[=](MachineInstrBuilder &MIB) {
4297              MIB.addUse(Gep->getOperand(1).getReg());
4298            },
4299            [=](MachineInstrBuilder &MIB) {
4300              MIB.addUse(Gep->getOperand(2).getReg());
4301            },
4302            [=](MachineInstrBuilder &MIB) {
4303              // Need to add both immediates here to make sure that they are both
4304              // added to the instruction.
4305              MIB.addImm(0);
4306              MIB.addImm(0);
4307            }}};
4308 }
4309
4310 /// This is intended to be equivalent to selectAddrModeXRO in
4311 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4312 InstructionSelector::ComplexRendererFns
4313 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4314                                               unsigned SizeInBytes) const {
4315   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4316
4317   // If we have a constant offset, then we probably don't want to match a
4318   // register offset.
4319   if (isBaseWithConstantOffset(Root, MRI))
4320     return None;
4321
4322   // Try to fold shifts into the addressing mode.
4323   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4324   if (AddrModeFns)
4325     return AddrModeFns;
4326
4327   // If that doesn't work, see if it's possible to fold in registers from
4328   // a GEP.
4329   return selectAddrModeRegisterOffset(Root);
4330 }
4331
4332 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
4333 /// should only match when there is an offset that is not valid for a scaled
4334 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
4335 /// memory reference, which is needed here to know what is valid for a scaled
4336 /// immediate.
4337 InstructionSelector::ComplexRendererFns
4338 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4339                                                    unsigned Size) const {
4340   MachineRegisterInfo &MRI =
4341       Root.getParent()->getParent()->getParent()->getRegInfo();
4342
4343   if (!Root.isReg())
4344     return None;
4345
4346   if (!isBaseWithConstantOffset(Root, MRI))
4347     return None;
4348
4349   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4350   if (!RootDef)
4351     return None;
4352
4353   MachineOperand &OffImm = RootDef->getOperand(2);
4354   if (!OffImm.isReg())
4355     return None;
4356   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4357   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4358     return None;
4359   int64_t RHSC;
4360   MachineOperand &RHSOp1 = RHS->getOperand(1);
4361   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4362     return None;
4363   RHSC = RHSOp1.getCImm()->getSExtValue();
4364
4365   // If the offset is valid as a scaled immediate, don't match here.
4366   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4367     return None;
4368   if (RHSC >= -256 && RHSC < 256) {
4369     MachineOperand &Base = RootDef->getOperand(1);
4370     return {{
4371         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4372         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4373     }};
4374   }
4375   return None;
4376 }
4377
4378 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
4379 /// "Size" argument is the size in bytes of the memory reference, which
4380 /// determines the scale.
4381 InstructionSelector::ComplexRendererFns
4382 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4383                                                   unsigned Size) const {
4384   MachineRegisterInfo &MRI =
4385       Root.getParent()->getParent()->getParent()->getRegInfo();
4386
4387   if (!Root.isReg())
4388     return None;
4389
4390   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4391   if (!RootDef)
4392     return None;
4393
4394   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4395     return {{
4396         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4397         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4398     }};
4399   }
4400
4401   if (isBaseWithConstantOffset(Root, MRI)) {
4402     MachineOperand &LHS = RootDef->getOperand(1);
4403     MachineOperand &RHS = RootDef->getOperand(2);
4404     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4405     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4406     if (LHSDef && RHSDef) {
4407       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4408       unsigned Scale = Log2_32(Size);
4409       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4410         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4411           return {{
4412               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4413               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4414           }};
4415
4416         return {{
4417             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4418             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4419         }};
4420       }
4421     }
4422   }
4423
4424   // Before falling back to our general case, check if the unscaled
4425   // instructions can handle this. If so, that's preferable.
4426   if (selectAddrModeUnscaled(Root, Size).hasValue())
4427     return None;
4428
4429   return {{
4430       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4431       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4432   }};
4433 }
4434
4435 /// Given a shift instruction, return the correct shift type for that
4436 /// instruction.
4437 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
4438   // TODO: Handle AArch64_AM::ROR
4439   switch (MI.getOpcode()) {
4440   default:
4441     return AArch64_AM::InvalidShiftExtend;
4442   case TargetOpcode::G_SHL:
4443     return AArch64_AM::LSL;
4444   case TargetOpcode::G_LSHR:
4445     return AArch64_AM::LSR;
4446   case TargetOpcode::G_ASHR:
4447     return AArch64_AM::ASR;
4448   }
4449 }
4450
4451 /// Select a "shifted register" operand. If the value is not shifted, set the
4452 /// shift operand to a default value of "lsl 0".
4453 ///
4454 /// TODO: Allow shifted register to be rotated in logical instructions.
4455 InstructionSelector::ComplexRendererFns
4456 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4457   if (!Root.isReg())
4458     return None;
4459   MachineRegisterInfo &MRI =
4460       Root.getParent()->getParent()->getParent()->getRegInfo();
4461
4462   // Check if the operand is defined by an instruction which corresponds to
4463   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
4464   //
4465   // TODO: Handle AArch64_AM::ROR for logical instructions.
4466   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
4467   if (!ShiftInst)
4468     return None;
4469   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
4470   if (ShType == AArch64_AM::InvalidShiftExtend)
4471     return None;
4472   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
4473     return None;
4474
4475   // Need an immediate on the RHS.
4476   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
4477   auto Immed = getImmedFromMO(ShiftRHS);
4478   if (!Immed)
4479     return None;
4480
4481   // We have something that we can fold. Fold in the shift's LHS and RHS into
4482   // the instruction.
4483   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
4484   Register ShiftReg = ShiftLHS.getReg();
4485
4486   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
4487   unsigned Val = *Immed & (NumBits - 1);
4488   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
4489
4490   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
4491            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
4492 }
4493
4494 /// Get the correct ShiftExtendType for an extend instruction.
4495 static AArch64_AM::ShiftExtendType
4496 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
4497   unsigned Opc = MI.getOpcode();
4498
4499   // Handle explicit extend instructions first.
4500   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
4501     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4502     assert(Size != 64 && "Extend from 64 bits?");
4503     switch (Size) {
4504     case 8:
4505       return AArch64_AM::SXTB;
4506     case 16:
4507       return AArch64_AM::SXTH;
4508     case 32:
4509       return AArch64_AM::SXTW;
4510     default:
4511       return AArch64_AM::InvalidShiftExtend;
4512     }
4513   }
4514
4515   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
4516     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4517     assert(Size != 64 && "Extend from 64 bits?");
4518     switch (Size) {
4519     case 8:
4520       return AArch64_AM::UXTB;
4521     case 16:
4522       return AArch64_AM::UXTH;
4523     case 32:
4524       return AArch64_AM::UXTW;
4525     default:
4526       return AArch64_AM::InvalidShiftExtend;
4527     }
4528   }
4529
4530   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
4531   // on the RHS.
4532   if (Opc != TargetOpcode::G_AND)
4533     return AArch64_AM::InvalidShiftExtend;
4534
4535   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
4536   if (!MaybeAndMask)
4537     return AArch64_AM::InvalidShiftExtend;
4538   uint64_t AndMask = *MaybeAndMask;
4539   switch (AndMask) {
4540   default:
4541     return AArch64_AM::InvalidShiftExtend;
4542   case 0xFF:
4543     return AArch64_AM::UXTB;
4544   case 0xFFFF:
4545     return AArch64_AM::UXTH;
4546   case 0xFFFFFFFF:
4547     return AArch64_AM::UXTW;
4548   }
4549 }
4550
4551 Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
4552     Register ExtReg, MachineIRBuilder &MIB) const {
4553   MachineRegisterInfo &MRI = *MIB.getMRI();
4554   if (MRI.getType(ExtReg).getSizeInBits() == 32)
4555     return ExtReg;
4556
4557   // Insert a copy to move ExtReg to GPR32.
4558   Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4559   auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
4560
4561   // Select the copy into a subregister copy.
4562   selectCopy(*Copy, TII, MRI, TRI, RBI);
4563   return Copy.getReg(0);
4564 }
4565
4566 /// Select an "extended register" operand. This operand folds in an extend
4567 /// followed by an optional left shift.
4568 InstructionSelector::ComplexRendererFns
4569 AArch64InstructionSelector::selectArithExtendedRegister(
4570     MachineOperand &Root) const {
4571   if (!Root.isReg())
4572     return None;
4573   MachineRegisterInfo &MRI =
4574       Root.getParent()->getParent()->getParent()->getRegInfo();
4575
4576   uint64_t ShiftVal = 0;
4577   Register ExtReg;
4578   AArch64_AM::ShiftExtendType Ext;
4579   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
4580   if (!RootDef)
4581     return None;
4582
4583   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
4584     return None;
4585
4586   // Check if we can fold a shift and an extend.
4587   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
4588     // Look for a constant on the RHS of the shift.
4589     MachineOperand &RHS = RootDef->getOperand(2);
4590     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
4591     if (!MaybeShiftVal)
4592       return None;
4593     ShiftVal = *MaybeShiftVal;
4594     if (ShiftVal > 4)
4595       return None;
4596     // Look for a valid extend instruction on the LHS of the shift.
4597     MachineOperand &LHS = RootDef->getOperand(1);
4598     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4599     if (!ExtDef)
4600       return None;
4601     Ext = getExtendTypeForInst(*ExtDef, MRI);
4602     if (Ext == AArch64_AM::InvalidShiftExtend)
4603       return None;
4604     ExtReg = ExtDef->getOperand(1).getReg();
4605   } else {
4606     // Didn't get a shift. Try just folding an extend.
4607     Ext = getExtendTypeForInst(*RootDef, MRI);
4608     if (Ext == AArch64_AM::InvalidShiftExtend)
4609       return None;
4610     ExtReg = RootDef->getOperand(1).getReg();
4611
4612     // If we have a 32 bit instruction which zeroes out the high half of a
4613     // register, we get an implicit zero extend for free. Check if we have one.
4614     // FIXME: We actually emit the extend right now even though we don't have
4615     // to.
4616     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
4617       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
4618       if (ExtInst && isDef32(*ExtInst))
4619         return None;
4620     }
4621   }
4622
4623   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
4624   // copy.
4625   MachineIRBuilder MIB(*RootDef);
4626   ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
4627
4628   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
4629            [=](MachineInstrBuilder &MIB) {
4630              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
4631            }}};
4632 }
4633
4634 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4635                                                 const MachineInstr &MI) const {
4636   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4637   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4638   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4639   assert(CstVal && "Expected constant value");
4640   MIB.addImm(CstVal.getValue());
4641 }
4642
4643 void AArch64InstructionSelector::renderLogicalImm32(
4644     MachineInstrBuilder &MIB, const MachineInstr &I) const {
4645   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4646   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4647   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
4648   MIB.addImm(Enc);
4649 }
4650
4651 void AArch64InstructionSelector::renderLogicalImm64(
4652     MachineInstrBuilder &MIB, const MachineInstr &I) const {
4653   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4654   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4655   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
4656   MIB.addImm(Enc);
4657 }
4658
4659 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
4660     const MachineInstr &MI, unsigned NumBytes) const {
4661   if (!MI.mayLoadOrStore())
4662     return false;
4663   assert(MI.hasOneMemOperand() &&
4664          "Expected load/store to have only one mem op!");
4665   return (*MI.memoperands_begin())->getSize() == NumBytes;
4666 }
4667
4668 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
4669   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4670   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
4671     return false;
4672
4673   // Only return true if we know the operation will zero-out the high half of
4674   // the 64-bit register. Truncates can be subregister copies, which don't
4675   // zero out the high bits. Copies and other copy-like instructions can be
4676   // fed by truncates, or could be lowered as subregister copies.
4677   switch (MI.getOpcode()) {
4678   default:
4679     return true;
4680   case TargetOpcode::COPY:
4681   case TargetOpcode::G_BITCAST:
4682   case TargetOpcode::G_TRUNC:
4683   case TargetOpcode::G_PHI:
4684     return false;
4685   }
4686 }
4687
4688 namespace llvm {
4689 InstructionSelector *
4690 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4691                                  AArch64Subtarget &Subtarget,
4692                                  AArch64RegisterBankInfo &RBI) {
4693   return new AArch64InstructionSelector(TM, Subtarget, RBI);
4694 }
4695 }