lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines an instruction selector for the ARM target.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "ARM.h"
  14 #include "ARMBaseInstrInfo.h"
  15 #include "ARMTargetMachine.h"
  16 #include "MCTargetDesc/ARMAddressingModes.h"
  17 #include "Utils/ARMBaseInfo.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/CodeGen/TargetLowering.h"
  26 #include "llvm/IR/CallingConv.h"
  27 #include "llvm/IR/Constants.h"
  28 #include "llvm/IR/DerivedTypes.h"
  29 #include "llvm/IR/Function.h"
  30 #include "llvm/IR/Intrinsics.h"
  31 #include "llvm/IR/LLVMContext.h"
  32 #include "llvm/Support/CommandLine.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 class ARMDAGToDAGISel : public SelectionDAGISel {
  53   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  54   /// make the right decision when generating code for different targets.
  55   const ARMSubtarget *Subtarget;
  56
  57 public:
  58   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  59       : SelectionDAGISel(tm, OptLevel) {}
  60
  61   bool runOnMachineFunction(MachineFunction &MF) override {
  62     // Reset the subtarget each time through.
  63     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  64     SelectionDAGISel::runOnMachineFunction(MF);
  65     return true;
  66   }
  67
  68   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  69
  70   void PreprocessISelDAG() override;
  71
  72   /// getI32Imm - Return a target constant of type i32 with the specified
  73   /// value.
  74   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  75     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  76   }
  77
  78   void Select(SDNode *N) override;
  79
  80   bool hasNoVMLxHazardUse(SDNode *N) const;
  81   bool isShifterOpProfitable(const SDValue &Shift,
  82                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  83   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  84                                SDValue &B, SDValue &C,
  85                                bool CheckProfitability = true);
  86   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  87                                SDValue &B, bool CheckProfitability = true);
  88   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  89                                     SDValue &B, SDValue &C) {
  90     // Don't apply the profitability check
  91     return SelectRegShifterOperand(N, A, B, C, false);
  92   }
  93   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B) {
  95     // Don't apply the profitability check
  96     return SelectImmShifterOperand(N, A, B, false);
  97   }
  98
  99   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
 100
 101   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 102   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 103
 104   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 105     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 106     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 107     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 108     return true;
 109   }
 110
 111   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 112                              SDValue &Offset, SDValue &Opc);
 113   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 114                              SDValue &Offset, SDValue &Opc);
 115   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 116                              SDValue &Offset, SDValue &Opc);
 117   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 118   bool SelectAddrMode3(SDValue N, SDValue &Base,
 119                        SDValue &Offset, SDValue &Opc);
 120   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 121                              SDValue &Offset, SDValue &Opc);
 122   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
 123   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
 124   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
 125   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 126   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 127
 128   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 129
 130   // Thumb Addressing Modes:
 131   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 132   bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
 133   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 134                                 SDValue &OffImm);
 135   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 136                                  SDValue &OffImm);
 137   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 138                                  SDValue &OffImm);
 139   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 140                                  SDValue &OffImm);
 141   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 142
 143   // Thumb 2 Addressing Modes:
 144   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 145   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 146                             SDValue &OffImm);
 147   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 148                                  SDValue &OffImm);
 149   template <unsigned Shift>
 150   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
 151   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
 152                                   unsigned Shift);
 153   template <unsigned Shift>
 154   bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
 155   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 156                              SDValue &OffReg, SDValue &ShImm);
 157   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 158
 159   inline bool is_so_imm(unsigned Imm) const {
 160     return ARM_AM::getSOImmVal(Imm) != -1;
 161   }
 162
 163   inline bool is_so_imm_not(unsigned Imm) const {
 164     return ARM_AM::getSOImmVal(~Imm) != -1;
 165   }
 166
 167   inline bool is_t2_so_imm(unsigned Imm) const {
 168     return ARM_AM::getT2SOImmVal(Imm) != -1;
 169   }
 170
 171   inline bool is_t2_so_imm_not(unsigned Imm) const {
 172     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 173   }
 174
 175   // Include the pieces autogenerated from the target description.
 176 #include "ARMGenDAGISel.inc"
 177
 178 private:
 179   void transferMemOperands(SDNode *Src, SDNode *Dst);
 180
 181   /// Indexed (pre/post inc/dec) load matching code for ARM.
 182   bool tryARMIndexedLoad(SDNode *N);
 183   bool tryT1IndexedLoad(SDNode *N);
 184   bool tryT2IndexedLoad(SDNode *N);
 185   bool tryMVEIndexedLoad(SDNode *N);
 186
 187   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 188   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 189   /// loads of D registers and even subregs and odd subregs of Q registers.
 190   /// For NumVecs <= 2, QOpcodes1 is not used.
 191   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 192                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 193                  const uint16_t *QOpcodes1);
 194
 195   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 196   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 197   /// stores of D registers and even subregs and odd subregs of Q registers.
 198   /// For NumVecs <= 2, QOpcodes1 is not used.
 199   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 200                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 201                  const uint16_t *QOpcodes1);
 202
 203   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 204   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 205   /// load/store of D registers and Q registers.
 206   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 207                        unsigned NumVecs, const uint16_t *DOpcodes,
 208                        const uint16_t *QOpcodes);
 209
 210   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 211   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 212   /// for loading D registers.
 213   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
 214                     unsigned NumVecs, const uint16_t *DOpcodes,
 215                     const uint16_t *QOpcodes0 = nullptr,
 216                     const uint16_t *QOpcodes1 = nullptr);
 217
 218   /// Try to select SBFX/UBFX instructions for ARM.
 219   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 220
 221   // Select special operations if node forms integer ABS pattern
 222   bool tryABSOp(SDNode *N);
 223
 224   bool tryReadRegister(SDNode *N);
 225   bool tryWriteRegister(SDNode *N);
 226
 227   bool tryInlineAsm(SDNode *N);
 228
 229   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 230
 231   void SelectCMP_SWAP(SDNode *N);
 232
 233   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 234   /// inline asm expressions.
 235   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 236                                     std::vector<SDValue> &OutOps) override;
 237
 238   // Form pairs of consecutive R, S, D, or Q registers.
 239   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 240   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 241   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 242   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 243
 244   // Form sequences of 4 consecutive S, D, or Q registers.
 245   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 246   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 247   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 248
 249   // Get the alignment operand for a NEON VLD or VST instruction.
 250   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 251                         bool is64BitVector);
 252
 253   /// Checks if N is a multiplication by a constant where we can extract out a
 254   /// power of two from the constant so that it can be used in a shift, but only
 255   /// if it simplifies the materialization of the constant. Returns true if it
 256   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 257   /// out and to NewMulConst the new constant to be multiplied by.
 258   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 259                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 260
 261   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 262   /// selected when N would have been selected.
 263   void replaceDAGValue(const SDValue &N, SDValue M);
 264 };
 265 }
 266
 267 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 268 /// operand. If so Imm will receive the 32-bit value.
 269 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 270   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 271     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 272     return true;
 273   }
 274   return false;
 275 }
 276
 277 // isInt32Immediate - This method tests to see if a constant operand.
 278 // If so Imm will receive the 32 bit value.
 279 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 280   return isInt32Immediate(N.getNode(), Imm);
 281 }
 282
 283 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 284 // opcode and that it has a immediate integer right operand.
 285 // If so Imm will receive the 32 bit value.
 286 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 287   return N->getOpcode() == Opc &&
 288          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 289 }
 290
 291 /// Check whether a particular node is a constant value representable as
 292 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 293 ///
 294 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 295 static bool isScaledConstantInRange(SDValue Node, int Scale,
 296                                     int RangeMin, int RangeMax,
 297                                     int &ScaledConstant) {
 298   assert(Scale > 0 && "Invalid scale!");
 299
 300   // Check that this is a constant.
 301   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 302   if (!C)
 303     return false;
 304
 305   ScaledConstant = (int) C->getZExtValue();
 306   if ((ScaledConstant % Scale) != 0)
 307     return false;
 308
 309   ScaledConstant /= Scale;
 310   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 311 }
 312
 313 void ARMDAGToDAGISel::PreprocessISelDAG() {
 314   if (!Subtarget->hasV6T2Ops())
 315     return;
 316
 317   bool isThumb2 = Subtarget->isThumb();
 318   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 319        E = CurDAG->allnodes_end(); I != E; ) {
 320     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 321
 322     if (N->getOpcode() != ISD::ADD)
 323       continue;
 324
 325     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 326     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 327     // trailing zeros, e.g. 1020.
 328     // Transform the expression to
 329     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 330     // of trailing zeros of c2. The left shift would be folded as an shifter
 331     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 332     // node (UBFX).
 333
 334     SDValue N0 = N->getOperand(0);
 335     SDValue N1 = N->getOperand(1);
 336     unsigned And_imm = 0;
 337     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 338       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 339         std::swap(N0, N1);
 340     }
 341     if (!And_imm)
 342       continue;
 343
 344     // Check if the AND mask is an immediate of the form: 000.....1111111100
 345     unsigned TZ = countTrailingZeros(And_imm);
 346     if (TZ != 1 && TZ != 2)
 347       // Be conservative here. Shifter operands aren't always free. e.g. On
 348       // Swift, left shifter operand of 1 / 2 for free but others are not.
 349       // e.g.
 350       //  ubfx   r3, r1, #16, #8
 351       //  ldr.w  r3, [r0, r3, lsl #2]
 352       // vs.
 353       //  mov.w  r9, #1020
 354       //  and.w  r2, r9, r1, lsr #14
 355       //  ldr    r2, [r0, r2]
 356       continue;
 357     And_imm >>= TZ;
 358     if (And_imm & (And_imm + 1))
 359       continue;
 360
 361     // Look for (and (srl X, c1), c2).
 362     SDValue Srl = N1.getOperand(0);
 363     unsigned Srl_imm = 0;
 364     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 365         (Srl_imm <= 2))
 366       continue;
 367
 368     // Make sure first operand is not a shifter operand which would prevent
 369     // folding of the left shift.
 370     SDValue CPTmp0;
 371     SDValue CPTmp1;
 372     SDValue CPTmp2;
 373     if (isThumb2) {
 374       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 375         continue;
 376     } else {
 377       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 378           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 379         continue;
 380     }
 381
 382     // Now make the transformation.
 383     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 384                           Srl.getOperand(0),
 385                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 386                                               MVT::i32));
 387     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 388                          Srl,
 389                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 390     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 391                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 392     CurDAG->UpdateNodeOperands(N, N0, N1);
 393   }
 394 }
 395
 396 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 397 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 398 /// least on current ARM implementations) which should be avoidded.
 399 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 400   if (OptLevel == CodeGenOpt::None)
 401     return true;
 402
 403   if (!Subtarget->hasVMLxHazards())
 404     return true;
 405
 406   if (!N->hasOneUse())
 407     return false;
 408
 409   SDNode *Use = *N->use_begin();
 410   if (Use->getOpcode() == ISD::CopyToReg)
 411     return true;
 412   if (Use->isMachineOpcode()) {
 413     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 414         CurDAG->getSubtarget().getInstrInfo());
 415
 416     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 417     if (MCID.mayStore())
 418       return true;
 419     unsigned Opcode = MCID.getOpcode();
 420     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 421       return true;
 422     // vmlx feeding into another vmlx. We actually want to unfold
 423     // the use later in the MLxExpansion pass. e.g.
 424     // vmla
 425     // vmla (stall 8 cycles)
 426     //
 427     // vmul (5 cycles)
 428     // vadd (5 cycles)
 429     // vmla
 430     // This adds up to about 18 - 19 cycles.
 431     //
 432     // vmla
 433     // vmul (stall 4 cycles)
 434     // vadd adds up to about 14 cycles.
 435     return TII->isFpMLxInstruction(Opcode);
 436   }
 437
 438   return false;
 439 }
 440
 441 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 442                                             ARM_AM::ShiftOpc ShOpcVal,
 443                                             unsigned ShAmt) {
 444   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 445     return true;
 446   if (Shift.hasOneUse())
 447     return true;
 448   // R << 2 is free.
 449   return ShOpcVal == ARM_AM::lsl &&
 450          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 451 }
 452
 453 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 454                                              unsigned MaxShift,
 455                                              unsigned &PowerOfTwo,
 456                                              SDValue &NewMulConst) const {
 457   assert(N.getOpcode() == ISD::MUL);
 458   assert(MaxShift > 0);
 459
 460   // If the multiply is used in more than one place then changing the constant
 461   // will make other uses incorrect, so don't.
 462   if (!N.hasOneUse()) return false;
 463   // Check if the multiply is by a constant
 464   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 465   if (!MulConst) return false;
 466   // If the constant is used in more than one place then modifying it will mean
 467   // we need to materialize two constants instead of one, which is a bad idea.
 468   if (!MulConst->hasOneUse()) return false;
 469   unsigned MulConstVal = MulConst->getZExtValue();
 470   if (MulConstVal == 0) return false;
 471
 472   // Find the largest power of 2 that MulConstVal is a multiple of
 473   PowerOfTwo = MaxShift;
 474   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 475     --PowerOfTwo;
 476     if (PowerOfTwo == 0) return false;
 477   }
 478
 479   // Only optimise if the new cost is better
 480   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 481   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 482   unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
 483   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
 484   return NewCost < OldCost;
 485 }
 486
 487 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 488   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 489   ReplaceUses(N, M);
 490 }
 491
 492 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 493                                               SDValue &BaseReg,
 494                                               SDValue &Opc,
 495                                               bool CheckProfitability) {
 496   if (DisableShifterOp)
 497     return false;
 498
 499   // If N is a multiply-by-constant and it's profitable to extract a shift and
 500   // use it in a shifted operand do so.
 501   if (N.getOpcode() == ISD::MUL) {
 502     unsigned PowerOfTwo = 0;
 503     SDValue NewMulConst;
 504     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 505       HandleSDNode Handle(N);
 506       SDLoc Loc(N);
 507       replaceDAGValue(N.getOperand(1), NewMulConst);
 508       BaseReg = Handle.getValue();
 509       Opc = CurDAG->getTargetConstant(
 510           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 511       return true;
 512     }
 513   }
 514
 515   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 516
 517   // Don't match base register only case. That is matched to a separate
 518   // lower complexity pattern with explicit register operand.
 519   if (ShOpcVal == ARM_AM::no_shift) return false;
 520
 521   BaseReg = N.getOperand(0);
 522   unsigned ShImmVal = 0;
 523   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 524   if (!RHS) return false;
 525   ShImmVal = RHS->getZExtValue() & 31;
 526   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 527                                   SDLoc(N), MVT::i32);
 528   return true;
 529 }
 530
 531 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 532                                               SDValue &BaseReg,
 533                                               SDValue &ShReg,
 534                                               SDValue &Opc,
 535                                               bool CheckProfitability) {
 536   if (DisableShifterOp)
 537     return false;
 538
 539   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 540
 541   // Don't match base register only case. That is matched to a separate
 542   // lower complexity pattern with explicit register operand.
 543   if (ShOpcVal == ARM_AM::no_shift) return false;
 544
 545   BaseReg = N.getOperand(0);
 546   unsigned ShImmVal = 0;
 547   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 548   if (RHS) return false;
 549
 550   ShReg = N.getOperand(1);
 551   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 552     return false;
 553   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 554                                   SDLoc(N), MVT::i32);
 555   return true;
 556 }
 557
 558 // Determine whether an ISD::OR's operands are suitable to turn the operation
 559 // into an addition, which often has more compact encodings.
 560 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
 561   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
 562   Out = N;
 563   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
 564 }
 565
 566
 567 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 568                                           SDValue &Base,
 569                                           SDValue &OffImm) {
 570   // Match simple R + imm12 operands.
 571
 572   // Base only.
 573   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 574       !CurDAG->isBaseWithConstantOffset(N)) {
 575     if (N.getOpcode() == ISD::FrameIndex) {
 576       // Match frame index.
 577       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 578       Base = CurDAG->getTargetFrameIndex(
 579           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 580       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 581       return true;
 582     }
 583
 584     if (N.getOpcode() == ARMISD::Wrapper &&
 585         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 586         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 587         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 588       Base = N.getOperand(0);
 589     } else
 590       Base = N;
 591     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 592     return true;
 593   }
 594
 595   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 596     int RHSC = (int)RHS->getSExtValue();
 597     if (N.getOpcode() == ISD::SUB)
 598       RHSC = -RHSC;
 599
 600     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 601       Base   = N.getOperand(0);
 602       if (Base.getOpcode() == ISD::FrameIndex) {
 603         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 604         Base = CurDAG->getTargetFrameIndex(
 605             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 606       }
 607       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 608       return true;
 609     }
 610   }
 611
 612   // Base only.
 613   Base = N;
 614   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 615   return true;
 616 }
 617
 618
 619
 620 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 621                                       SDValue &Opc) {
 622   if (N.getOpcode() == ISD::MUL &&
 623       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 624     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 625       // X * [3,5,9] -> X + X * [2,4,8] etc.
 626       int RHSC = (int)RHS->getZExtValue();
 627       if (RHSC & 1) {
 628         RHSC = RHSC & ~1;
 629         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 630         if (RHSC < 0) {
 631           AddSub = ARM_AM::sub;
 632           RHSC = - RHSC;
 633         }
 634         if (isPowerOf2_32(RHSC)) {
 635           unsigned ShAmt = Log2_32(RHSC);
 636           Base = Offset = N.getOperand(0);
 637           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 638                                                             ARM_AM::lsl),
 639                                           SDLoc(N), MVT::i32);
 640           return true;
 641         }
 642       }
 643     }
 644   }
 645
 646   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 647       // ISD::OR that is equivalent to an ISD::ADD.
 648       !CurDAG->isBaseWithConstantOffset(N))
 649     return false;
 650
 651   // Leave simple R +/- imm12 operands for LDRi12
 652   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 653     int RHSC;
 654     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 655                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 656       return false;
 657   }
 658
 659   // Otherwise this is R +/- [possibly shifted] R.
 660   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 661   ARM_AM::ShiftOpc ShOpcVal =
 662     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 663   unsigned ShAmt = 0;
 664
 665   Base   = N.getOperand(0);
 666   Offset = N.getOperand(1);
 667
 668   if (ShOpcVal != ARM_AM::no_shift) {
 669     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 670     // it.
 671     if (ConstantSDNode *Sh =
 672            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 673       ShAmt = Sh->getZExtValue();
 674       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 675         Offset = N.getOperand(1).getOperand(0);
 676       else {
 677         ShAmt = 0;
 678         ShOpcVal = ARM_AM::no_shift;
 679       }
 680     } else {
 681       ShOpcVal = ARM_AM::no_shift;
 682     }
 683   }
 684
 685   // Try matching (R shl C) + (R).
 686   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 687       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 688         N.getOperand(0).hasOneUse())) {
 689     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 690     if (ShOpcVal != ARM_AM::no_shift) {
 691       // Check to see if the RHS of the shift is a constant, if not, we can't
 692       // fold it.
 693       if (ConstantSDNode *Sh =
 694           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 695         ShAmt = Sh->getZExtValue();
 696         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 697           Offset = N.getOperand(0).getOperand(0);
 698           Base = N.getOperand(1);
 699         } else {
 700           ShAmt = 0;
 701           ShOpcVal = ARM_AM::no_shift;
 702         }
 703       } else {
 704         ShOpcVal = ARM_AM::no_shift;
 705       }
 706     }
 707   }
 708
 709   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 710   // and use it in a shifted operand do so.
 711   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 712     unsigned PowerOfTwo = 0;
 713     SDValue NewMulConst;
 714     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 715       HandleSDNode Handle(Offset);
 716       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 717       Offset = Handle.getValue();
 718       ShAmt = PowerOfTwo;
 719       ShOpcVal = ARM_AM::lsl;
 720     }
 721   }
 722
 723   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 724                                   SDLoc(N), MVT::i32);
 725   return true;
 726 }
 727
 728 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 729                                             SDValue &Offset, SDValue &Opc) {
 730   unsigned Opcode = Op->getOpcode();
 731   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 732     ? cast<LoadSDNode>(Op)->getAddressingMode()
 733     : cast<StoreSDNode>(Op)->getAddressingMode();
 734   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 735     ? ARM_AM::add : ARM_AM::sub;
 736   int Val;
 737   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 738     return false;
 739
 740   Offset = N;
 741   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 742   unsigned ShAmt = 0;
 743   if (ShOpcVal != ARM_AM::no_shift) {
 744     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 745     // it.
 746     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 747       ShAmt = Sh->getZExtValue();
 748       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 749         Offset = N.getOperand(0);
 750       else {
 751         ShAmt = 0;
 752         ShOpcVal = ARM_AM::no_shift;
 753       }
 754     } else {
 755       ShOpcVal = ARM_AM::no_shift;
 756     }
 757   }
 758
 759   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 760                                   SDLoc(N), MVT::i32);
 761   return true;
 762 }
 763
 764 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 765                                             SDValue &Offset, SDValue &Opc) {
 766   unsigned Opcode = Op->getOpcode();
 767   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 768     ? cast<LoadSDNode>(Op)->getAddressingMode()
 769     : cast<StoreSDNode>(Op)->getAddressingMode();
 770   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 771     ? ARM_AM::add : ARM_AM::sub;
 772   int Val;
 773   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 774     if (AddSub == ARM_AM::sub) Val *= -1;
 775     Offset = CurDAG->getRegister(0, MVT::i32);
 776     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 777     return true;
 778   }
 779
 780   return false;
 781 }
 782
 783
 784 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 785                                             SDValue &Offset, SDValue &Opc) {
 786   unsigned Opcode = Op->getOpcode();
 787   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 788     ? cast<LoadSDNode>(Op)->getAddressingMode()
 789     : cast<StoreSDNode>(Op)->getAddressingMode();
 790   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 791     ? ARM_AM::add : ARM_AM::sub;
 792   int Val;
 793   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 794     Offset = CurDAG->getRegister(0, MVT::i32);
 795     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 796                                                       ARM_AM::no_shift),
 797                                     SDLoc(Op), MVT::i32);
 798     return true;
 799   }
 800
 801   return false;
 802 }
 803
 804 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 805   Base = N;
 806   return true;
 807 }
 808
 809 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 810                                       SDValue &Base, SDValue &Offset,
 811                                       SDValue &Opc) {
 812   if (N.getOpcode() == ISD::SUB) {
 813     // X - C  is canonicalize to X + -C, no need to handle it here.
 814     Base = N.getOperand(0);
 815     Offset = N.getOperand(1);
 816     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 817                                     MVT::i32);
 818     return true;
 819   }
 820
 821   if (!CurDAG->isBaseWithConstantOffset(N)) {
 822     Base = N;
 823     if (N.getOpcode() == ISD::FrameIndex) {
 824       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 825       Base = CurDAG->getTargetFrameIndex(
 826           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 827     }
 828     Offset = CurDAG->getRegister(0, MVT::i32);
 829     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 830                                     MVT::i32);
 831     return true;
 832   }
 833
 834   // If the RHS is +/- imm8, fold into addr mode.
 835   int RHSC;
 836   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 837                               -256 + 1, 256, RHSC)) { // 8 bits.
 838     Base = N.getOperand(0);
 839     if (Base.getOpcode() == ISD::FrameIndex) {
 840       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 841       Base = CurDAG->getTargetFrameIndex(
 842           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 843     }
 844     Offset = CurDAG->getRegister(0, MVT::i32);
 845
 846     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 847     if (RHSC < 0) {
 848       AddSub = ARM_AM::sub;
 849       RHSC = -RHSC;
 850     }
 851     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 852                                     MVT::i32);
 853     return true;
 854   }
 855
 856   Base = N.getOperand(0);
 857   Offset = N.getOperand(1);
 858   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 859                                   MVT::i32);
 860   return true;
 861 }
 862
 863 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 864                                             SDValue &Offset, SDValue &Opc) {
 865   unsigned Opcode = Op->getOpcode();
 866   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 867     ? cast<LoadSDNode>(Op)->getAddressingMode()
 868     : cast<StoreSDNode>(Op)->getAddressingMode();
 869   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 870     ? ARM_AM::add : ARM_AM::sub;
 871   int Val;
 872   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 873     Offset = CurDAG->getRegister(0, MVT::i32);
 874     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 875                                     MVT::i32);
 876     return true;
 877   }
 878
 879   Offset = N;
 880   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 881                                   MVT::i32);
 882   return true;
 883 }
 884
 885 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
 886                                         bool FP16) {
 887   if (!CurDAG->isBaseWithConstantOffset(N)) {
 888     Base = N;
 889     if (N.getOpcode() == ISD::FrameIndex) {
 890       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 891       Base = CurDAG->getTargetFrameIndex(
 892           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 893     } else if (N.getOpcode() == ARMISD::Wrapper &&
 894                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 895                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 896                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 897       Base = N.getOperand(0);
 898     }
 899     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 900                                        SDLoc(N), MVT::i32);
 901     return true;
 902   }
 903
 904   // If the RHS is +/- imm8, fold into addr mode.
 905   int RHSC;
 906   const int Scale = FP16 ? 2 : 4;
 907
 908   if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
 909     Base = N.getOperand(0);
 910     if (Base.getOpcode() == ISD::FrameIndex) {
 911       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 912       Base = CurDAG->getTargetFrameIndex(
 913           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 914     }
 915
 916     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 917     if (RHSC < 0) {
 918       AddSub = ARM_AM::sub;
 919       RHSC = -RHSC;
 920     }
 921
 922     if (FP16)
 923       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
 924                                          SDLoc(N), MVT::i32);
 925     else
 926       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 927                                          SDLoc(N), MVT::i32);
 928
 929     return true;
 930   }
 931
 932   Base = N;
 933
 934   if (FP16)
 935     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
 936                                        SDLoc(N), MVT::i32);
 937   else
 938     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 939                                        SDLoc(N), MVT::i32);
 940
 941   return true;
 942 }
 943
 944 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 945                                       SDValue &Base, SDValue &Offset) {
 946   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
 947 }
 948
 949 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
 950                                           SDValue &Base, SDValue &Offset) {
 951   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
 952 }
 953
 954 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 955                                       SDValue &Align) {
 956   Addr = N;
 957
 958   unsigned Alignment = 0;
 959
 960   MemSDNode *MemN = cast<MemSDNode>(Parent);
 961
 962   if (isa<LSBaseSDNode>(MemN) ||
 963       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
 964         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
 965        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
 966     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 967     // The maximum alignment is equal to the memory size being referenced.
 968     unsigned MMOAlign = MemN->getAlignment();
 969     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
 970     if (MMOAlign >= MemSize && MemSize > 1)
 971       Alignment = MemSize;
 972   } else {
 973     // All other uses of addrmode6 are for intrinsics.  For now just record
 974     // the raw alignment value; it will be refined later based on the legal
 975     // alignment operands for the intrinsic.
 976     Alignment = MemN->getAlignment();
 977   }
 978
 979   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
 980   return true;
 981 }
 982
 983 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
 984                                             SDValue &Offset) {
 985   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
 986   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
 987   if (AM != ISD::POST_INC)
 988     return false;
 989   Offset = N;
 990   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
 991     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
 992       Offset = CurDAG->getRegister(0, MVT::i32);
 993   }
 994   return true;
 995 }
 996
 997 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
 998                                        SDValue &Offset, SDValue &Label) {
 999   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1000     Offset = N.getOperand(0);
1001     SDValue N1 = N.getOperand(1);
1002     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1003                                       SDLoc(N), MVT::i32);
1004     return true;
1005   }
1006
1007   return false;
1008 }
1009
1010
1011 //===----------------------------------------------------------------------===//
1012 //                         Thumb Addressing Modes
1013 //===----------------------------------------------------------------------===//
1014
1015 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1016   // Negative numbers are difficult to materialise in thumb1. If we are
1017   // selecting the add of a negative, instead try to select ri with a zero
1018   // offset, so create the add node directly which will become a sub.
1019   if (N.getOpcode() != ISD::ADD)
1020     return false;
1021
1022   // Look for an imm which is not legal for ld/st, but is legal for sub.
1023   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1024     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1025
1026   return false;
1027 }
1028
1029 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1030                                                 SDValue &Offset) {
1031   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1032     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1033     if (!NC || !NC->isNullValue())
1034       return false;
1035
1036     Base = Offset = N;
1037     return true;
1038   }
1039
1040   Base = N.getOperand(0);
1041   Offset = N.getOperand(1);
1042   return true;
1043 }
1044
1045 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1046                                             SDValue &Offset) {
1047   if (shouldUseZeroOffsetLdSt(N))
1048     return false; // Select ri instead
1049   return SelectThumbAddrModeRRSext(N, Base, Offset);
1050 }
1051
1052 bool
1053 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1054                                           SDValue &Base, SDValue &OffImm) {
1055   if (shouldUseZeroOffsetLdSt(N)) {
1056     Base = N;
1057     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1058     return true;
1059   }
1060
1061   if (!CurDAG->isBaseWithConstantOffset(N)) {
1062     if (N.getOpcode() == ISD::ADD) {
1063       return false; // We want to select register offset instead
1064     } else if (N.getOpcode() == ARMISD::Wrapper &&
1065         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1066         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1067         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1068         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1069       Base = N.getOperand(0);
1070     } else {
1071       Base = N;
1072     }
1073
1074     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1075     return true;
1076   }
1077
1078   // If the RHS is + imm5 * scale, fold into addr mode.
1079   int RHSC;
1080   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1081     Base = N.getOperand(0);
1082     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1083     return true;
1084   }
1085
1086   // Offset is too large, so use register offset instead.
1087   return false;
1088 }
1089
1090 bool
1091 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1092                                            SDValue &OffImm) {
1093   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1094 }
1095
1096 bool
1097 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1098                                            SDValue &OffImm) {
1099   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1100 }
1101
1102 bool
1103 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1104                                            SDValue &OffImm) {
1105   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1106 }
1107
1108 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1109                                             SDValue &Base, SDValue &OffImm) {
1110   if (N.getOpcode() == ISD::FrameIndex) {
1111     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1112     // Only multiples of 4 are allowed for the offset, so the frame object
1113     // alignment must be at least 4.
1114     MachineFrameInfo &MFI = MF->getFrameInfo();
1115     if (MFI.getObjectAlignment(FI) < 4)
1116       MFI.setObjectAlignment(FI, 4);
1117     Base = CurDAG->getTargetFrameIndex(
1118         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1119     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1120     return true;
1121   }
1122
1123   if (!CurDAG->isBaseWithConstantOffset(N))
1124     return false;
1125
1126   if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1127     // If the RHS is + imm8 * scale, fold into addr mode.
1128     int RHSC;
1129     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1130       Base = N.getOperand(0);
1131       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1132       // Make sure the offset is inside the object, or we might fail to
1133       // allocate an emergency spill slot. (An out-of-range access is UB, but
1134       // it could show up anyway.)
1135       MachineFrameInfo &MFI = MF->getFrameInfo();
1136       if (RHSC * 4 < MFI.getObjectSize(FI)) {
1137         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1138         // indexed by the LHS must be 4-byte aligned.
1139         if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1140           MFI.setObjectAlignment(FI, 4);
1141         if (MFI.getObjectAlignment(FI) >= 4) {
1142           Base = CurDAG->getTargetFrameIndex(
1143               FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1144           OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1145           return true;
1146         }
1147       }
1148     }
1149   }
1150
1151   return false;
1152 }
1153
1154
1155 //===----------------------------------------------------------------------===//
1156 //                        Thumb 2 Addressing Modes
1157 //===----------------------------------------------------------------------===//
1158
1159
1160 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1161                                             SDValue &Base, SDValue &OffImm) {
1162   // Match simple R + imm12 operands.
1163
1164   // Base only.
1165   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1166       !CurDAG->isBaseWithConstantOffset(N)) {
1167     if (N.getOpcode() == ISD::FrameIndex) {
1168       // Match frame index.
1169       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1170       Base = CurDAG->getTargetFrameIndex(
1171           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1172       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1173       return true;
1174     }
1175
1176     if (N.getOpcode() == ARMISD::Wrapper &&
1177         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1178         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1179         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1180       Base = N.getOperand(0);
1181       if (Base.getOpcode() == ISD::TargetConstantPool)
1182         return false;  // We want to select t2LDRpci instead.
1183     } else
1184       Base = N;
1185     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1186     return true;
1187   }
1188
1189   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1190     if (SelectT2AddrModeImm8(N, Base, OffImm))
1191       // Let t2LDRi8 handle (R - imm8).
1192       return false;
1193
1194     int RHSC = (int)RHS->getZExtValue();
1195     if (N.getOpcode() == ISD::SUB)
1196       RHSC = -RHSC;
1197
1198     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1199       Base   = N.getOperand(0);
1200       if (Base.getOpcode() == ISD::FrameIndex) {
1201         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1202         Base = CurDAG->getTargetFrameIndex(
1203             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1204       }
1205       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1206       return true;
1207     }
1208   }
1209
1210   // Base only.
1211   Base = N;
1212   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1213   return true;
1214 }
1215
1216 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1217                                            SDValue &Base, SDValue &OffImm) {
1218   // Match simple R - imm8 operands.
1219   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1220       !CurDAG->isBaseWithConstantOffset(N))
1221     return false;
1222
1223   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1224     int RHSC = (int)RHS->getSExtValue();
1225     if (N.getOpcode() == ISD::SUB)
1226       RHSC = -RHSC;
1227
1228     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1229       Base = N.getOperand(0);
1230       if (Base.getOpcode() == ISD::FrameIndex) {
1231         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1232         Base = CurDAG->getTargetFrameIndex(
1233             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1234       }
1235       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1236       return true;
1237     }
1238   }
1239
1240   return false;
1241 }
1242
1243 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1244                                                  SDValue &OffImm){
1245   unsigned Opcode = Op->getOpcode();
1246   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1247     ? cast<LoadSDNode>(Op)->getAddressingMode()
1248     : cast<StoreSDNode>(Op)->getAddressingMode();
1249   int RHSC;
1250   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1251     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1252       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1253       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1254     return true;
1255   }
1256
1257   return false;
1258 }
1259
1260 template <unsigned Shift>
1261 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1262                                            SDValue &OffImm) {
1263   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1264     int RHSC;
1265     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1266                                 RHSC)) {
1267       Base = N.getOperand(0);
1268       if (Base.getOpcode() == ISD::FrameIndex) {
1269         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1270         Base = CurDAG->getTargetFrameIndex(
1271             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1272       }
1273
1274       if (N.getOpcode() == ISD::SUB)
1275         RHSC = -RHSC;
1276       OffImm =
1277           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1278       return true;
1279     }
1280   }
1281
1282   // Base only.
1283   Base = N;
1284   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1285   return true;
1286 }
1287
1288 template <unsigned Shift>
1289 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1290                                                  SDValue &OffImm) {
1291   return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1292 }
1293
1294 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1295                                                  SDValue &OffImm,
1296                                                  unsigned Shift) {
1297   unsigned Opcode = Op->getOpcode();
1298   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1299                                ? cast<LoadSDNode>(Op)->getAddressingMode()
1300                                : cast<StoreSDNode>(Op)->getAddressingMode();
1301   int RHSC;
1302   if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
1303     OffImm =
1304         ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1305             ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1306             : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1307                                         MVT::i32);
1308     return true;
1309   }
1310   return false;
1311 }
1312
1313 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1314                                             SDValue &Base,
1315                                             SDValue &OffReg, SDValue &ShImm) {
1316   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1317   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1318     return false;
1319
1320   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1321   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1322     int RHSC = (int)RHS->getZExtValue();
1323     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1324       return false;
1325     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1326       return false;
1327   }
1328
1329   // Look for (R + R) or (R + (R << [1,2,3])).
1330   unsigned ShAmt = 0;
1331   Base   = N.getOperand(0);
1332   OffReg = N.getOperand(1);
1333
1334   // Swap if it is ((R << c) + R).
1335   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1336   if (ShOpcVal != ARM_AM::lsl) {
1337     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1338     if (ShOpcVal == ARM_AM::lsl)
1339       std::swap(Base, OffReg);
1340   }
1341
1342   if (ShOpcVal == ARM_AM::lsl) {
1343     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1344     // it.
1345     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1346       ShAmt = Sh->getZExtValue();
1347       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1348         OffReg = OffReg.getOperand(0);
1349       else {
1350         ShAmt = 0;
1351       }
1352     }
1353   }
1354
1355   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1356   // and use it in a shifted operand do so.
1357   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1358     unsigned PowerOfTwo = 0;
1359     SDValue NewMulConst;
1360     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1361       HandleSDNode Handle(OffReg);
1362       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1363       OffReg = Handle.getValue();
1364       ShAmt = PowerOfTwo;
1365     }
1366   }
1367
1368   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1369
1370   return true;
1371 }
1372
1373 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1374                                                 SDValue &OffImm) {
1375   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1376   // instructions.
1377   Base = N;
1378   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1379
1380   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1381     return true;
1382
1383   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1384   if (!RHS)
1385     return true;
1386
1387   uint32_t RHSC = (int)RHS->getZExtValue();
1388   if (RHSC > 1020 || RHSC % 4 != 0)
1389     return true;
1390
1391   Base = N.getOperand(0);
1392   if (Base.getOpcode() == ISD::FrameIndex) {
1393     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1394     Base = CurDAG->getTargetFrameIndex(
1395         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1396   }
1397
1398   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1399   return true;
1400 }
1401
1402 //===--------------------------------------------------------------------===//
1403
1404 /// getAL - Returns a ARMCC::AL immediate node.
1405 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1406   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1407 }
1408
1409 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1410   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1411   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1412 }
1413
1414 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1415   LoadSDNode *LD = cast<LoadSDNode>(N);
1416   ISD::MemIndexedMode AM = LD->getAddressingMode();
1417   if (AM == ISD::UNINDEXED)
1418     return false;
1419
1420   EVT LoadedVT = LD->getMemoryVT();
1421   SDValue Offset, AMOpc;
1422   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1423   unsigned Opcode = 0;
1424   bool Match = false;
1425   if (LoadedVT == MVT::i32 && isPre &&
1426       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1427     Opcode = ARM::LDR_PRE_IMM;
1428     Match = true;
1429   } else if (LoadedVT == MVT::i32 && !isPre &&
1430       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1431     Opcode = ARM::LDR_POST_IMM;
1432     Match = true;
1433   } else if (LoadedVT == MVT::i32 &&
1434       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1435     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1436     Match = true;
1437
1438   } else if (LoadedVT == MVT::i16 &&
1439              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1440     Match = true;
1441     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1442       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1443       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1444   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1445     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1446       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1447         Match = true;
1448         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1449       }
1450     } else {
1451       if (isPre &&
1452           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1453         Match = true;
1454         Opcode = ARM::LDRB_PRE_IMM;
1455       } else if (!isPre &&
1456                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1457         Match = true;
1458         Opcode = ARM::LDRB_POST_IMM;
1459       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1460         Match = true;
1461         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1462       }
1463     }
1464   }
1465
1466   if (Match) {
1467     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1468       SDValue Chain = LD->getChain();
1469       SDValue Base = LD->getBasePtr();
1470       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1471                        CurDAG->getRegister(0, MVT::i32), Chain };
1472       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1473                                            MVT::Other, Ops);
1474       transferMemOperands(N, New);
1475       ReplaceNode(N, New);
1476       return true;
1477     } else {
1478       SDValue Chain = LD->getChain();
1479       SDValue Base = LD->getBasePtr();
1480       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1481                        CurDAG->getRegister(0, MVT::i32), Chain };
1482       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1483                                            MVT::Other, Ops);
1484       transferMemOperands(N, New);
1485       ReplaceNode(N, New);
1486       return true;
1487     }
1488   }
1489
1490   return false;
1491 }
1492
1493 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1494   LoadSDNode *LD = cast<LoadSDNode>(N);
1495   EVT LoadedVT = LD->getMemoryVT();
1496   ISD::MemIndexedMode AM = LD->getAddressingMode();
1497   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1498       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1499     return false;
1500
1501   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1502   if (!COffs || COffs->getZExtValue() != 4)
1503     return false;
1504
1505   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1506   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1507   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1508   // ISel.
1509   SDValue Chain = LD->getChain();
1510   SDValue Base = LD->getBasePtr();
1511   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1512                    CurDAG->getRegister(0, MVT::i32), Chain };
1513   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1514                                        MVT::i32, MVT::Other, Ops);
1515   transferMemOperands(N, New);
1516   ReplaceNode(N, New);
1517   return true;
1518 }
1519
1520 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1521   LoadSDNode *LD = cast<LoadSDNode>(N);
1522   ISD::MemIndexedMode AM = LD->getAddressingMode();
1523   if (AM == ISD::UNINDEXED)
1524     return false;
1525
1526   EVT LoadedVT = LD->getMemoryVT();
1527   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1528   SDValue Offset;
1529   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1530   unsigned Opcode = 0;
1531   bool Match = false;
1532   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1533     switch (LoadedVT.getSimpleVT().SimpleTy) {
1534     case MVT::i32:
1535       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1536       break;
1537     case MVT::i16:
1538       if (isSExtLd)
1539         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1540       else
1541         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1542       break;
1543     case MVT::i8:
1544     case MVT::i1:
1545       if (isSExtLd)
1546         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1547       else
1548         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1549       break;
1550     default:
1551       return false;
1552     }
1553     Match = true;
1554   }
1555
1556   if (Match) {
1557     SDValue Chain = LD->getChain();
1558     SDValue Base = LD->getBasePtr();
1559     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1560                      CurDAG->getRegister(0, MVT::i32), Chain };
1561     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1562                                          MVT::Other, Ops);
1563     transferMemOperands(N, New);
1564     ReplaceNode(N, New);
1565     return true;
1566   }
1567
1568   return false;
1569 }
1570
1571 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1572   LoadSDNode *LD = cast<LoadSDNode>(N);
1573   ISD::MemIndexedMode AM = LD->getAddressingMode();
1574   if (AM == ISD::UNINDEXED)
1575     return false;
1576   EVT LoadedVT = LD->getMemoryVT();
1577   if (!LoadedVT.isVector())
1578     return false;
1579   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1580   SDValue Offset;
1581   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1582   unsigned Opcode = 0;
1583   unsigned Align = LD->getAlignment();
1584   bool IsLE = Subtarget->isLittle();
1585
1586   if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1587       SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1588     if (isSExtLd)
1589       Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1590     else
1591       Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1592   } else if (LoadedVT == MVT::v8i8 &&
1593              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1594     if (isSExtLd)
1595       Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1596     else
1597       Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1598   } else if (LoadedVT == MVT::v4i8 &&
1599              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1600     if (isSExtLd)
1601       Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1602     else
1603       Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1604   } else if (Align >= 4 &&
1605              (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1606              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1607     Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1608   else if (Align >= 2 &&
1609            (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1610            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1611     Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1612   else if ((IsLE || LoadedVT == MVT::v16i8) &&
1613            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1614     Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1615   else
1616     return false;
1617
1618   SDValue Chain = LD->getChain();
1619   SDValue Base = LD->getBasePtr();
1620   SDValue Ops[] = {Base, Offset,
1621                    CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1622                    CurDAG->getRegister(0, MVT::i32), Chain};
1623   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
1624                                        MVT::i32, MVT::Other, Ops);
1625   transferMemOperands(N, New);
1626   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1627   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1628   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1629   CurDAG->RemoveDeadNode(N);
1630   return true;
1631 }
1632
1633 /// Form a GPRPair pseudo register from a pair of GPR regs.
1634 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1635   SDLoc dl(V0.getNode());
1636   SDValue RegClass =
1637     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1638   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1639   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1640   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1641   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1642 }
1643
1644 /// Form a D register from a pair of S registers.
1645 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1646   SDLoc dl(V0.getNode());
1647   SDValue RegClass =
1648     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1649   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1650   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1651   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1652   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1653 }
1654
1655 /// Form a quad register from a pair of D registers.
1656 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1657   SDLoc dl(V0.getNode());
1658   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1659                                                MVT::i32);
1660   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1661   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1662   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1663   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1664 }
1665
1666 /// Form 4 consecutive D registers from a pair of Q registers.
1667 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1668   SDLoc dl(V0.getNode());
1669   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1670                                                MVT::i32);
1671   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1672   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1673   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1674   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1675 }
1676
1677 /// Form 4 consecutive S registers.
1678 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1679                                    SDValue V2, SDValue V3) {
1680   SDLoc dl(V0.getNode());
1681   SDValue RegClass =
1682     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1683   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1684   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1685   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1686   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1687   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1688                                     V2, SubReg2, V3, SubReg3 };
1689   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1690 }
1691
1692 /// Form 4 consecutive D registers.
1693 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1694                                    SDValue V2, SDValue V3) {
1695   SDLoc dl(V0.getNode());
1696   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1697                                                MVT::i32);
1698   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1699   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1700   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1701   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1702   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1703                                     V2, SubReg2, V3, SubReg3 };
1704   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1705 }
1706
1707 /// Form 4 consecutive Q registers.
1708 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1709                                    SDValue V2, SDValue V3) {
1710   SDLoc dl(V0.getNode());
1711   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1712                                                MVT::i32);
1713   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1714   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1715   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1716   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1717   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1718                                     V2, SubReg2, V3, SubReg3 };
1719   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1720 }
1721
1722 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1723 /// of a NEON VLD or VST instruction.  The supported values depend on the
1724 /// number of registers being loaded.
1725 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1726                                        unsigned NumVecs, bool is64BitVector) {
1727   unsigned NumRegs = NumVecs;
1728   if (!is64BitVector && NumVecs < 3)
1729     NumRegs *= 2;
1730
1731   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1732   if (Alignment >= 32 && NumRegs == 4)
1733     Alignment = 32;
1734   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1735     Alignment = 16;
1736   else if (Alignment >= 8)
1737     Alignment = 8;
1738   else
1739     Alignment = 0;
1740
1741   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1742 }
1743
1744 static bool isVLDfixed(unsigned Opc)
1745 {
1746   switch (Opc) {
1747   default: return false;
1748   case ARM::VLD1d8wb_fixed : return true;
1749   case ARM::VLD1d16wb_fixed : return true;
1750   case ARM::VLD1d64Qwb_fixed : return true;
1751   case ARM::VLD1d32wb_fixed : return true;
1752   case ARM::VLD1d64wb_fixed : return true;
1753   case ARM::VLD1d64TPseudoWB_fixed : return true;
1754   case ARM::VLD1d64QPseudoWB_fixed : return true;
1755   case ARM::VLD1q8wb_fixed : return true;
1756   case ARM::VLD1q16wb_fixed : return true;
1757   case ARM::VLD1q32wb_fixed : return true;
1758   case ARM::VLD1q64wb_fixed : return true;
1759   case ARM::VLD1DUPd8wb_fixed : return true;
1760   case ARM::VLD1DUPd16wb_fixed : return true;
1761   case ARM::VLD1DUPd32wb_fixed : return true;
1762   case ARM::VLD1DUPq8wb_fixed : return true;
1763   case ARM::VLD1DUPq16wb_fixed : return true;
1764   case ARM::VLD1DUPq32wb_fixed : return true;
1765   case ARM::VLD2d8wb_fixed : return true;
1766   case ARM::VLD2d16wb_fixed : return true;
1767   case ARM::VLD2d32wb_fixed : return true;
1768   case ARM::VLD2q8PseudoWB_fixed : return true;
1769   case ARM::VLD2q16PseudoWB_fixed : return true;
1770   case ARM::VLD2q32PseudoWB_fixed : return true;
1771   case ARM::VLD2DUPd8wb_fixed : return true;
1772   case ARM::VLD2DUPd16wb_fixed : return true;
1773   case ARM::VLD2DUPd32wb_fixed : return true;
1774   }
1775 }
1776
1777 static bool isVSTfixed(unsigned Opc)
1778 {
1779   switch (Opc) {
1780   default: return false;
1781   case ARM::VST1d8wb_fixed : return true;
1782   case ARM::VST1d16wb_fixed : return true;
1783   case ARM::VST1d32wb_fixed : return true;
1784   case ARM::VST1d64wb_fixed : return true;
1785   case ARM::VST1q8wb_fixed : return true;
1786   case ARM::VST1q16wb_fixed : return true;
1787   case ARM::VST1q32wb_fixed : return true;
1788   case ARM::VST1q64wb_fixed : return true;
1789   case ARM::VST1d64TPseudoWB_fixed : return true;
1790   case ARM::VST1d64QPseudoWB_fixed : return true;
1791   case ARM::VST2d8wb_fixed : return true;
1792   case ARM::VST2d16wb_fixed : return true;
1793   case ARM::VST2d32wb_fixed : return true;
1794   case ARM::VST2q8PseudoWB_fixed : return true;
1795   case ARM::VST2q16PseudoWB_fixed : return true;
1796   case ARM::VST2q32PseudoWB_fixed : return true;
1797   }
1798 }
1799
1800 // Get the register stride update opcode of a VLD/VST instruction that
1801 // is otherwise equivalent to the given fixed stride updating instruction.
1802 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1803   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1804     && "Incorrect fixed stride updating instruction.");
1805   switch (Opc) {
1806   default: break;
1807   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1808   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1809   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1810   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1811   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1812   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1813   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1814   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1815   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1816   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1817   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1818   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1819   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1820   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1821   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1822   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1823   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1824   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1825
1826   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1827   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1828   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1829   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1830   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1831   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1832   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1833   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1834   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1835   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1836
1837   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1838   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1839   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1840   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1841   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1842   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1843
1844   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1845   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1846   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1847   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1848   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1849   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1850
1851   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1852   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1853   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1854   }
1855   return Opc; // If not one we handle, return it unchanged.
1856 }
1857
1858 /// Returns true if the given increment is a Constant known to be equal to the
1859 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1860 /// be used.
1861 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1862   auto C = dyn_cast<ConstantSDNode>(Inc);
1863   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1864 }
1865
1866 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1867                                 const uint16_t *DOpcodes,
1868                                 const uint16_t *QOpcodes0,
1869                                 const uint16_t *QOpcodes1) {
1870   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1871   SDLoc dl(N);
1872
1873   SDValue MemAddr, Align;
1874   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
1875                                    // nodes are not intrinsics.
1876   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1877   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1878     return;
1879
1880   SDValue Chain = N->getOperand(0);
1881   EVT VT = N->getValueType(0);
1882   bool is64BitVector = VT.is64BitVector();
1883   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1884
1885   unsigned OpcodeIndex;
1886   switch (VT.getSimpleVT().SimpleTy) {
1887   default: llvm_unreachable("unhandled vld type");
1888     // Double-register operations:
1889   case MVT::v8i8:  OpcodeIndex = 0; break;
1890   case MVT::v4f16:
1891   case MVT::v4i16: OpcodeIndex = 1; break;
1892   case MVT::v2f32:
1893   case MVT::v2i32: OpcodeIndex = 2; break;
1894   case MVT::v1i64: OpcodeIndex = 3; break;
1895     // Quad-register operations:
1896   case MVT::v16i8: OpcodeIndex = 0; break;
1897   case MVT::v8f16:
1898   case MVT::v8i16: OpcodeIndex = 1; break;
1899   case MVT::v4f32:
1900   case MVT::v4i32: OpcodeIndex = 2; break;
1901   case MVT::v2f64:
1902   case MVT::v2i64: OpcodeIndex = 3; break;
1903   }
1904
1905   EVT ResTy;
1906   if (NumVecs == 1)
1907     ResTy = VT;
1908   else {
1909     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1910     if (!is64BitVector)
1911       ResTyElts *= 2;
1912     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1913   }
1914   std::vector<EVT> ResTys;
1915   ResTys.push_back(ResTy);
1916   if (isUpdating)
1917     ResTys.push_back(MVT::i32);
1918   ResTys.push_back(MVT::Other);
1919
1920   SDValue Pred = getAL(CurDAG, dl);
1921   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1922   SDNode *VLd;
1923   SmallVector<SDValue, 7> Ops;
1924
1925   // Double registers and VLD1/VLD2 quad registers are directly supported.
1926   if (is64BitVector || NumVecs <= 2) {
1927     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1928                     QOpcodes0[OpcodeIndex]);
1929     Ops.push_back(MemAddr);
1930     Ops.push_back(Align);
1931     if (isUpdating) {
1932       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1933       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1934       if (!IsImmUpdate) {
1935         // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1936         // check for the opcode rather than the number of vector elements.
1937         if (isVLDfixed(Opc))
1938           Opc = getVLDSTRegisterUpdateOpcode(Opc);
1939         Ops.push_back(Inc);
1940       // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1941       // the operands if not such an opcode.
1942       } else if (!isVLDfixed(Opc))
1943         Ops.push_back(Reg0);
1944     }
1945     Ops.push_back(Pred);
1946     Ops.push_back(Reg0);
1947     Ops.push_back(Chain);
1948     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1949
1950   } else {
1951     // Otherwise, quad registers are loaded with two separate instructions,
1952     // where one loads the even registers and the other loads the odd registers.
1953     EVT AddrTy = MemAddr.getValueType();
1954
1955     // Load the even subregs.  This is always an updating load, so that it
1956     // provides the address to the second load for the odd subregs.
1957     SDValue ImplDef =
1958       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1959     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1960     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1961                                           ResTy, AddrTy, MVT::Other, OpsA);
1962     Chain = SDValue(VLdA, 2);
1963
1964     // Load the odd subregs.
1965     Ops.push_back(SDValue(VLdA, 1));
1966     Ops.push_back(Align);
1967     if (isUpdating) {
1968       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1969       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1970              "only constant post-increment update allowed for VLD3/4");
1971       (void)Inc;
1972       Ops.push_back(Reg0);
1973     }
1974     Ops.push_back(SDValue(VLdA, 0));
1975     Ops.push_back(Pred);
1976     Ops.push_back(Reg0);
1977     Ops.push_back(Chain);
1978     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1979   }
1980
1981   // Transfer memoperands.
1982   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1983   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
1984
1985   if (NumVecs == 1) {
1986     ReplaceNode(N, VLd);
1987     return;
1988   }
1989
1990   // Extract out the subregisters.
1991   SDValue SuperReg = SDValue(VLd, 0);
1992   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1993                     ARM::qsub_3 == ARM::qsub_0 + 3,
1994                 "Unexpected subreg numbering");
1995   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1996   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1997     ReplaceUses(SDValue(N, Vec),
1998                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1999   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2000   if (isUpdating)
2001     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2002   CurDAG->RemoveDeadNode(N);
2003 }
2004
2005 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2006                                 const uint16_t *DOpcodes,
2007                                 const uint16_t *QOpcodes0,
2008                                 const uint16_t *QOpcodes1) {
2009   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2010   SDLoc dl(N);
2011
2012   SDValue MemAddr, Align;
2013   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2014                                    // nodes are not intrinsics.
2015   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2016   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2017   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2018     return;
2019
2020   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2021
2022   SDValue Chain = N->getOperand(0);
2023   EVT VT = N->getOperand(Vec0Idx).getValueType();
2024   bool is64BitVector = VT.is64BitVector();
2025   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2026
2027   unsigned OpcodeIndex;
2028   switch (VT.getSimpleVT().SimpleTy) {
2029   default: llvm_unreachable("unhandled vst type");
2030     // Double-register operations:
2031   case MVT::v8i8:  OpcodeIndex = 0; break;
2032   case MVT::v4f16:
2033   case MVT::v4i16: OpcodeIndex = 1; break;
2034   case MVT::v2f32:
2035   case MVT::v2i32: OpcodeIndex = 2; break;
2036   case MVT::v1i64: OpcodeIndex = 3; break;
2037     // Quad-register operations:
2038   case MVT::v16i8: OpcodeIndex = 0; break;
2039   case MVT::v8f16:
2040   case MVT::v8i16: OpcodeIndex = 1; break;
2041   case MVT::v4f32:
2042   case MVT::v4i32: OpcodeIndex = 2; break;
2043   case MVT::v2f64:
2044   case MVT::v2i64: OpcodeIndex = 3; break;
2045   }
2046
2047   std::vector<EVT> ResTys;
2048   if (isUpdating)
2049     ResTys.push_back(MVT::i32);
2050   ResTys.push_back(MVT::Other);
2051
2052   SDValue Pred = getAL(CurDAG, dl);
2053   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2054   SmallVector<SDValue, 7> Ops;
2055
2056   // Double registers and VST1/VST2 quad registers are directly supported.
2057   if (is64BitVector || NumVecs <= 2) {
2058     SDValue SrcReg;
2059     if (NumVecs == 1) {
2060       SrcReg = N->getOperand(Vec0Idx);
2061     } else if (is64BitVector) {
2062       // Form a REG_SEQUENCE to force register allocation.
2063       SDValue V0 = N->getOperand(Vec0Idx + 0);
2064       SDValue V1 = N->getOperand(Vec0Idx + 1);
2065       if (NumVecs == 2)
2066         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2067       else {
2068         SDValue V2 = N->getOperand(Vec0Idx + 2);
2069         // If it's a vst3, form a quad D-register and leave the last part as
2070         // an undef.
2071         SDValue V3 = (NumVecs == 3)
2072           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2073           : N->getOperand(Vec0Idx + 3);
2074         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2075       }
2076     } else {
2077       // Form a QQ register.
2078       SDValue Q0 = N->getOperand(Vec0Idx);
2079       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2080       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2081     }
2082
2083     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2084                     QOpcodes0[OpcodeIndex]);
2085     Ops.push_back(MemAddr);
2086     Ops.push_back(Align);
2087     if (isUpdating) {
2088       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2089       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2090       if (!IsImmUpdate) {
2091         // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2092         // check for the opcode rather than the number of vector elements.
2093         if (isVSTfixed(Opc))
2094           Opc = getVLDSTRegisterUpdateOpcode(Opc);
2095         Ops.push_back(Inc);
2096       }
2097       // VST1/VST2 fixed increment does not need Reg0 so only include it in
2098       // the operands if not such an opcode.
2099       else if (!isVSTfixed(Opc))
2100         Ops.push_back(Reg0);
2101     }
2102     Ops.push_back(SrcReg);
2103     Ops.push_back(Pred);
2104     Ops.push_back(Reg0);
2105     Ops.push_back(Chain);
2106     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2107
2108     // Transfer memoperands.
2109     CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2110
2111     ReplaceNode(N, VSt);
2112     return;
2113   }
2114
2115   // Otherwise, quad registers are stored with two separate instructions,
2116   // where one stores the even registers and the other stores the odd registers.
2117
2118   // Form the QQQQ REG_SEQUENCE.
2119   SDValue V0 = N->getOperand(Vec0Idx + 0);
2120   SDValue V1 = N->getOperand(Vec0Idx + 1);
2121   SDValue V2 = N->getOperand(Vec0Idx + 2);
2122   SDValue V3 = (NumVecs == 3)
2123     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2124     : N->getOperand(Vec0Idx + 3);
2125   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2126
2127   // Store the even D registers.  This is always an updating store, so that it
2128   // provides the address to the second store for the odd subregs.
2129   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2130   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2131                                         MemAddr.getValueType(),
2132                                         MVT::Other, OpsA);
2133   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2134   Chain = SDValue(VStA, 1);
2135
2136   // Store the odd D registers.
2137   Ops.push_back(SDValue(VStA, 0));
2138   Ops.push_back(Align);
2139   if (isUpdating) {
2140     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2141     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2142            "only constant post-increment update allowed for VST3/4");
2143     (void)Inc;
2144     Ops.push_back(Reg0);
2145   }
2146   Ops.push_back(RegSeq);
2147   Ops.push_back(Pred);
2148   Ops.push_back(Reg0);
2149   Ops.push_back(Chain);
2150   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2151                                         Ops);
2152   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2153   ReplaceNode(N, VStB);
2154 }
2155
2156 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2157                                       unsigned NumVecs,
2158                                       const uint16_t *DOpcodes,
2159                                       const uint16_t *QOpcodes) {
2160   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2161   SDLoc dl(N);
2162
2163   SDValue MemAddr, Align;
2164   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2165                                    // nodes are not intrinsics.
2166   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2167   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2168   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2169     return;
2170
2171   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2172
2173   SDValue Chain = N->getOperand(0);
2174   unsigned Lane =
2175     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2176   EVT VT = N->getOperand(Vec0Idx).getValueType();
2177   bool is64BitVector = VT.is64BitVector();
2178
2179   unsigned Alignment = 0;
2180   if (NumVecs != 3) {
2181     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2182     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2183     if (Alignment > NumBytes)
2184       Alignment = NumBytes;
2185     if (Alignment < 8 && Alignment < NumBytes)
2186       Alignment = 0;
2187     // Alignment must be a power of two; make sure of that.
2188     Alignment = (Alignment & -Alignment);
2189     if (Alignment == 1)
2190       Alignment = 0;
2191   }
2192   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2193
2194   unsigned OpcodeIndex;
2195   switch (VT.getSimpleVT().SimpleTy) {
2196   default: llvm_unreachable("unhandled vld/vst lane type");
2197     // Double-register operations:
2198   case MVT::v8i8:  OpcodeIndex = 0; break;
2199   case MVT::v4f16:
2200   case MVT::v4i16: OpcodeIndex = 1; break;
2201   case MVT::v2f32:
2202   case MVT::v2i32: OpcodeIndex = 2; break;
2203     // Quad-register operations:
2204   case MVT::v8f16:
2205   case MVT::v8i16: OpcodeIndex = 0; break;
2206   case MVT::v4f32:
2207   case MVT::v4i32: OpcodeIndex = 1; break;
2208   }
2209
2210   std::vector<EVT> ResTys;
2211   if (IsLoad) {
2212     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2213     if (!is64BitVector)
2214       ResTyElts *= 2;
2215     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2216                                       MVT::i64, ResTyElts));
2217   }
2218   if (isUpdating)
2219     ResTys.push_back(MVT::i32);
2220   ResTys.push_back(MVT::Other);
2221
2222   SDValue Pred = getAL(CurDAG, dl);
2223   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2224
2225   SmallVector<SDValue, 8> Ops;
2226   Ops.push_back(MemAddr);
2227   Ops.push_back(Align);
2228   if (isUpdating) {
2229     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2230     bool IsImmUpdate =
2231         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2232     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2233   }
2234
2235   SDValue SuperReg;
2236   SDValue V0 = N->getOperand(Vec0Idx + 0);
2237   SDValue V1 = N->getOperand(Vec0Idx + 1);
2238   if (NumVecs == 2) {
2239     if (is64BitVector)
2240       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2241     else
2242       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2243   } else {
2244     SDValue V2 = N->getOperand(Vec0Idx + 2);
2245     SDValue V3 = (NumVecs == 3)
2246       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2247       : N->getOperand(Vec0Idx + 3);
2248     if (is64BitVector)
2249       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2250     else
2251       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2252   }
2253   Ops.push_back(SuperReg);
2254   Ops.push_back(getI32Imm(Lane, dl));
2255   Ops.push_back(Pred);
2256   Ops.push_back(Reg0);
2257   Ops.push_back(Chain);
2258
2259   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2260                                   QOpcodes[OpcodeIndex]);
2261   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2262   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2263   if (!IsLoad) {
2264     ReplaceNode(N, VLdLn);
2265     return;
2266   }
2267
2268   // Extract the subregisters.
2269   SuperReg = SDValue(VLdLn, 0);
2270   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2271                     ARM::qsub_3 == ARM::qsub_0 + 3,
2272                 "Unexpected subreg numbering");
2273   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2274   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2275     ReplaceUses(SDValue(N, Vec),
2276                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2277   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2278   if (isUpdating)
2279     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2280   CurDAG->RemoveDeadNode(N);
2281 }
2282
2283 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2284                                    bool isUpdating, unsigned NumVecs,
2285                                    const uint16_t *DOpcodes,
2286                                    const uint16_t *QOpcodes0,
2287                                    const uint16_t *QOpcodes1) {
2288   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2289   SDLoc dl(N);
2290
2291   SDValue MemAddr, Align;
2292   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2293   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2294     return;
2295
2296   SDValue Chain = N->getOperand(0);
2297   EVT VT = N->getValueType(0);
2298   bool is64BitVector = VT.is64BitVector();
2299
2300   unsigned Alignment = 0;
2301   if (NumVecs != 3) {
2302     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2303     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2304     if (Alignment > NumBytes)
2305       Alignment = NumBytes;
2306     if (Alignment < 8 && Alignment < NumBytes)
2307       Alignment = 0;
2308     // Alignment must be a power of two; make sure of that.
2309     Alignment = (Alignment & -Alignment);
2310     if (Alignment == 1)
2311       Alignment = 0;
2312   }
2313   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2314
2315   unsigned OpcodeIndex;
2316   switch (VT.getSimpleVT().SimpleTy) {
2317   default: llvm_unreachable("unhandled vld-dup type");
2318   case MVT::v8i8:
2319   case MVT::v16i8: OpcodeIndex = 0; break;
2320   case MVT::v4i16:
2321   case MVT::v8i16:
2322   case MVT::v4f16:
2323   case MVT::v8f16:
2324                   OpcodeIndex = 1; break;
2325   case MVT::v2f32:
2326   case MVT::v2i32:
2327   case MVT::v4f32:
2328   case MVT::v4i32: OpcodeIndex = 2; break;
2329   case MVT::v1f64:
2330   case MVT::v1i64: OpcodeIndex = 3; break;
2331   }
2332
2333   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2334   if (!is64BitVector)
2335     ResTyElts *= 2;
2336   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2337
2338   std::vector<EVT> ResTys;
2339   ResTys.push_back(ResTy);
2340   if (isUpdating)
2341     ResTys.push_back(MVT::i32);
2342   ResTys.push_back(MVT::Other);
2343
2344   SDValue Pred = getAL(CurDAG, dl);
2345   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2346
2347   SDNode *VLdDup;
2348   if (is64BitVector || NumVecs == 1) {
2349     SmallVector<SDValue, 6> Ops;
2350     Ops.push_back(MemAddr);
2351     Ops.push_back(Align);
2352     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2353                                    QOpcodes0[OpcodeIndex];
2354     if (isUpdating) {
2355       // fixed-stride update instructions don't have an explicit writeback
2356       // operand. It's implicit in the opcode itself.
2357       SDValue Inc = N->getOperand(2);
2358       bool IsImmUpdate =
2359           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2360       if (NumVecs <= 2 && !IsImmUpdate)
2361         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2362       if (!IsImmUpdate)
2363         Ops.push_back(Inc);
2364       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2365       else if (NumVecs > 2)
2366         Ops.push_back(Reg0);
2367     }
2368     Ops.push_back(Pred);
2369     Ops.push_back(Reg0);
2370     Ops.push_back(Chain);
2371     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2372   } else if (NumVecs == 2) {
2373     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2374     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2375                                           dl, ResTys, OpsA);
2376
2377     Chain = SDValue(VLdA, 1);
2378     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2379     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2380   } else {
2381     SDValue ImplDef =
2382       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2383     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2384     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2385                                           dl, ResTys, OpsA);
2386
2387     SDValue SuperReg = SDValue(VLdA, 0);
2388     Chain = SDValue(VLdA, 1);
2389     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2390     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2391   }
2392
2393   // Transfer memoperands.
2394   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2395   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2396
2397   // Extract the subregisters.
2398   if (NumVecs == 1) {
2399     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2400   } else {
2401     SDValue SuperReg = SDValue(VLdDup, 0);
2402     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2403     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2404     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2405       ReplaceUses(SDValue(N, Vec),
2406                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2407     }
2408   }
2409   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2410   if (isUpdating)
2411     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2412   CurDAG->RemoveDeadNode(N);
2413 }
2414
2415 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2416   if (!Subtarget->hasV6T2Ops())
2417     return false;
2418
2419   unsigned Opc = isSigned
2420     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2421     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2422   SDLoc dl(N);
2423
2424   // For unsigned extracts, check for a shift right and mask
2425   unsigned And_imm = 0;
2426   if (N->getOpcode() == ISD::AND) {
2427     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2428
2429       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2430       if (And_imm & (And_imm + 1))
2431         return false;
2432
2433       unsigned Srl_imm = 0;
2434       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2435                                 Srl_imm)) {
2436         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2437
2438         // Mask off the unnecessary bits of the AND immediate; normally
2439         // DAGCombine will do this, but that might not happen if
2440         // targetShrinkDemandedConstant chooses a different immediate.
2441         And_imm &= -1U >> Srl_imm;
2442
2443         // Note: The width operand is encoded as width-1.
2444         unsigned Width = countTrailingOnes(And_imm) - 1;
2445         unsigned LSB = Srl_imm;
2446
2447         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2448
2449         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2450           // It's cheaper to use a right shift to extract the top bits.
2451           if (Subtarget->isThumb()) {
2452             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2453             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2454                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2455                               getAL(CurDAG, dl), Reg0, Reg0 };
2456             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2457             return true;
2458           }
2459
2460           // ARM models shift instructions as MOVsi with shifter operand.
2461           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2462           SDValue ShOpc =
2463             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2464                                       MVT::i32);
2465           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2466                             getAL(CurDAG, dl), Reg0, Reg0 };
2467           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2468           return true;
2469         }
2470
2471         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2472         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2473                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2474                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2475                           getAL(CurDAG, dl), Reg0 };
2476         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2477         return true;
2478       }
2479     }
2480     return false;
2481   }
2482
2483   // Otherwise, we're looking for a shift of a shift
2484   unsigned Shl_imm = 0;
2485   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2486     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2487     unsigned Srl_imm = 0;
2488     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2489       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2490       // Note: The width operand is encoded as width-1.
2491       unsigned Width = 32 - Srl_imm - 1;
2492       int LSB = Srl_imm - Shl_imm;
2493       if (LSB < 0)
2494         return false;
2495       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2496       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2497       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2498                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2499                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2500                         getAL(CurDAG, dl), Reg0 };
2501       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2502       return true;
2503     }
2504   }
2505
2506   // Or we are looking for a shift of an and, with a mask operand
2507   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2508       isShiftedMask_32(And_imm)) {
2509     unsigned Srl_imm = 0;
2510     unsigned LSB = countTrailingZeros(And_imm);
2511     // Shift must be the same as the ands lsb
2512     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2513       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2514       unsigned MSB = 31 - countLeadingZeros(And_imm);
2515       // Note: The width operand is encoded as width-1.
2516       unsigned Width = MSB - LSB;
2517       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2518       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2519       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2520                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2521                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2522                         getAL(CurDAG, dl), Reg0 };
2523       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2524       return true;
2525     }
2526   }
2527
2528   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2529     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2530     unsigned LSB = 0;
2531     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2532         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2533       return false;
2534
2535     if (LSB + Width > 32)
2536       return false;
2537
2538     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2539     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2540     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2541                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2542                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2543                       getAL(CurDAG, dl), Reg0 };
2544     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2545     return true;
2546   }
2547
2548   return false;
2549 }
2550
2551 /// Target-specific DAG combining for ISD::XOR.
2552 /// Target-independent combining lowers SELECT_CC nodes of the form
2553 /// select_cc setg[ge] X,  0,  X, -X
2554 /// select_cc setgt    X, -1,  X, -X
2555 /// select_cc setl[te] X,  0, -X,  X
2556 /// select_cc setlt    X,  1, -X,  X
2557 /// which represent Integer ABS into:
2558 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2559 /// ARM instruction selection detects the latter and matches it to
2560 /// ARM::ABS or ARM::t2ABS machine node.
2561 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2562   SDValue XORSrc0 = N->getOperand(0);
2563   SDValue XORSrc1 = N->getOperand(1);
2564   EVT VT = N->getValueType(0);
2565
2566   if (Subtarget->isThumb1Only())
2567     return false;
2568
2569   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2570     return false;
2571
2572   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2573   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2574   SDValue SRASrc0 = XORSrc1.getOperand(0);
2575   SDValue SRASrc1 = XORSrc1.getOperand(1);
2576   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2577   EVT XType = SRASrc0.getValueType();
2578   unsigned Size = XType.getSizeInBits() - 1;
2579
2580   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2581       XType.isInteger() && SRAConstant != nullptr &&
2582       Size == SRAConstant->getZExtValue()) {
2583     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2584     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2585     return true;
2586   }
2587
2588   return false;
2589 }
2590
2591 /// We've got special pseudo-instructions for these
2592 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2593   unsigned Opcode;
2594   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2595   if (MemTy == MVT::i8)
2596     Opcode = ARM::CMP_SWAP_8;
2597   else if (MemTy == MVT::i16)
2598     Opcode = ARM::CMP_SWAP_16;
2599   else if (MemTy == MVT::i32)
2600     Opcode = ARM::CMP_SWAP_32;
2601   else
2602     llvm_unreachable("Unknown AtomicCmpSwap type");
2603
2604   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2605                    N->getOperand(0)};
2606   SDNode *CmpSwap = CurDAG->getMachineNode(
2607       Opcode, SDLoc(N),
2608       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2609
2610   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2611   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2612
2613   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2614   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2615   CurDAG->RemoveDeadNode(N);
2616 }
2617
2618 static Optional<std::pair<unsigned, unsigned>>
2619 getContiguousRangeOfSetBits(const APInt &A) {
2620   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2621   unsigned LastOne = A.countTrailingZeros();
2622   if (A.countPopulation() != (FirstOne - LastOne + 1))
2623     return Optional<std::pair<unsigned,unsigned>>();
2624   return std::make_pair(FirstOne, LastOne);
2625 }
2626
2627 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2628   assert(N->getOpcode() == ARMISD::CMPZ);
2629   SwitchEQNEToPLMI = false;
2630
2631   if (!Subtarget->isThumb())
2632     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2633     // LSR don't exist as standalone instructions - they need the barrel shifter.
2634     return;
2635
2636   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2637   SDValue And = N->getOperand(0);
2638   if (!And->hasOneUse())
2639     return;
2640
2641   SDValue Zero = N->getOperand(1);
2642   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2643       And->getOpcode() != ISD::AND)
2644     return;
2645   SDValue X = And.getOperand(0);
2646   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2647
2648   if (!C)
2649     return;
2650   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2651   if (!Range)
2652     return;
2653
2654   // There are several ways to lower this:
2655   SDNode *NewN;
2656   SDLoc dl(N);
2657
2658   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2659     if (Subtarget->isThumb2()) {
2660       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2661       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2662                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2663                         CurDAG->getRegister(0, MVT::i32) };
2664       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2665     } else {
2666       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2667                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2668                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2669       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2670     }
2671   };
2672
2673   if (Range->second == 0) {
2674     //  1. Mask includes the LSB -> Simply shift the top N bits off
2675     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2676     ReplaceNode(And.getNode(), NewN);
2677   } else if (Range->first == 31) {
2678     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2679     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2680     ReplaceNode(And.getNode(), NewN);
2681   } else if (Range->first == Range->second) {
2682     //  3. Only one bit is set. We can shift this into the sign bit and use a
2683     //     PL/MI comparison.
2684     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2685     ReplaceNode(And.getNode(), NewN);
2686
2687     SwitchEQNEToPLMI = true;
2688   } else if (!Subtarget->hasV6T2Ops()) {
2689     //  4. Do a double shift to clear bottom and top bits, but only in
2690     //     thumb-1 mode as in thumb-2 we can use UBFX.
2691     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2692     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2693                      Range->second + (31 - Range->first));
2694     ReplaceNode(And.getNode(), NewN);
2695   }
2696
2697 }
2698
2699 void ARMDAGToDAGISel::Select(SDNode *N) {
2700   SDLoc dl(N);
2701
2702   if (N->isMachineOpcode()) {
2703     N->setNodeId(-1);
2704     return;   // Already selected.
2705   }
2706
2707   switch (N->getOpcode()) {
2708   default: break;
2709   case ISD::STORE: {
2710     // For Thumb1, match an sp-relative store in C++. This is a little
2711     // unfortunate, but I don't think I can make the chain check work
2712     // otherwise.  (The chain of the store has to be the same as the chain
2713     // of the CopyFromReg, or else we can't replace the CopyFromReg with
2714     // a direct reference to "SP".)
2715     //
2716     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2717     // a different addressing mode from other four-byte stores.
2718     //
2719     // This pattern usually comes up with call arguments.
2720     StoreSDNode *ST = cast<StoreSDNode>(N);
2721     SDValue Ptr = ST->getBasePtr();
2722     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2723       int RHSC = 0;
2724       if (Ptr.getOpcode() == ISD::ADD &&
2725           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2726         Ptr = Ptr.getOperand(0);
2727
2728       if (Ptr.getOpcode() == ISD::CopyFromReg &&
2729           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2730           Ptr.getOperand(0) == ST->getChain()) {
2731         SDValue Ops[] = {ST->getValue(),
2732                          CurDAG->getRegister(ARM::SP, MVT::i32),
2733                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2734                          getAL(CurDAG, dl),
2735                          CurDAG->getRegister(0, MVT::i32),
2736                          ST->getChain()};
2737         MachineSDNode *ResNode =
2738             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2739         MachineMemOperand *MemOp = ST->getMemOperand();
2740         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2741         ReplaceNode(N, ResNode);
2742         return;
2743       }
2744     }
2745     break;
2746   }
2747   case ISD::WRITE_REGISTER:
2748     if (tryWriteRegister(N))
2749       return;
2750     break;
2751   case ISD::READ_REGISTER:
2752     if (tryReadRegister(N))
2753       return;
2754     break;
2755   case ISD::INLINEASM:
2756   case ISD::INLINEASM_BR:
2757     if (tryInlineAsm(N))
2758       return;
2759     break;
2760   case ISD::XOR:
2761     // Select special operations if XOR node forms integer ABS pattern
2762     if (tryABSOp(N))
2763       return;
2764     // Other cases are autogenerated.
2765     break;
2766   case ISD::Constant: {
2767     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2768     // If we can't materialize the constant we need to use a literal pool
2769     if (ConstantMaterializationCost(Val, Subtarget) > 2) {
2770       SDValue CPIdx = CurDAG->getTargetConstantPool(
2771           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2772           TLI->getPointerTy(CurDAG->getDataLayout()));
2773
2774       SDNode *ResNode;
2775       if (Subtarget->isThumb()) {
2776         SDValue Ops[] = {
2777           CPIdx,
2778           getAL(CurDAG, dl),
2779           CurDAG->getRegister(0, MVT::i32),
2780           CurDAG->getEntryNode()
2781         };
2782         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2783                                          Ops);
2784       } else {
2785         SDValue Ops[] = {
2786           CPIdx,
2787           CurDAG->getTargetConstant(0, dl, MVT::i32),
2788           getAL(CurDAG, dl),
2789           CurDAG->getRegister(0, MVT::i32),
2790           CurDAG->getEntryNode()
2791         };
2792         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2793                                          Ops);
2794       }
2795       // Annotate the Node with memory operand information so that MachineInstr
2796       // queries work properly. This e.g. gives the register allocation the
2797       // required information for rematerialization.
2798       MachineFunction& MF = CurDAG->getMachineFunction();
2799       MachineMemOperand *MemOp =
2800           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2801                                   MachineMemOperand::MOLoad, 4, 4);
2802
2803       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2804
2805       ReplaceNode(N, ResNode);
2806       return;
2807     }
2808
2809     // Other cases are autogenerated.
2810     break;
2811   }
2812   case ISD::FrameIndex: {
2813     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2814     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2815     SDValue TFI = CurDAG->getTargetFrameIndex(
2816         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2817     if (Subtarget->isThumb1Only()) {
2818       // Set the alignment of the frame object to 4, to avoid having to generate
2819       // more than one ADD
2820       MachineFrameInfo &MFI = MF->getFrameInfo();
2821       if (MFI.getObjectAlignment(FI) < 4)
2822         MFI.setObjectAlignment(FI, 4);
2823       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2824                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2825       return;
2826     } else {
2827       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2828                       ARM::t2ADDri : ARM::ADDri);
2829       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2830                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2831                         CurDAG->getRegister(0, MVT::i32) };
2832       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2833       return;
2834     }
2835   }
2836   case ISD::SRL:
2837     if (tryV6T2BitfieldExtractOp(N, false))
2838       return;
2839     break;
2840   case ISD::SIGN_EXTEND_INREG:
2841   case ISD::SRA:
2842     if (tryV6T2BitfieldExtractOp(N, true))
2843       return;
2844     break;
2845   case ISD::MUL:
2846     if (Subtarget->isThumb1Only())
2847       break;
2848     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2849       unsigned RHSV = C->getZExtValue();
2850       if (!RHSV) break;
2851       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2852         unsigned ShImm = Log2_32(RHSV-1);
2853         if (ShImm >= 32)
2854           break;
2855         SDValue V = N->getOperand(0);
2856         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2857         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2858         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2859         if (Subtarget->isThumb()) {
2860           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2861           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2862           return;
2863         } else {
2864           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2865                             Reg0 };
2866           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2867           return;
2868         }
2869       }
2870       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2871         unsigned ShImm = Log2_32(RHSV+1);
2872         if (ShImm >= 32)
2873           break;
2874         SDValue V = N->getOperand(0);
2875         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2876         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2877         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2878         if (Subtarget->isThumb()) {
2879           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2880           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2881           return;
2882         } else {
2883           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2884                             Reg0 };
2885           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2886           return;
2887         }
2888       }
2889     }
2890     break;
2891   case ISD::AND: {
2892     // Check for unsigned bitfield extract
2893     if (tryV6T2BitfieldExtractOp(N, false))
2894       return;
2895
2896     // If an immediate is used in an AND node, it is possible that the immediate
2897     // can be more optimally materialized when negated. If this is the case we
2898     // can negate the immediate and use a BIC instead.
2899     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2900     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2901       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2902
2903       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2904       // immediate can be negated and fit in the immediate operand of
2905       // a t2BIC, don't do any manual transform here as this can be
2906       // handled by the generic ISel machinery.
2907       bool PreferImmediateEncoding =
2908         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2909       if (!PreferImmediateEncoding &&
2910           ConstantMaterializationCost(Imm, Subtarget) >
2911               ConstantMaterializationCost(~Imm, Subtarget)) {
2912         // The current immediate costs more to materialize than a negated
2913         // immediate, so negate the immediate and use a BIC.
2914         SDValue NewImm =
2915           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2916         // If the new constant didn't exist before, reposition it in the topological
2917         // ordering so it is just before N. Otherwise, don't touch its location.
2918         if (NewImm->getNodeId() == -1)
2919           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2920
2921         if (!Subtarget->hasThumb2()) {
2922           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2923                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2924                            CurDAG->getRegister(0, MVT::i32)};
2925           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2926           return;
2927         } else {
2928           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2929                            CurDAG->getRegister(0, MVT::i32),
2930                            CurDAG->getRegister(0, MVT::i32)};
2931           ReplaceNode(N,
2932                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2933           return;
2934         }
2935       }
2936     }
2937
2938     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2939     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2940     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2941     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2942     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2943     EVT VT = N->getValueType(0);
2944     if (VT != MVT::i32)
2945       break;
2946     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2947       ? ARM::t2MOVTi16
2948       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2949     if (!Opc)
2950       break;
2951     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2952     N1C = dyn_cast<ConstantSDNode>(N1);
2953     if (!N1C)
2954       break;
2955     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2956       SDValue N2 = N0.getOperand(1);
2957       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2958       if (!N2C)
2959         break;
2960       unsigned N1CVal = N1C->getZExtValue();
2961       unsigned N2CVal = N2C->getZExtValue();
2962       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2963           (N1CVal & 0xffffU) == 0xffffU &&
2964           (N2CVal & 0xffffU) == 0x0U) {
2965         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2966                                                   dl, MVT::i32);
2967         SDValue Ops[] = { N0.getOperand(0), Imm16,
2968                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2969         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2970         return;
2971       }
2972     }
2973
2974     break;
2975   }
2976   case ARMISD::UMAAL: {
2977     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2978     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2979                       N->getOperand(2), N->getOperand(3),
2980                       getAL(CurDAG, dl),
2981                       CurDAG->getRegister(0, MVT::i32) };
2982     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2983     return;
2984   }
2985   case ARMISD::UMLAL:{
2986     if (Subtarget->isThumb()) {
2987       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2988                         N->getOperand(3), getAL(CurDAG, dl),
2989                         CurDAG->getRegister(0, MVT::i32)};
2990       ReplaceNode(
2991           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2992       return;
2993     }else{
2994       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2995                         N->getOperand(3), getAL(CurDAG, dl),
2996                         CurDAG->getRegister(0, MVT::i32),
2997                         CurDAG->getRegister(0, MVT::i32) };
2998       ReplaceNode(N, CurDAG->getMachineNode(
2999                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3000                          MVT::i32, MVT::i32, Ops));
3001       return;
3002     }
3003   }
3004   case ARMISD::SMLAL:{
3005     if (Subtarget->isThumb()) {
3006       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3007                         N->getOperand(3), getAL(CurDAG, dl),
3008                         CurDAG->getRegister(0, MVT::i32)};
3009       ReplaceNode(
3010           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3011       return;
3012     }else{
3013       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3014                         N->getOperand(3), getAL(CurDAG, dl),
3015                         CurDAG->getRegister(0, MVT::i32),
3016                         CurDAG->getRegister(0, MVT::i32) };
3017       ReplaceNode(N, CurDAG->getMachineNode(
3018                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3019                          MVT::i32, MVT::i32, Ops));
3020       return;
3021     }
3022   }
3023   case ARMISD::SUBE: {
3024     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3025       break;
3026     // Look for a pattern to match SMMLS
3027     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3028     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3029         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3030         !SDValue(N, 1).use_empty())
3031       break;
3032
3033     if (Subtarget->isThumb())
3034       assert(Subtarget->hasThumb2() &&
3035              "This pattern should not be generated for Thumb");
3036
3037     SDValue SmulLoHi = N->getOperand(1);
3038     SDValue Subc = N->getOperand(2);
3039     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3040
3041     if (!Zero || Zero->getZExtValue() != 0 ||
3042         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3043         N->getOperand(1) != SmulLoHi.getValue(1) ||
3044         N->getOperand(2) != Subc.getValue(1))
3045       break;
3046
3047     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3048     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3049                       N->getOperand(0), getAL(CurDAG, dl),
3050                       CurDAG->getRegister(0, MVT::i32) };
3051     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3052     return;
3053   }
3054   case ISD::LOAD: {
3055     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3056       return;
3057     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3058       if (tryT2IndexedLoad(N))
3059         return;
3060     } else if (Subtarget->isThumb()) {
3061       if (tryT1IndexedLoad(N))
3062         return;
3063     } else if (tryARMIndexedLoad(N))
3064       return;
3065     // Other cases are autogenerated.
3066     break;
3067   }
3068   case ARMISD::WLS:
3069   case ARMISD::LE: {
3070     SDValue Ops[] = { N->getOperand(1),
3071                       N->getOperand(2),
3072                       N->getOperand(0) };
3073     unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3074       ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3075     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3076     ReplaceUses(N, New);
3077     CurDAG->RemoveDeadNode(N);
3078     return;
3079   }
3080   case ARMISD::LOOP_DEC: {
3081     SDValue Ops[] = { N->getOperand(1),
3082                       N->getOperand(2),
3083                       N->getOperand(0) };
3084     SDNode *Dec =
3085       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3086                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3087     ReplaceUses(N, Dec);
3088     CurDAG->RemoveDeadNode(N);
3089     return;
3090   }
3091   case ARMISD::BRCOND: {
3092     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3093     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3094     // Pattern complexity = 6  cost = 1  size = 0
3095
3096     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3097     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3098     // Pattern complexity = 6  cost = 1  size = 0
3099
3100     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3101     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3102     // Pattern complexity = 6  cost = 1  size = 0
3103
3104     unsigned Opc = Subtarget->isThumb() ?
3105       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3106     SDValue Chain = N->getOperand(0);
3107     SDValue N1 = N->getOperand(1);
3108     SDValue N2 = N->getOperand(2);
3109     SDValue N3 = N->getOperand(3);
3110     SDValue InFlag = N->getOperand(4);
3111     assert(N1.getOpcode() == ISD::BasicBlock);
3112     assert(N2.getOpcode() == ISD::Constant);
3113     assert(N3.getOpcode() == ISD::Register);
3114
3115     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3116
3117     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3118       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3119         SDValue Int = InFlag.getOperand(0);
3120         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3121
3122         // Handle low-overhead loops.
3123         if (ID == Intrinsic::loop_decrement_reg) {
3124           SDValue Elements = Int.getOperand(2);
3125           SDValue Size = CurDAG->getTargetConstant(
3126             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3127                                  MVT::i32);
3128
3129           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3130           SDNode *LoopDec =
3131             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3132                                    CurDAG->getVTList(MVT::i32, MVT::Other),
3133                                    Args);
3134           ReplaceUses(Int.getNode(), LoopDec);
3135
3136           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3137           SDNode *LoopEnd =
3138             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3139
3140           ReplaceUses(N, LoopEnd);
3141           CurDAG->RemoveDeadNode(N);
3142           CurDAG->RemoveDeadNode(InFlag.getNode());
3143           CurDAG->RemoveDeadNode(Int.getNode());
3144           return;
3145         }
3146       }
3147
3148       bool SwitchEQNEToPLMI;
3149       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3150       InFlag = N->getOperand(4);
3151
3152       if (SwitchEQNEToPLMI) {
3153         switch ((ARMCC::CondCodes)CC) {
3154         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3155         case ARMCC::NE:
3156           CC = (unsigned)ARMCC::MI;
3157           break;
3158         case ARMCC::EQ:
3159           CC = (unsigned)ARMCC::PL;
3160           break;
3161         }
3162       }
3163     }
3164
3165     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3166     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3167     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3168                                              MVT::Glue, Ops);
3169     Chain = SDValue(ResNode, 0);
3170     if (N->getNumValues() == 2) {
3171       InFlag = SDValue(ResNode, 1);
3172       ReplaceUses(SDValue(N, 1), InFlag);
3173     }
3174     ReplaceUses(SDValue(N, 0),
3175                 SDValue(Chain.getNode(), Chain.getResNo()));
3176     CurDAG->RemoveDeadNode(N);
3177     return;
3178   }
3179
3180   case ARMISD::CMPZ: {
3181     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3182     //   This allows us to avoid materializing the expensive negative constant.
3183     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3184     //   for its glue output.
3185     SDValue X = N->getOperand(0);
3186     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3187     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3188       int64_t Addend = -C->getSExtValue();
3189
3190       SDNode *Add = nullptr;
3191       // ADDS can be better than CMN if the immediate fits in a
3192       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3193       // Outside that range we can just use a CMN which is 32-bit but has a
3194       // 12-bit immediate range.
3195       if (Addend < 1<<8) {
3196         if (Subtarget->isThumb2()) {
3197           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3198                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3199                             CurDAG->getRegister(0, MVT::i32) };
3200           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3201         } else {
3202           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3203           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3204                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3205                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3206           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3207         }
3208       }
3209       if (Add) {
3210         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3211         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3212       }
3213     }
3214     // Other cases are autogenerated.
3215     break;
3216   }
3217
3218   case ARMISD::CMOV: {
3219     SDValue InFlag = N->getOperand(4);
3220
3221     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3222       bool SwitchEQNEToPLMI;
3223       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3224
3225       if (SwitchEQNEToPLMI) {
3226         SDValue ARMcc = N->getOperand(2);
3227         ARMCC::CondCodes CC =
3228           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3229
3230         switch (CC) {
3231         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3232         case ARMCC::NE:
3233           CC = ARMCC::MI;
3234           break;
3235         case ARMCC::EQ:
3236           CC = ARMCC::PL;
3237           break;
3238         }
3239         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3240         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3241                          N->getOperand(3), N->getOperand(4)};
3242         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3243       }
3244
3245     }
3246     // Other cases are autogenerated.
3247     break;
3248   }
3249
3250   case ARMISD::VZIP: {
3251     unsigned Opc = 0;
3252     EVT VT = N->getValueType(0);
3253     switch (VT.getSimpleVT().SimpleTy) {
3254     default: return;
3255     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3256     case MVT::v4f16:
3257     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3258     case MVT::v2f32:
3259     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3260     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3261     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3262     case MVT::v8f16:
3263     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3264     case MVT::v4f32:
3265     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3266     }
3267     SDValue Pred = getAL(CurDAG, dl);
3268     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3269     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3270     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3271     return;
3272   }
3273   case ARMISD::VUZP: {
3274     unsigned Opc = 0;
3275     EVT VT = N->getValueType(0);
3276     switch (VT.getSimpleVT().SimpleTy) {
3277     default: return;
3278     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3279     case MVT::v4f16:
3280     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3281     case MVT::v2f32:
3282     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3283     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3284     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3285     case MVT::v8f16:
3286     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3287     case MVT::v4f32:
3288     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3289     }
3290     SDValue Pred = getAL(CurDAG, dl);
3291     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3292     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3293     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3294     return;
3295   }
3296   case ARMISD::VTRN: {
3297     unsigned Opc = 0;
3298     EVT VT = N->getValueType(0);
3299     switch (VT.getSimpleVT().SimpleTy) {
3300     default: return;
3301     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3302     case MVT::v4f16:
3303     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3304     case MVT::v2f32:
3305     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3306     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3307     case MVT::v8f16:
3308     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3309     case MVT::v4f32:
3310     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3311     }
3312     SDValue Pred = getAL(CurDAG, dl);
3313     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3314     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3315     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3316     return;
3317   }
3318   case ARMISD::BUILD_VECTOR: {
3319     EVT VecVT = N->getValueType(0);
3320     EVT EltVT = VecVT.getVectorElementType();
3321     unsigned NumElts = VecVT.getVectorNumElements();
3322     if (EltVT == MVT::f64) {
3323       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3324       ReplaceNode(
3325           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3326       return;
3327     }
3328     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3329     if (NumElts == 2) {
3330       ReplaceNode(
3331           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3332       return;
3333     }
3334     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3335     ReplaceNode(N,
3336                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3337                                     N->getOperand(2), N->getOperand(3)));
3338     return;
3339   }
3340
3341   case ARMISD::VLD1DUP: {
3342     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3343                                          ARM::VLD1DUPd32 };
3344     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3345                                          ARM::VLD1DUPq32 };
3346     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3347     return;
3348   }
3349
3350   case ARMISD::VLD2DUP: {
3351     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3352                                         ARM::VLD2DUPd32 };
3353     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3354     return;
3355   }
3356
3357   case ARMISD::VLD3DUP: {
3358     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3359                                         ARM::VLD3DUPd16Pseudo,
3360                                         ARM::VLD3DUPd32Pseudo };
3361     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3362     return;
3363   }
3364
3365   case ARMISD::VLD4DUP: {
3366     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3367                                         ARM::VLD4DUPd16Pseudo,
3368                                         ARM::VLD4DUPd32Pseudo };
3369     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3370     return;
3371   }
3372
3373   case ARMISD::VLD1DUP_UPD: {
3374     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3375                                          ARM::VLD1DUPd16wb_fixed,
3376                                          ARM::VLD1DUPd32wb_fixed };
3377     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3378                                          ARM::VLD1DUPq16wb_fixed,
3379                                          ARM::VLD1DUPq32wb_fixed };
3380     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3381     return;
3382   }
3383
3384   case ARMISD::VLD2DUP_UPD: {
3385     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3386                                         ARM::VLD2DUPd16wb_fixed,
3387                                         ARM::VLD2DUPd32wb_fixed };
3388     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3389     return;
3390   }
3391
3392   case ARMISD::VLD3DUP_UPD: {
3393     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3394                                         ARM::VLD3DUPd16Pseudo_UPD,
3395                                         ARM::VLD3DUPd32Pseudo_UPD };
3396     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3397     return;
3398   }
3399
3400   case ARMISD::VLD4DUP_UPD: {
3401     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3402                                         ARM::VLD4DUPd16Pseudo_UPD,
3403                                         ARM::VLD4DUPd32Pseudo_UPD };
3404     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3405     return;
3406   }
3407
3408   case ARMISD::VLD1_UPD: {
3409     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3410                                          ARM::VLD1d16wb_fixed,
3411                                          ARM::VLD1d32wb_fixed,
3412                                          ARM::VLD1d64wb_fixed };
3413     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3414                                          ARM::VLD1q16wb_fixed,
3415                                          ARM::VLD1q32wb_fixed,
3416                                          ARM::VLD1q64wb_fixed };
3417     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3418     return;
3419   }
3420
3421   case ARMISD::VLD2_UPD: {
3422     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3423                                          ARM::VLD2d16wb_fixed,
3424                                          ARM::VLD2d32wb_fixed,
3425                                          ARM::VLD1q64wb_fixed};
3426     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3427                                          ARM::VLD2q16PseudoWB_fixed,
3428                                          ARM::VLD2q32PseudoWB_fixed };
3429     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3430     return;
3431   }
3432
3433   case ARMISD::VLD3_UPD: {
3434     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3435                                          ARM::VLD3d16Pseudo_UPD,
3436                                          ARM::VLD3d32Pseudo_UPD,
3437                                          ARM::VLD1d64TPseudoWB_fixed};
3438     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3439                                           ARM::VLD3q16Pseudo_UPD,
3440                                           ARM::VLD3q32Pseudo_UPD };
3441     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3442                                           ARM::VLD3q16oddPseudo_UPD,
3443                                           ARM::VLD3q32oddPseudo_UPD };
3444     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3445     return;
3446   }
3447
3448   case ARMISD::VLD4_UPD: {
3449     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3450                                          ARM::VLD4d16Pseudo_UPD,
3451                                          ARM::VLD4d32Pseudo_UPD,
3452                                          ARM::VLD1d64QPseudoWB_fixed};
3453     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3454                                           ARM::VLD4q16Pseudo_UPD,
3455                                           ARM::VLD4q32Pseudo_UPD };
3456     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3457                                           ARM::VLD4q16oddPseudo_UPD,
3458                                           ARM::VLD4q32oddPseudo_UPD };
3459     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3460     return;
3461   }
3462
3463   case ARMISD::VLD2LN_UPD: {
3464     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3465                                          ARM::VLD2LNd16Pseudo_UPD,
3466                                          ARM::VLD2LNd32Pseudo_UPD };
3467     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3468                                          ARM::VLD2LNq32Pseudo_UPD };
3469     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3470     return;
3471   }
3472
3473   case ARMISD::VLD3LN_UPD: {
3474     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3475                                          ARM::VLD3LNd16Pseudo_UPD,
3476                                          ARM::VLD3LNd32Pseudo_UPD };
3477     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3478                                          ARM::VLD3LNq32Pseudo_UPD };
3479     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3480     return;
3481   }
3482
3483   case ARMISD::VLD4LN_UPD: {
3484     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3485                                          ARM::VLD4LNd16Pseudo_UPD,
3486                                          ARM::VLD4LNd32Pseudo_UPD };
3487     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3488                                          ARM::VLD4LNq32Pseudo_UPD };
3489     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3490     return;
3491   }
3492
3493   case ARMISD::VST1_UPD: {
3494     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3495                                          ARM::VST1d16wb_fixed,
3496                                          ARM::VST1d32wb_fixed,
3497                                          ARM::VST1d64wb_fixed };
3498     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3499                                          ARM::VST1q16wb_fixed,
3500                                          ARM::VST1q32wb_fixed,
3501                                          ARM::VST1q64wb_fixed };
3502     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3503     return;
3504   }
3505
3506   case ARMISD::VST2_UPD: {
3507     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3508                                          ARM::VST2d16wb_fixed,
3509                                          ARM::VST2d32wb_fixed,
3510                                          ARM::VST1q64wb_fixed};
3511     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3512                                          ARM::VST2q16PseudoWB_fixed,
3513                                          ARM::VST2q32PseudoWB_fixed };
3514     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3515     return;
3516   }
3517
3518   case ARMISD::VST3_UPD: {
3519     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3520                                          ARM::VST3d16Pseudo_UPD,
3521                                          ARM::VST3d32Pseudo_UPD,
3522                                          ARM::VST1d64TPseudoWB_fixed};
3523     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3524                                           ARM::VST3q16Pseudo_UPD,
3525                                           ARM::VST3q32Pseudo_UPD };
3526     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3527                                           ARM::VST3q16oddPseudo_UPD,
3528                                           ARM::VST3q32oddPseudo_UPD };
3529     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3530     return;
3531   }
3532
3533   case ARMISD::VST4_UPD: {
3534     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3535                                          ARM::VST4d16Pseudo_UPD,
3536                                          ARM::VST4d32Pseudo_UPD,
3537                                          ARM::VST1d64QPseudoWB_fixed};
3538     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3539                                           ARM::VST4q16Pseudo_UPD,
3540                                           ARM::VST4q32Pseudo_UPD };
3541     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3542                                           ARM::VST4q16oddPseudo_UPD,
3543                                           ARM::VST4q32oddPseudo_UPD };
3544     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3545     return;
3546   }
3547
3548   case ARMISD::VST2LN_UPD: {
3549     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3550                                          ARM::VST2LNd16Pseudo_UPD,
3551                                          ARM::VST2LNd32Pseudo_UPD };
3552     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3553                                          ARM::VST2LNq32Pseudo_UPD };
3554     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3555     return;
3556   }
3557
3558   case ARMISD::VST3LN_UPD: {
3559     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3560                                          ARM::VST3LNd16Pseudo_UPD,
3561                                          ARM::VST3LNd32Pseudo_UPD };
3562     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3563                                          ARM::VST3LNq32Pseudo_UPD };
3564     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3565     return;
3566   }
3567
3568   case ARMISD::VST4LN_UPD: {
3569     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3570                                          ARM::VST4LNd16Pseudo_UPD,
3571                                          ARM::VST4LNd32Pseudo_UPD };
3572     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3573                                          ARM::VST4LNq32Pseudo_UPD };
3574     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3575     return;
3576   }
3577
3578   case ISD::INTRINSIC_VOID:
3579   case ISD::INTRINSIC_W_CHAIN: {
3580     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3581     switch (IntNo) {
3582     default:
3583       break;
3584
3585     case Intrinsic::arm_mrrc:
3586     case Intrinsic::arm_mrrc2: {
3587       SDLoc dl(N);
3588       SDValue Chain = N->getOperand(0);
3589       unsigned Opc;
3590
3591       if (Subtarget->isThumb())
3592         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3593       else
3594         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3595
3596       SmallVector<SDValue, 5> Ops;
3597       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3598       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3599       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3600
3601       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3602       // instruction will always be '1111' but it is possible in assembly language to specify
3603       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3604       if (Opc != ARM::MRRC2) {
3605         Ops.push_back(getAL(CurDAG, dl));
3606         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3607       }
3608
3609       Ops.push_back(Chain);
3610
3611       // Writes to two registers.
3612       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3613
3614       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3615       return;
3616     }
3617     case Intrinsic::arm_ldaexd:
3618     case Intrinsic::arm_ldrexd: {
3619       SDLoc dl(N);
3620       SDValue Chain = N->getOperand(0);
3621       SDValue MemAddr = N->getOperand(2);
3622       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3623
3624       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3625       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3626                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3627
3628       // arm_ldrexd returns a i64 value in {i32, i32}
3629       std::vector<EVT> ResTys;
3630       if (isThumb) {
3631         ResTys.push_back(MVT::i32);
3632         ResTys.push_back(MVT::i32);
3633       } else
3634         ResTys.push_back(MVT::Untyped);
3635       ResTys.push_back(MVT::Other);
3636
3637       // Place arguments in the right order.
3638       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3639                        CurDAG->getRegister(0, MVT::i32), Chain};
3640       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3641       // Transfer memoperands.
3642       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3643       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3644
3645       // Remap uses.
3646       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3647       if (!SDValue(N, 0).use_empty()) {
3648         SDValue Result;
3649         if (isThumb)
3650           Result = SDValue(Ld, 0);
3651         else {
3652           SDValue SubRegIdx =
3653             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3654           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3655               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3656           Result = SDValue(ResNode,0);
3657         }
3658         ReplaceUses(SDValue(N, 0), Result);
3659       }
3660       if (!SDValue(N, 1).use_empty()) {
3661         SDValue Result;
3662         if (isThumb)
3663           Result = SDValue(Ld, 1);
3664         else {
3665           SDValue SubRegIdx =
3666             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3667           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3668               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3669           Result = SDValue(ResNode,0);
3670         }
3671         ReplaceUses(SDValue(N, 1), Result);
3672       }
3673       ReplaceUses(SDValue(N, 2), OutChain);
3674       CurDAG->RemoveDeadNode(N);
3675       return;
3676     }
3677     case Intrinsic::arm_stlexd:
3678     case Intrinsic::arm_strexd: {
3679       SDLoc dl(N);
3680       SDValue Chain = N->getOperand(0);
3681       SDValue Val0 = N->getOperand(2);
3682       SDValue Val1 = N->getOperand(3);
3683       SDValue MemAddr = N->getOperand(4);
3684
3685       // Store exclusive double return a i32 value which is the return status
3686       // of the issued store.
3687       const EVT ResTys[] = {MVT::i32, MVT::Other};
3688
3689       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3690       // Place arguments in the right order.
3691       SmallVector<SDValue, 7> Ops;
3692       if (isThumb) {
3693         Ops.push_back(Val0);
3694         Ops.push_back(Val1);
3695       } else
3696         // arm_strexd uses GPRPair.
3697         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3698       Ops.push_back(MemAddr);
3699       Ops.push_back(getAL(CurDAG, dl));
3700       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3701       Ops.push_back(Chain);
3702
3703       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3704       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3705                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3706
3707       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3708       // Transfer memoperands.
3709       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3710       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3711
3712       ReplaceNode(N, St);
3713       return;
3714     }
3715
3716     case Intrinsic::arm_neon_vld1: {
3717       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3718                                            ARM::VLD1d32, ARM::VLD1d64 };
3719       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3720                                            ARM::VLD1q32, ARM::VLD1q64};
3721       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3722       return;
3723     }
3724
3725     case Intrinsic::arm_neon_vld1x2: {
3726       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3727                                            ARM::VLD1q32, ARM::VLD1q64 };
3728       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3729                                            ARM::VLD1d16QPseudo,
3730                                            ARM::VLD1d32QPseudo,
3731                                            ARM::VLD1d64QPseudo };
3732       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3733       return;
3734     }
3735
3736     case Intrinsic::arm_neon_vld1x3: {
3737       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3738                                            ARM::VLD1d16TPseudo,
3739                                            ARM::VLD1d32TPseudo,
3740                                            ARM::VLD1d64TPseudo };
3741       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3742                                             ARM::VLD1q16LowTPseudo_UPD,
3743                                             ARM::VLD1q32LowTPseudo_UPD,
3744                                             ARM::VLD1q64LowTPseudo_UPD };
3745       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3746                                             ARM::VLD1q16HighTPseudo,
3747                                             ARM::VLD1q32HighTPseudo,
3748                                             ARM::VLD1q64HighTPseudo };
3749       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3750       return;
3751     }
3752
3753     case Intrinsic::arm_neon_vld1x4: {
3754       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3755                                            ARM::VLD1d16QPseudo,
3756                                            ARM::VLD1d32QPseudo,
3757                                            ARM::VLD1d64QPseudo };
3758       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3759                                             ARM::VLD1q16LowQPseudo_UPD,
3760                                             ARM::VLD1q32LowQPseudo_UPD,
3761                                             ARM::VLD1q64LowQPseudo_UPD };
3762       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3763                                             ARM::VLD1q16HighQPseudo,
3764                                             ARM::VLD1q32HighQPseudo,
3765                                             ARM::VLD1q64HighQPseudo };
3766       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3767       return;
3768     }
3769
3770     case Intrinsic::arm_neon_vld2: {
3771       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3772                                            ARM::VLD2d32, ARM::VLD1q64 };
3773       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3774                                            ARM::VLD2q32Pseudo };
3775       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3776       return;
3777     }
3778
3779     case Intrinsic::arm_neon_vld3: {
3780       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3781                                            ARM::VLD3d16Pseudo,
3782                                            ARM::VLD3d32Pseudo,
3783                                            ARM::VLD1d64TPseudo };
3784       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3785                                             ARM::VLD3q16Pseudo_UPD,
3786                                             ARM::VLD3q32Pseudo_UPD };
3787       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3788                                             ARM::VLD3q16oddPseudo,
3789                                             ARM::VLD3q32oddPseudo };
3790       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3791       return;
3792     }
3793
3794     case Intrinsic::arm_neon_vld4: {
3795       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3796                                            ARM::VLD4d16Pseudo,
3797                                            ARM::VLD4d32Pseudo,
3798                                            ARM::VLD1d64QPseudo };
3799       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3800                                             ARM::VLD4q16Pseudo_UPD,
3801                                             ARM::VLD4q32Pseudo_UPD };
3802       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3803                                             ARM::VLD4q16oddPseudo,
3804                                             ARM::VLD4q32oddPseudo };
3805       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3806       return;
3807     }
3808
3809     case Intrinsic::arm_neon_vld2dup: {
3810       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3811                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3812       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3813                                             ARM::VLD2DUPq16EvenPseudo,
3814                                             ARM::VLD2DUPq32EvenPseudo };
3815       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3816                                             ARM::VLD2DUPq16OddPseudo,
3817                                             ARM::VLD2DUPq32OddPseudo };
3818       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3819                    DOpcodes, QOpcodes0, QOpcodes1);
3820       return;
3821     }
3822
3823     case Intrinsic::arm_neon_vld3dup: {
3824       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3825                                            ARM::VLD3DUPd16Pseudo,
3826                                            ARM::VLD3DUPd32Pseudo,
3827                                            ARM::VLD1d64TPseudo };
3828       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3829                                             ARM::VLD3DUPq16EvenPseudo,
3830                                             ARM::VLD3DUPq32EvenPseudo };
3831       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3832                                             ARM::VLD3DUPq16OddPseudo,
3833                                             ARM::VLD3DUPq32OddPseudo };
3834       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3835                    DOpcodes, QOpcodes0, QOpcodes1);
3836       return;
3837     }
3838
3839     case Intrinsic::arm_neon_vld4dup: {
3840       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3841                                            ARM::VLD4DUPd16Pseudo,
3842                                            ARM::VLD4DUPd32Pseudo,
3843                                            ARM::VLD1d64QPseudo };
3844       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3845                                             ARM::VLD4DUPq16EvenPseudo,
3846                                             ARM::VLD4DUPq32EvenPseudo };
3847       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3848                                             ARM::VLD4DUPq16OddPseudo,
3849                                             ARM::VLD4DUPq32OddPseudo };
3850       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3851                    DOpcodes, QOpcodes0, QOpcodes1);
3852       return;
3853     }
3854
3855     case Intrinsic::arm_neon_vld2lane: {
3856       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3857                                            ARM::VLD2LNd16Pseudo,
3858                                            ARM::VLD2LNd32Pseudo };
3859       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3860                                            ARM::VLD2LNq32Pseudo };
3861       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3862       return;
3863     }
3864
3865     case Intrinsic::arm_neon_vld3lane: {
3866       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3867                                            ARM::VLD3LNd16Pseudo,
3868                                            ARM::VLD3LNd32Pseudo };
3869       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3870                                            ARM::VLD3LNq32Pseudo };
3871       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3872       return;
3873     }
3874
3875     case Intrinsic::arm_neon_vld4lane: {
3876       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3877                                            ARM::VLD4LNd16Pseudo,
3878                                            ARM::VLD4LNd32Pseudo };
3879       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3880                                            ARM::VLD4LNq32Pseudo };
3881       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3882       return;
3883     }
3884
3885     case Intrinsic::arm_neon_vst1: {
3886       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3887                                            ARM::VST1d32, ARM::VST1d64 };
3888       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3889                                            ARM::VST1q32, ARM::VST1q64 };
3890       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3891       return;
3892     }
3893
3894     case Intrinsic::arm_neon_vst1x2: {
3895       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3896                                            ARM::VST1q32, ARM::VST1q64 };
3897       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3898                                            ARM::VST1d16QPseudo,
3899                                            ARM::VST1d32QPseudo,
3900                                            ARM::VST1d64QPseudo };
3901       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3902       return;
3903     }
3904
3905     case Intrinsic::arm_neon_vst1x3: {
3906       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3907                                            ARM::VST1d16TPseudo,
3908                                            ARM::VST1d32TPseudo,
3909                                            ARM::VST1d64TPseudo };
3910       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3911                                             ARM::VST1q16LowTPseudo_UPD,
3912                                             ARM::VST1q32LowTPseudo_UPD,
3913                                             ARM::VST1q64LowTPseudo_UPD };
3914       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3915                                             ARM::VST1q16HighTPseudo,
3916                                             ARM::VST1q32HighTPseudo,
3917                                             ARM::VST1q64HighTPseudo };
3918       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3919       return;
3920     }
3921
3922     case Intrinsic::arm_neon_vst1x4: {
3923       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3924                                            ARM::VST1d16QPseudo,
3925                                            ARM::VST1d32QPseudo,
3926                                            ARM::VST1d64QPseudo };
3927       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3928                                             ARM::VST1q16LowQPseudo_UPD,
3929                                             ARM::VST1q32LowQPseudo_UPD,
3930                                             ARM::VST1q64LowQPseudo_UPD };
3931       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3932                                             ARM::VST1q16HighQPseudo,
3933                                             ARM::VST1q32HighQPseudo,
3934                                             ARM::VST1q64HighQPseudo };
3935       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3936       return;
3937     }
3938
3939     case Intrinsic::arm_neon_vst2: {
3940       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3941                                            ARM::VST2d32, ARM::VST1q64 };
3942       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3943                                            ARM::VST2q32Pseudo };
3944       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3945       return;
3946     }
3947
3948     case Intrinsic::arm_neon_vst3: {
3949       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3950                                            ARM::VST3d16Pseudo,
3951                                            ARM::VST3d32Pseudo,
3952                                            ARM::VST1d64TPseudo };
3953       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3954                                             ARM::VST3q16Pseudo_UPD,
3955                                             ARM::VST3q32Pseudo_UPD };
3956       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3957                                             ARM::VST3q16oddPseudo,
3958                                             ARM::VST3q32oddPseudo };
3959       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3960       return;
3961     }
3962
3963     case Intrinsic::arm_neon_vst4: {
3964       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3965                                            ARM::VST4d16Pseudo,
3966                                            ARM::VST4d32Pseudo,
3967                                            ARM::VST1d64QPseudo };
3968       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3969                                             ARM::VST4q16Pseudo_UPD,
3970                                             ARM::VST4q32Pseudo_UPD };
3971       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3972                                             ARM::VST4q16oddPseudo,
3973                                             ARM::VST4q32oddPseudo };
3974       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3975       return;
3976     }
3977
3978     case Intrinsic::arm_neon_vst2lane: {
3979       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3980                                            ARM::VST2LNd16Pseudo,
3981                                            ARM::VST2LNd32Pseudo };
3982       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3983                                            ARM::VST2LNq32Pseudo };
3984       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3985       return;
3986     }
3987
3988     case Intrinsic::arm_neon_vst3lane: {
3989       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3990                                            ARM::VST3LNd16Pseudo,
3991                                            ARM::VST3LNd32Pseudo };
3992       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3993                                            ARM::VST3LNq32Pseudo };
3994       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3995       return;
3996     }
3997
3998     case Intrinsic::arm_neon_vst4lane: {
3999       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4000                                            ARM::VST4LNd16Pseudo,
4001                                            ARM::VST4LNd32Pseudo };
4002       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4003                                            ARM::VST4LNq32Pseudo };
4004       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4005       return;
4006     }
4007     }
4008     break;
4009   }
4010
4011   case ISD::ATOMIC_CMP_SWAP:
4012     SelectCMP_SWAP(N);
4013     return;
4014   }
4015
4016   SelectCode(N);
4017 }
4018
4019 // Inspect a register string of the form
4020 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4021 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4022 // and obtain the integer operands from them, adding these operands to the
4023 // provided vector.
4024 static void getIntOperandsFromRegisterString(StringRef RegString,
4025                                              SelectionDAG *CurDAG,
4026                                              const SDLoc &DL,
4027                                              std::vector<SDValue> &Ops) {
4028   SmallVector<StringRef, 5> Fields;
4029   RegString.split(Fields, ':');
4030
4031   if (Fields.size() > 1) {
4032     bool AllIntFields = true;
4033
4034     for (StringRef Field : Fields) {
4035       // Need to trim out leading 'cp' characters and get the integer field.
4036       unsigned IntField;
4037       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4038       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4039     }
4040
4041     assert(AllIntFields &&
4042             "Unexpected non-integer value in special register string.");
4043   }
4044 }
4045
4046 // Maps a Banked Register string to its mask value. The mask value returned is
4047 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4048 // mask operand, which expresses which register is to be used, e.g. r8, and in
4049 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4050 // was invalid.
4051 static inline int getBankedRegisterMask(StringRef RegString) {
4052   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4053   if (!TheReg)
4054      return -1;
4055   return TheReg->Encoding;
4056 }
4057
4058 // The flags here are common to those allowed for apsr in the A class cores and
4059 // those allowed for the special registers in the M class cores. Returns a
4060 // value representing which flags were present, -1 if invalid.
4061 static inline int getMClassFlagsMask(StringRef Flags) {
4062   return StringSwitch<int>(Flags)
4063           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4064                          // correct when flags are not permitted
4065           .Case("g", 0x1)
4066           .Case("nzcvq", 0x2)
4067           .Case("nzcvqg", 0x3)
4068           .Default(-1);
4069 }
4070
4071 // Maps MClass special registers string to its value for use in the
4072 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4073 // Returns -1 to signify that the string was invalid.
4074 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4075   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4076   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4077   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4078     return -1;
4079   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4080 }
4081
4082 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4083   // The mask operand contains the special register (R Bit) in bit 4, whether
4084   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4085   // bits 3-0 contains the fields to be accessed in the special register, set by
4086   // the flags provided with the register.
4087   int Mask = 0;
4088   if (Reg == "apsr") {
4089     // The flags permitted for apsr are the same flags that are allowed in
4090     // M class registers. We get the flag value and then shift the flags into
4091     // the correct place to combine with the mask.
4092     Mask = getMClassFlagsMask(Flags);
4093     if (Mask == -1)
4094       return -1;
4095     return Mask << 2;
4096   }
4097
4098   if (Reg != "cpsr" && Reg != "spsr") {
4099     return -1;
4100   }
4101
4102   // This is the same as if the flags were "fc"
4103   if (Flags.empty() || Flags == "all")
4104     return Mask | 0x9;
4105
4106   // Inspect the supplied flags string and set the bits in the mask for
4107   // the relevant and valid flags allowed for cpsr and spsr.
4108   for (char Flag : Flags) {
4109     int FlagVal;
4110     switch (Flag) {
4111       case 'c':
4112         FlagVal = 0x1;
4113         break;
4114       case 'x':
4115         FlagVal = 0x2;
4116         break;
4117       case 's':
4118         FlagVal = 0x4;
4119         break;
4120       case 'f':
4121         FlagVal = 0x8;
4122         break;
4123       default:
4124         FlagVal = 0;
4125     }
4126
4127     // This avoids allowing strings where the same flag bit appears twice.
4128     if (!FlagVal || (Mask & FlagVal))
4129       return -1;
4130     Mask |= FlagVal;
4131   }
4132
4133   // If the register is spsr then we need to set the R bit.
4134   if (Reg == "spsr")
4135     Mask |= 0x10;
4136
4137   return Mask;
4138 }
4139
4140 // Lower the read_register intrinsic to ARM specific DAG nodes
4141 // using the supplied metadata string to select the instruction node to use
4142 // and the registers/masks to construct as operands for the node.
4143 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4144   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4145   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4146   bool IsThumb2 = Subtarget->isThumb2();
4147   SDLoc DL(N);
4148
4149   std::vector<SDValue> Ops;
4150   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4151
4152   if (!Ops.empty()) {
4153     // If the special register string was constructed of fields (as defined
4154     // in the ACLE) then need to lower to MRC node (32 bit) or
4155     // MRRC node(64 bit), we can make the distinction based on the number of
4156     // operands we have.
4157     unsigned Opcode;
4158     SmallVector<EVT, 3> ResTypes;
4159     if (Ops.size() == 5){
4160       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4161       ResTypes.append({ MVT::i32, MVT::Other });
4162     } else {
4163       assert(Ops.size() == 3 &&
4164               "Invalid number of fields in special register string.");
4165       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4166       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4167     }
4168
4169     Ops.push_back(getAL(CurDAG, DL));
4170     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4171     Ops.push_back(N->getOperand(0));
4172     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4173     return true;
4174   }
4175
4176   std::string SpecialReg = RegString->getString().lower();
4177
4178   int BankedReg = getBankedRegisterMask(SpecialReg);
4179   if (BankedReg != -1) {
4180     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4181             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4182             N->getOperand(0) };
4183     ReplaceNode(
4184         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4185                                   DL, MVT::i32, MVT::Other, Ops));
4186     return true;
4187   }
4188
4189   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4190   // corresponding to the register that is being read from. So we switch on the
4191   // string to find which opcode we need to use.
4192   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4193                     .Case("fpscr", ARM::VMRS)
4194                     .Case("fpexc", ARM::VMRS_FPEXC)
4195                     .Case("fpsid", ARM::VMRS_FPSID)
4196                     .Case("mvfr0", ARM::VMRS_MVFR0)
4197                     .Case("mvfr1", ARM::VMRS_MVFR1)
4198                     .Case("mvfr2", ARM::VMRS_MVFR2)
4199                     .Case("fpinst", ARM::VMRS_FPINST)
4200                     .Case("fpinst2", ARM::VMRS_FPINST2)
4201                     .Default(0);
4202
4203   // If an opcode was found then we can lower the read to a VFP instruction.
4204   if (Opcode) {
4205     if (!Subtarget->hasVFP2Base())
4206       return false;
4207     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4208       return false;
4209
4210     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4211             N->getOperand(0) };
4212     ReplaceNode(N,
4213                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4214     return true;
4215   }
4216
4217   // If the target is M Class then need to validate that the register string
4218   // is an acceptable value, so check that a mask can be constructed from the
4219   // string.
4220   if (Subtarget->isMClass()) {
4221     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4222     if (SYSmValue == -1)
4223       return false;
4224
4225     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4226                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4227                       N->getOperand(0) };
4228     ReplaceNode(
4229         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4230     return true;
4231   }
4232
4233   // Here we know the target is not M Class so we need to check if it is one
4234   // of the remaining possible values which are apsr, cpsr or spsr.
4235   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4236     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4237             N->getOperand(0) };
4238     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4239                                           DL, MVT::i32, MVT::Other, Ops));
4240     return true;
4241   }
4242
4243   if (SpecialReg == "spsr") {
4244     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4245             N->getOperand(0) };
4246     ReplaceNode(
4247         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4248                                   MVT::i32, MVT::Other, Ops));
4249     return true;
4250   }
4251
4252   return false;
4253 }
4254
4255 // Lower the write_register intrinsic to ARM specific DAG nodes
4256 // using the supplied metadata string to select the instruction node to use
4257 // and the registers/masks to use in the nodes
4258 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4259   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4260   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4261   bool IsThumb2 = Subtarget->isThumb2();
4262   SDLoc DL(N);
4263
4264   std::vector<SDValue> Ops;
4265   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4266
4267   if (!Ops.empty()) {
4268     // If the special register string was constructed of fields (as defined
4269     // in the ACLE) then need to lower to MCR node (32 bit) or
4270     // MCRR node(64 bit), we can make the distinction based on the number of
4271     // operands we have.
4272     unsigned Opcode;
4273     if (Ops.size() == 5) {
4274       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4275       Ops.insert(Ops.begin()+2, N->getOperand(2));
4276     } else {
4277       assert(Ops.size() == 3 &&
4278               "Invalid number of fields in special register string.");
4279       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4280       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4281       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4282     }
4283
4284     Ops.push_back(getAL(CurDAG, DL));
4285     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4286     Ops.push_back(N->getOperand(0));
4287
4288     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4289     return true;
4290   }
4291
4292   std::string SpecialReg = RegString->getString().lower();
4293   int BankedReg = getBankedRegisterMask(SpecialReg);
4294   if (BankedReg != -1) {
4295     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4296             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4297             N->getOperand(0) };
4298     ReplaceNode(
4299         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4300                                   DL, MVT::Other, Ops));
4301     return true;
4302   }
4303
4304   // The VFP registers are written to by creating SelectionDAG nodes with
4305   // opcodes corresponding to the register that is being written. So we switch
4306   // on the string to find which opcode we need to use.
4307   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4308                     .Case("fpscr", ARM::VMSR)
4309                     .Case("fpexc", ARM::VMSR_FPEXC)
4310                     .Case("fpsid", ARM::VMSR_FPSID)
4311                     .Case("fpinst", ARM::VMSR_FPINST)
4312                     .Case("fpinst2", ARM::VMSR_FPINST2)
4313                     .Default(0);
4314
4315   if (Opcode) {
4316     if (!Subtarget->hasVFP2Base())
4317       return false;
4318     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4319             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4320     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4321     return true;
4322   }
4323
4324   std::pair<StringRef, StringRef> Fields;
4325   Fields = StringRef(SpecialReg).rsplit('_');
4326   std::string Reg = Fields.first.str();
4327   StringRef Flags = Fields.second;
4328
4329   // If the target was M Class then need to validate the special register value
4330   // and retrieve the mask for use in the instruction node.
4331   if (Subtarget->isMClass()) {
4332     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4333     if (SYSmValue == -1)
4334       return false;
4335
4336     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4337                       N->getOperand(2), getAL(CurDAG, DL),
4338                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4339     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4340     return true;
4341   }
4342
4343   // We then check to see if a valid mask can be constructed for one of the
4344   // register string values permitted for the A and R class cores. These values
4345   // are apsr, spsr and cpsr; these are also valid on older cores.
4346   int Mask = getARClassRegisterMask(Reg, Flags);
4347   if (Mask != -1) {
4348     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4349             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4350             N->getOperand(0) };
4351     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4352                                           DL, MVT::Other, Ops));
4353     return true;
4354   }
4355
4356   return false;
4357 }
4358
4359 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4360   std::vector<SDValue> AsmNodeOperands;
4361   unsigned Flag, Kind;
4362   bool Changed = false;
4363   unsigned NumOps = N->getNumOperands();
4364
4365   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4366   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4367   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4368   // respectively. Since there is no constraint to explicitly specify a
4369   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4370   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4371   // them into a GPRPair.
4372
4373   SDLoc dl(N);
4374   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4375                                    : SDValue(nullptr,0);
4376
4377   SmallVector<bool, 8> OpChanged;
4378   // Glue node will be appended late.
4379   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4380     SDValue op = N->getOperand(i);
4381     AsmNodeOperands.push_back(op);
4382
4383     if (i < InlineAsm::Op_FirstOperand)
4384       continue;
4385
4386     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4387       Flag = C->getZExtValue();
4388       Kind = InlineAsm::getKind(Flag);
4389     }
4390     else
4391       continue;
4392
4393     // Immediate operands to inline asm in the SelectionDAG are modeled with
4394     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4395     // the second is a constant with the value of the immediate. If we get here
4396     // and we have a Kind_Imm, skip the next operand, and continue.
4397     if (Kind == InlineAsm::Kind_Imm) {
4398       SDValue op = N->getOperand(++i);
4399       AsmNodeOperands.push_back(op);
4400       continue;
4401     }
4402
4403     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4404     if (NumRegs)
4405       OpChanged.push_back(false);
4406
4407     unsigned DefIdx = 0;
4408     bool IsTiedToChangedOp = false;
4409     // If it's a use that is tied with a previous def, it has no
4410     // reg class constraint.
4411     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4412       IsTiedToChangedOp = OpChanged[DefIdx];
4413
4414     // Memory operands to inline asm in the SelectionDAG are modeled with two
4415     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4416     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4417     // it doesn't get misinterpreted), and continue. We do this here because
4418     // it's important to update the OpChanged array correctly before moving on.
4419     if (Kind == InlineAsm::Kind_Mem) {
4420       SDValue op = N->getOperand(++i);
4421       AsmNodeOperands.push_back(op);
4422       continue;
4423     }
4424
4425     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4426         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4427       continue;
4428
4429     unsigned RC;
4430     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4431     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4432         || NumRegs != 2)
4433       continue;
4434
4435     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4436     SDValue V0 = N->getOperand(i+1);
4437     SDValue V1 = N->getOperand(i+2);
4438     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4439     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4440     SDValue PairedReg;
4441     MachineRegisterInfo &MRI = MF->getRegInfo();
4442
4443     if (Kind == InlineAsm::Kind_RegDef ||
4444         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4445       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4446       // the original GPRs.
4447
4448       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4449       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4450       SDValue Chain = SDValue(N,0);
4451
4452       SDNode *GU = N->getGluedUser();
4453       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4454                                                Chain.getValue(1));
4455
4456       // Extract values from a GPRPair reg and copy to the original GPR reg.
4457       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4458                                                     RegCopy);
4459       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4460                                                     RegCopy);
4461       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4462                                         RegCopy.getValue(1));
4463       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4464
4465       // Update the original glue user.
4466       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4467       Ops.push_back(T1.getValue(1));
4468       CurDAG->UpdateNodeOperands(GU, Ops);
4469     }
4470     else {
4471       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4472       // GPRPair and then pass the GPRPair to the inline asm.
4473       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4474
4475       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4476       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4477                                           Chain.getValue(1));
4478       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4479                                           T0.getValue(1));
4480       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4481
4482       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4483       // i32 VRs of inline asm with it.
4484       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4485       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4486       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4487
4488       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4489       Glue = Chain.getValue(1);
4490     }
4491
4492     Changed = true;
4493
4494     if(PairedReg.getNode()) {
4495       OpChanged[OpChanged.size() -1 ] = true;
4496       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4497       if (IsTiedToChangedOp)
4498         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4499       else
4500         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4501       // Replace the current flag.
4502       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4503           Flag, dl, MVT::i32);
4504       // Add the new register node and skip the original two GPRs.
4505       AsmNodeOperands.push_back(PairedReg);
4506       // Skip the next two GPRs.
4507       i += 2;
4508     }
4509   }
4510
4511   if (Glue.getNode())
4512     AsmNodeOperands.push_back(Glue);
4513   if (!Changed)
4514     return false;
4515
4516   SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4517       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4518   New->setNodeId(-1);
4519   ReplaceNode(N, New.getNode());
4520   return true;
4521 }
4522
4523
4524 bool ARMDAGToDAGISel::
4525 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4526                              std::vector<SDValue> &OutOps) {
4527   switch(ConstraintID) {
4528   default:
4529     llvm_unreachable("Unexpected asm memory constraint");
4530   case InlineAsm::Constraint_i:
4531     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4532     //        be an immediate and not a memory constraint.
4533     LLVM_FALLTHROUGH;
4534   case InlineAsm::Constraint_m:
4535   case InlineAsm::Constraint_o:
4536   case InlineAsm::Constraint_Q:
4537   case InlineAsm::Constraint_Um:
4538   case InlineAsm::Constraint_Un:
4539   case InlineAsm::Constraint_Uq:
4540   case InlineAsm::Constraint_Us:
4541   case InlineAsm::Constraint_Ut:
4542   case InlineAsm::Constraint_Uv:
4543   case InlineAsm::Constraint_Uy:
4544     // Require the address to be in a register.  That is safe for all ARM
4545     // variants and it is hard to do anything much smarter without knowing
4546     // how the operand is used.
4547     OutOps.push_back(Op);
4548     return false;
4549   }
4550   return true;
4551 }
4552
4553 /// createARMISelDag - This pass converts a legalized DAG into a
4554 /// ARM-specific DAG, ready for instruction scheduling.
4555 ///
4556 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4557                                      CodeGenOpt::Level OptLevel) {
4558   return new ARMDAGToDAGISel(TM, OptLevel);
4559 }