// lib/Target/ARM/ARMISelDAGToDAG.cpp (from llvm-complete.git)
// Snapshot context: revert of r354244 "[DAGCombiner] Eliminate dead stores to stack."
// blob f765334577db7b9f961dd69a238095f357f0499b
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
37 using namespace llvm;
39 #define DEBUG_TYPE "arm-isel"
41 static cl::opt<bool>
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
44 cl::init(false));
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
52 class ARMDAGToDAGISel : public SelectionDAGISel {
53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54 /// make the right decision when generating code for different targets.
55 const ARMSubtarget *Subtarget;
57 public:
58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
59 : SelectionDAGISel(tm, OptLevel) {}
61 bool runOnMachineFunction(MachineFunction &MF) override {
62 // Reset the subtarget each time through.
63 Subtarget = &MF.getSubtarget<ARMSubtarget>();
64 SelectionDAGISel::runOnMachineFunction(MF);
65 return true;
68 StringRef getPassName() const override { return "ARM Instruction Selection"; }
70 void PreprocessISelDAG() override;
72 /// getI32Imm - Return a target constant of type i32 with the specified
73 /// value.
74 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
75 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 void Select(SDNode *N) override;
80 bool hasNoVMLxHazardUse(SDNode *N) const;
81 bool isShifterOpProfitable(const SDValue &Shift,
82 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
83 bool SelectRegShifterOperand(SDValue N, SDValue &A,
84 SDValue &B, SDValue &C,
85 bool CheckProfitability = true);
86 bool SelectImmShifterOperand(SDValue N, SDValue &A,
87 SDValue &B, bool CheckProfitability = true);
88 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C) {
90 // Don't apply the profitability check
91 return SelectRegShifterOperand(N, A, B, C, false);
93 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
94 SDValue &B) {
95 // Don't apply the profitability check
96 return SelectImmShifterOperand(N, A, B, false);
99 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
101 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
102 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
104 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
105 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
106 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
107 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
108 return true;
111 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
112 SDValue &Offset, SDValue &Opc);
113 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
114 SDValue &Offset, SDValue &Opc);
115 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
116 SDValue &Offset, SDValue &Opc);
117 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
118 bool SelectAddrMode3(SDValue N, SDValue &Base,
119 SDValue &Offset, SDValue &Opc);
120 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
121 SDValue &Offset, SDValue &Opc);
122 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
123 int Lwb, int Upb, bool FP16);
124 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
125 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
126 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
127 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
129 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
131 // Thumb Addressing Modes:
132 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
133 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
134 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
135 SDValue &OffImm);
136 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
137 SDValue &OffImm);
138 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
139 SDValue &OffImm);
140 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
141 SDValue &OffImm);
142 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
144 // Thumb 2 Addressing Modes:
145 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
146 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
147 SDValue &OffImm);
148 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
149 SDValue &OffImm);
150 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
151 SDValue &OffReg, SDValue &ShImm);
152 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
154 inline bool is_so_imm(unsigned Imm) const {
155 return ARM_AM::getSOImmVal(Imm) != -1;
158 inline bool is_so_imm_not(unsigned Imm) const {
159 return ARM_AM::getSOImmVal(~Imm) != -1;
162 inline bool is_t2_so_imm(unsigned Imm) const {
163 return ARM_AM::getT2SOImmVal(Imm) != -1;
166 inline bool is_t2_so_imm_not(unsigned Imm) const {
167 return ARM_AM::getT2SOImmVal(~Imm) != -1;
170 // Include the pieces autogenerated from the target description.
171 #include "ARMGenDAGISel.inc"
173 private:
174 void transferMemOperands(SDNode *Src, SDNode *Dst);
176 /// Indexed (pre/post inc/dec) load matching code for ARM.
177 bool tryARMIndexedLoad(SDNode *N);
178 bool tryT1IndexedLoad(SDNode *N);
179 bool tryT2IndexedLoad(SDNode *N);
181 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
182 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
183 /// loads of D registers and even subregs and odd subregs of Q registers.
184 /// For NumVecs <= 2, QOpcodes1 is not used.
185 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
186 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
187 const uint16_t *QOpcodes1);
189 /// SelectVST - Select NEON store intrinsics. NumVecs should
190 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
191 /// stores of D registers and even subregs and odd subregs of Q registers.
192 /// For NumVecs <= 2, QOpcodes1 is not used.
193 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
194 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
195 const uint16_t *QOpcodes1);
197 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
198 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
199 /// load/store of D registers and Q registers.
200 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
201 unsigned NumVecs, const uint16_t *DOpcodes,
202 const uint16_t *QOpcodes);
204 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
205 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
206 /// for loading D registers.
207 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
208 unsigned NumVecs, const uint16_t *DOpcodes,
209 const uint16_t *QOpcodes0 = nullptr,
210 const uint16_t *QOpcodes1 = nullptr);
212 /// Try to select SBFX/UBFX instructions for ARM.
213 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
215 // Select special operations if node forms integer ABS pattern
216 bool tryABSOp(SDNode *N);
218 bool tryReadRegister(SDNode *N);
219 bool tryWriteRegister(SDNode *N);
221 bool tryInlineAsm(SDNode *N);
223 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
225 void SelectCMP_SWAP(SDNode *N);
227 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
228 /// inline asm expressions.
229 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
230 std::vector<SDValue> &OutOps) override;
232 // Form pairs of consecutive R, S, D, or Q registers.
233 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
234 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
235 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
236 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
238 // Form sequences of 4 consecutive S, D, or Q registers.
239 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
240 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
241 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
243 // Get the alignment operand for a NEON VLD or VST instruction.
244 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
245 bool is64BitVector);
247 /// Returns the number of instructions required to materialize the given
248 /// constant in a register, or 3 if a literal pool load is needed.
249 unsigned ConstantMaterializationCost(unsigned Val) const;
251 /// Checks if N is a multiplication by a constant where we can extract out a
252 /// power of two from the constant so that it can be used in a shift, but only
253 /// if it simplifies the materialization of the constant. Returns true if it
254 /// is, and assigns to PowerOfTwo the power of two that should be extracted
255 /// out and to NewMulConst the new constant to be multiplied by.
256 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
257 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
259 /// Replace N with M in CurDAG, in a way that also ensures that M gets
260 /// selected when N would have been selected.
261 void replaceDAGValue(const SDValue &N, SDValue M);
265 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
266 /// operand. If so Imm will receive the 32-bit value.
267 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
268 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
269 Imm = cast<ConstantSDNode>(N)->getZExtValue();
270 return true;
272 return false;
275 // isInt32Immediate - This method tests to see if a constant operand.
276 // If so Imm will receive the 32 bit value.
277 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
278 return isInt32Immediate(N.getNode(), Imm);
281 // isOpcWithIntImmediate - This method tests to see if the node is a specific
282 // opcode and that it has a immediate integer right operand.
283 // If so Imm will receive the 32 bit value.
284 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
285 return N->getOpcode() == Opc &&
286 isInt32Immediate(N->getOperand(1).getNode(), Imm);
289 /// Check whether a particular node is a constant value representable as
290 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
292 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
293 static bool isScaledConstantInRange(SDValue Node, int Scale,
294 int RangeMin, int RangeMax,
295 int &ScaledConstant) {
296 assert(Scale > 0 && "Invalid scale!");
298 // Check that this is a constant.
299 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
300 if (!C)
301 return false;
303 ScaledConstant = (int) C->getZExtValue();
304 if ((ScaledConstant % Scale) != 0)
305 return false;
307 ScaledConstant /= Scale;
308 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
311 void ARMDAGToDAGISel::PreprocessISelDAG() {
312 if (!Subtarget->hasV6T2Ops())
313 return;
315 bool isThumb2 = Subtarget->isThumb();
316 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
317 E = CurDAG->allnodes_end(); I != E; ) {
318 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
320 if (N->getOpcode() != ISD::ADD)
321 continue;
323 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
324 // leading zeros, followed by consecutive set bits, followed by 1 or 2
325 // trailing zeros, e.g. 1020.
326 // Transform the expression to
327 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
328 // of trailing zeros of c2. The left shift would be folded as an shifter
329 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
330 // node (UBFX).
332 SDValue N0 = N->getOperand(0);
333 SDValue N1 = N->getOperand(1);
334 unsigned And_imm = 0;
335 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
336 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
337 std::swap(N0, N1);
339 if (!And_imm)
340 continue;
342 // Check if the AND mask is an immediate of the form: 000.....1111111100
343 unsigned TZ = countTrailingZeros(And_imm);
344 if (TZ != 1 && TZ != 2)
345 // Be conservative here. Shifter operands aren't always free. e.g. On
346 // Swift, left shifter operand of 1 / 2 for free but others are not.
347 // e.g.
348 // ubfx r3, r1, #16, #8
349 // ldr.w r3, [r0, r3, lsl #2]
350 // vs.
351 // mov.w r9, #1020
352 // and.w r2, r9, r1, lsr #14
353 // ldr r2, [r0, r2]
354 continue;
355 And_imm >>= TZ;
356 if (And_imm & (And_imm + 1))
357 continue;
359 // Look for (and (srl X, c1), c2).
360 SDValue Srl = N1.getOperand(0);
361 unsigned Srl_imm = 0;
362 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
363 (Srl_imm <= 2))
364 continue;
366 // Make sure first operand is not a shifter operand which would prevent
367 // folding of the left shift.
368 SDValue CPTmp0;
369 SDValue CPTmp1;
370 SDValue CPTmp2;
371 if (isThumb2) {
372 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
373 continue;
374 } else {
375 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
376 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
377 continue;
380 // Now make the transformation.
381 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
382 Srl.getOperand(0),
383 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
384 MVT::i32));
385 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
386 Srl,
387 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
388 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
389 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
390 CurDAG->UpdateNodeOperands(N, N0, N1);
394 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
395 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
396 /// least on current ARM implementations) which should be avoidded.
397 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
398 if (OptLevel == CodeGenOpt::None)
399 return true;
401 if (!Subtarget->hasVMLxHazards())
402 return true;
404 if (!N->hasOneUse())
405 return false;
407 SDNode *Use = *N->use_begin();
408 if (Use->getOpcode() == ISD::CopyToReg)
409 return true;
410 if (Use->isMachineOpcode()) {
411 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
412 CurDAG->getSubtarget().getInstrInfo());
414 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
415 if (MCID.mayStore())
416 return true;
417 unsigned Opcode = MCID.getOpcode();
418 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
419 return true;
420 // vmlx feeding into another vmlx. We actually want to unfold
421 // the use later in the MLxExpansion pass. e.g.
422 // vmla
423 // vmla (stall 8 cycles)
425 // vmul (5 cycles)
426 // vadd (5 cycles)
427 // vmla
428 // This adds up to about 18 - 19 cycles.
430 // vmla
431 // vmul (stall 4 cycles)
432 // vadd adds up to about 14 cycles.
433 return TII->isFpMLxInstruction(Opcode);
436 return false;
439 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
440 ARM_AM::ShiftOpc ShOpcVal,
441 unsigned ShAmt) {
442 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
443 return true;
444 if (Shift.hasOneUse())
445 return true;
446 // R << 2 is free.
447 return ShOpcVal == ARM_AM::lsl &&
448 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
451 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
452 if (Subtarget->isThumb()) {
453 if (Val <= 255) return 1; // MOV
454 if (Subtarget->hasV6T2Ops() &&
455 (Val <= 0xffff || // MOV
456 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
457 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
458 return 1;
459 if (Val <= 510) return 2; // MOV + ADDi8
460 if (~Val <= 255) return 2; // MOV + MVN
461 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
462 } else {
463 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
464 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
465 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
466 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
468 if (Subtarget->useMovt()) return 2; // MOVW + MOVT
469 return 3; // Literal pool load
472 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
473 unsigned MaxShift,
474 unsigned &PowerOfTwo,
475 SDValue &NewMulConst) const {
476 assert(N.getOpcode() == ISD::MUL);
477 assert(MaxShift > 0);
479 // If the multiply is used in more than one place then changing the constant
480 // will make other uses incorrect, so don't.
481 if (!N.hasOneUse()) return false;
482 // Check if the multiply is by a constant
483 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
484 if (!MulConst) return false;
485 // If the constant is used in more than one place then modifying it will mean
486 // we need to materialize two constants instead of one, which is a bad idea.
487 if (!MulConst->hasOneUse()) return false;
488 unsigned MulConstVal = MulConst->getZExtValue();
489 if (MulConstVal == 0) return false;
491 // Find the largest power of 2 that MulConstVal is a multiple of
492 PowerOfTwo = MaxShift;
493 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
494 --PowerOfTwo;
495 if (PowerOfTwo == 0) return false;
498 // Only optimise if the new cost is better
499 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
500 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
501 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
502 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
503 return NewCost < OldCost;
506 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
507 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
508 ReplaceUses(N, M);
511 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
512 SDValue &BaseReg,
513 SDValue &Opc,
514 bool CheckProfitability) {
515 if (DisableShifterOp)
516 return false;
518 // If N is a multiply-by-constant and it's profitable to extract a shift and
519 // use it in a shifted operand do so.
520 if (N.getOpcode() == ISD::MUL) {
521 unsigned PowerOfTwo = 0;
522 SDValue NewMulConst;
523 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
524 HandleSDNode Handle(N);
525 SDLoc Loc(N);
526 replaceDAGValue(N.getOperand(1), NewMulConst);
527 BaseReg = Handle.getValue();
528 Opc = CurDAG->getTargetConstant(
529 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
530 return true;
534 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
536 // Don't match base register only case. That is matched to a separate
537 // lower complexity pattern with explicit register operand.
538 if (ShOpcVal == ARM_AM::no_shift) return false;
540 BaseReg = N.getOperand(0);
541 unsigned ShImmVal = 0;
542 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
543 if (!RHS) return false;
544 ShImmVal = RHS->getZExtValue() & 31;
545 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
546 SDLoc(N), MVT::i32);
547 return true;
550 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
551 SDValue &BaseReg,
552 SDValue &ShReg,
553 SDValue &Opc,
554 bool CheckProfitability) {
555 if (DisableShifterOp)
556 return false;
558 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
560 // Don't match base register only case. That is matched to a separate
561 // lower complexity pattern with explicit register operand.
562 if (ShOpcVal == ARM_AM::no_shift) return false;
564 BaseReg = N.getOperand(0);
565 unsigned ShImmVal = 0;
566 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
567 if (RHS) return false;
569 ShReg = N.getOperand(1);
570 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
571 return false;
572 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
573 SDLoc(N), MVT::i32);
574 return true;
577 // Determine whether an ISD::OR's operands are suitable to turn the operation
578 // into an addition, which often has more compact encodings.
579 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
580 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
581 Out = N;
582 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
586 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
587 SDValue &Base,
588 SDValue &OffImm) {
589 // Match simple R + imm12 operands.
591 // Base only.
592 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
593 !CurDAG->isBaseWithConstantOffset(N)) {
594 if (N.getOpcode() == ISD::FrameIndex) {
595 // Match frame index.
596 int FI = cast<FrameIndexSDNode>(N)->getIndex();
597 Base = CurDAG->getTargetFrameIndex(
598 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
599 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
600 return true;
603 if (N.getOpcode() == ARMISD::Wrapper &&
604 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
605 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
606 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
607 Base = N.getOperand(0);
608 } else
609 Base = N;
610 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
611 return true;
614 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
615 int RHSC = (int)RHS->getSExtValue();
616 if (N.getOpcode() == ISD::SUB)
617 RHSC = -RHSC;
619 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
620 Base = N.getOperand(0);
621 if (Base.getOpcode() == ISD::FrameIndex) {
622 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
623 Base = CurDAG->getTargetFrameIndex(
624 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
626 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
627 return true;
631 // Base only.
632 Base = N;
633 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
634 return true;
639 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
640 SDValue &Opc) {
641 if (N.getOpcode() == ISD::MUL &&
642 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
643 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
644 // X * [3,5,9] -> X + X * [2,4,8] etc.
645 int RHSC = (int)RHS->getZExtValue();
646 if (RHSC & 1) {
647 RHSC = RHSC & ~1;
648 ARM_AM::AddrOpc AddSub = ARM_AM::add;
649 if (RHSC < 0) {
650 AddSub = ARM_AM::sub;
651 RHSC = - RHSC;
653 if (isPowerOf2_32(RHSC)) {
654 unsigned ShAmt = Log2_32(RHSC);
655 Base = Offset = N.getOperand(0);
656 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
657 ARM_AM::lsl),
658 SDLoc(N), MVT::i32);
659 return true;
665 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
666 // ISD::OR that is equivalent to an ISD::ADD.
667 !CurDAG->isBaseWithConstantOffset(N))
668 return false;
670 // Leave simple R +/- imm12 operands for LDRi12
671 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
672 int RHSC;
673 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
674 -0x1000+1, 0x1000, RHSC)) // 12 bits.
675 return false;
678 // Otherwise this is R +/- [possibly shifted] R.
679 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
680 ARM_AM::ShiftOpc ShOpcVal =
681 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
682 unsigned ShAmt = 0;
684 Base = N.getOperand(0);
685 Offset = N.getOperand(1);
687 if (ShOpcVal != ARM_AM::no_shift) {
688 // Check to see if the RHS of the shift is a constant, if not, we can't fold
689 // it.
690 if (ConstantSDNode *Sh =
691 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
692 ShAmt = Sh->getZExtValue();
693 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
694 Offset = N.getOperand(1).getOperand(0);
695 else {
696 ShAmt = 0;
697 ShOpcVal = ARM_AM::no_shift;
699 } else {
700 ShOpcVal = ARM_AM::no_shift;
704 // Try matching (R shl C) + (R).
705 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
706 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
707 N.getOperand(0).hasOneUse())) {
708 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
709 if (ShOpcVal != ARM_AM::no_shift) {
710 // Check to see if the RHS of the shift is a constant, if not, we can't
711 // fold it.
712 if (ConstantSDNode *Sh =
713 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
714 ShAmt = Sh->getZExtValue();
715 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
716 Offset = N.getOperand(0).getOperand(0);
717 Base = N.getOperand(1);
718 } else {
719 ShAmt = 0;
720 ShOpcVal = ARM_AM::no_shift;
722 } else {
723 ShOpcVal = ARM_AM::no_shift;
728 // If Offset is a multiply-by-constant and it's profitable to extract a shift
729 // and use it in a shifted operand do so.
730 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
731 unsigned PowerOfTwo = 0;
732 SDValue NewMulConst;
733 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
734 HandleSDNode Handle(Offset);
735 replaceDAGValue(Offset.getOperand(1), NewMulConst);
736 Offset = Handle.getValue();
737 ShAmt = PowerOfTwo;
738 ShOpcVal = ARM_AM::lsl;
742 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
743 SDLoc(N), MVT::i32);
744 return true;
747 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
748 SDValue &Offset, SDValue &Opc) {
749 unsigned Opcode = Op->getOpcode();
750 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
751 ? cast<LoadSDNode>(Op)->getAddressingMode()
752 : cast<StoreSDNode>(Op)->getAddressingMode();
753 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
754 ? ARM_AM::add : ARM_AM::sub;
755 int Val;
756 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
757 return false;
759 Offset = N;
760 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
761 unsigned ShAmt = 0;
762 if (ShOpcVal != ARM_AM::no_shift) {
763 // Check to see if the RHS of the shift is a constant, if not, we can't fold
764 // it.
765 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
766 ShAmt = Sh->getZExtValue();
767 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
768 Offset = N.getOperand(0);
769 else {
770 ShAmt = 0;
771 ShOpcVal = ARM_AM::no_shift;
773 } else {
774 ShOpcVal = ARM_AM::no_shift;
778 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
779 SDLoc(N), MVT::i32);
780 return true;
783 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
784 SDValue &Offset, SDValue &Opc) {
785 unsigned Opcode = Op->getOpcode();
786 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
787 ? cast<LoadSDNode>(Op)->getAddressingMode()
788 : cast<StoreSDNode>(Op)->getAddressingMode();
789 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
790 ? ARM_AM::add : ARM_AM::sub;
791 int Val;
792 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
793 if (AddSub == ARM_AM::sub) Val *= -1;
794 Offset = CurDAG->getRegister(0, MVT::i32);
795 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
796 return true;
799 return false;
803 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
804 SDValue &Offset, SDValue &Opc) {
805 unsigned Opcode = Op->getOpcode();
806 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
807 ? cast<LoadSDNode>(Op)->getAddressingMode()
808 : cast<StoreSDNode>(Op)->getAddressingMode();
809 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
810 ? ARM_AM::add : ARM_AM::sub;
811 int Val;
812 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
813 Offset = CurDAG->getRegister(0, MVT::i32);
814 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
815 ARM_AM::no_shift),
816 SDLoc(Op), MVT::i32);
817 return true;
820 return false;
823 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
824 Base = N;
825 return true;
828 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
829 SDValue &Base, SDValue &Offset,
830 SDValue &Opc) {
831 if (N.getOpcode() == ISD::SUB) {
832 // X - C is canonicalize to X + -C, no need to handle it here.
833 Base = N.getOperand(0);
834 Offset = N.getOperand(1);
835 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
836 MVT::i32);
837 return true;
840 if (!CurDAG->isBaseWithConstantOffset(N)) {
841 Base = N;
842 if (N.getOpcode() == ISD::FrameIndex) {
843 int FI = cast<FrameIndexSDNode>(N)->getIndex();
844 Base = CurDAG->getTargetFrameIndex(
845 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
847 Offset = CurDAG->getRegister(0, MVT::i32);
848 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
849 MVT::i32);
850 return true;
853 // If the RHS is +/- imm8, fold into addr mode.
854 int RHSC;
855 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
856 -256 + 1, 256, RHSC)) { // 8 bits.
857 Base = N.getOperand(0);
858 if (Base.getOpcode() == ISD::FrameIndex) {
859 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
860 Base = CurDAG->getTargetFrameIndex(
861 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
863 Offset = CurDAG->getRegister(0, MVT::i32);
865 ARM_AM::AddrOpc AddSub = ARM_AM::add;
866 if (RHSC < 0) {
867 AddSub = ARM_AM::sub;
868 RHSC = -RHSC;
870 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
871 MVT::i32);
872 return true;
875 Base = N.getOperand(0);
876 Offset = N.getOperand(1);
877 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
878 MVT::i32);
879 return true;
882 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
883 SDValue &Offset, SDValue &Opc) {
884 unsigned Opcode = Op->getOpcode();
885 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
886 ? cast<LoadSDNode>(Op)->getAddressingMode()
887 : cast<StoreSDNode>(Op)->getAddressingMode();
888 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
889 ? ARM_AM::add : ARM_AM::sub;
890 int Val;
891 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
892 Offset = CurDAG->getRegister(0, MVT::i32);
893 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
894 MVT::i32);
895 return true;
898 Offset = N;
899 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
900 MVT::i32);
901 return true;
/// IsAddressingMode5 - Shared matcher for the VFP addrmode5 forms (base +
/// scaled imm8). \p Lwb / \p Upb bound the scaled-down immediate, and \p FP16
/// selects the half-precision variant (scale 2 and the FP16 opcode encoding
/// instead of scale 4).
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        int Lwb, int Upb, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      // Use the target frame index so a later frame lowering can fold it.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, except around targets that must stay wrapped.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // The encoding stores magnitude plus an add/sub bit.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant offset out of range: fall back to base-only with a zero offset.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
963 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
964 SDValue &Base, SDValue &Offset) {
965 int Lwb = -256 + 1;
966 int Upb = 256;
967 return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
970 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
971 SDValue &Base, SDValue &Offset) {
972 int Lwb = -512 + 1;
973 int Upb = 512;
974 return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
/// SelectAddrMode6 - Select the address and alignment operands for a NEON
/// VLD/VST (addrmode6). The address is taken as-is; this only computes the
/// alignment immediate that accompanies it.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1006 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1007 SDValue &Offset) {
1008 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1009 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1010 if (AM != ISD::POST_INC)
1011 return false;
1012 Offset = N;
1013 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1014 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1015 Offset = CurDAG->getRegister(0, MVT::i32);
1017 return true;
1020 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1021 SDValue &Offset, SDValue &Label) {
1022 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1023 Offset = N.getOperand(0);
1024 SDValue N1 = N.getOperand(1);
1025 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1026 SDLoc(N), MVT::i32);
1027 return true;
1030 return false;
1034 //===----------------------------------------------------------------------===//
1035 // Thumb Addressing Modes
1036 //===----------------------------------------------------------------------===//
1038 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1039 // Negative numbers are difficult to materialise in thumb1. If we are
1040 // selecting the add of a negative, instead try to select ri with a zero
1041 // offset, so create the add node directly which will become a sub.
1042 if (N.getOpcode() != ISD::ADD)
1043 return false;
1045 // Look for an imm which is not legal for ld/st, but is legal for sub.
1046 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1047 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1049 return false;
/// SelectThumbAddrModeRRSext - Select a Thumb1 [reg, reg] address for the
/// sign-extending loads (ldrsb/ldrsh), which have no immediate-offset form.
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    // A materialised constant zero can serve as both base and offset.
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}
1068 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1069 SDValue &Offset) {
1070 if (shouldUseZeroOffsetLdSt(N))
1071 return false; // Select ri instead
1072 return SelectThumbAddrModeRRSext(N, Base, Offset);
/// SelectThumbAddrModeImm5S - Match a Thumb1 base + imm5*Scale address.
/// Returns the base in Base and the scaled-down immediate in OffImm, or
/// false so that a register-offset form is selected instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Negative immediate: select base-only with zero offset; the add node
    // itself will then be selected as a sub.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper except around targets that must stay wrapped.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1113 bool
1114 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1115 SDValue &OffImm) {
1116 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1119 bool
1120 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1121 SDValue &OffImm) {
1122 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1125 bool
1126 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1127 SDValue &OffImm) {
1128 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
/// SelectThumbAddrModeSP - Match a Thumb1 SP- or frame-index-relative
/// address (tLDRspi/tSTRspi): base + imm8 scaled by 4. May raise the frame
/// object's alignment to 4 to keep the offset a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  // The base must be SP or a frame index for this addressing mode.
  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1175 //===----------------------------------------------------------------------===//
1176 // Thumb 2 Addressing Modes
1177 //===----------------------------------------------------------------------===//
/// SelectT2AddrModeImm12 - Match a Thumb2 base + unsigned imm12 address
/// (t2LDRi12/t2STRi12). Defers (R - imm8) to t2LDRi8 and constant pools to
/// t2LDRpci.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
/// SelectT2AddrModeImm8 - Match a Thumb2 base with a *negative* imm8 offset
/// (t2LDRi8/t2STRi8). Non-negative offsets are left for the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
/// SelectT2AddrModeImm8Offset - Select the imm8 offset operand for a Thumb2
/// pre/post-indexed load or store. The sign of the emitted immediate follows
/// the increment/decrement direction of the addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  // Op is the indexed memory node; inc vs. dec decides the immediate's sign.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
/// SelectT2AddrModeSoReg - Match a Thumb2 register + shifted-register address
/// (R + R or R + (R << [1,2,3])). Immediate-offset forms are deliberately
/// rejected so the imm12/imm8 patterns get first pick.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 0-3 are encodable; otherwise fall back to ShAmt 0.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
/// SelectT2AddrModeExclusive - Select the base + imm8*4 address for ldrex and
/// strex. Always succeeds (falling back to base + 0) because these
/// instructions have no alternative addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // The offset must be a multiple of 4 in [0, 1020] (imm8 scaled by 4).
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The instruction encodes the offset divided by 4.
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1369 //===--------------------------------------------------------------------===//
1371 /// getAL - Returns a ARMCC::AL immediate node.
1372 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1373 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1376 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1377 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1378 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
/// tryARMIndexedLoad - Try to select an indexed (pre/post inc/dec) load as an
/// ARM-mode LDR/LDRB/LDRH/LDRSB/LDRSH variant. The match order below is
/// significant: immediate forms are preferred over register forms.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3 for both extension kinds.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Signed byte loads use addrmode3; unsigned byte loads use addrmode2.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms take no separate offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
/// tryT1IndexedLoad - Try to select a Thumb1 post-incremented i32 load.
/// Only post-inc by exactly 4 of a non-extending i32 load can be matched,
/// since it maps onto a single-register LDM.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  // LDM always advances by the transfer size, so the increment must be 4.
  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
/// tryT2IndexedLoad - Try to select an indexed load as a Thumb2 pre/post
/// indexed LDR variant, picking the opcode from the memory type and the
/// extension kind.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1538 /// Form a GPRPair pseudo register from a pair of GPR regs.
1539 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1540 SDLoc dl(V0.getNode());
1541 SDValue RegClass =
1542 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1543 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1544 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1545 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1546 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1549 /// Form a D register from a pair of S registers.
1550 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1551 SDLoc dl(V0.getNode());
1552 SDValue RegClass =
1553 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1554 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1555 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1556 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1557 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1560 /// Form a quad register from a pair of D registers.
1561 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1562 SDLoc dl(V0.getNode());
1563 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1564 MVT::i32);
1565 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1566 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1567 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1568 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1571 /// Form 4 consecutive D registers from a pair of Q registers.
1572 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1573 SDLoc dl(V0.getNode());
1574 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1575 MVT::i32);
1576 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1577 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1578 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1579 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1582 /// Form 4 consecutive S registers.
1583 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1584 SDValue V2, SDValue V3) {
1585 SDLoc dl(V0.getNode());
1586 SDValue RegClass =
1587 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1588 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1589 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1590 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1591 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1592 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1593 V2, SubReg2, V3, SubReg3 };
1594 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1597 /// Form 4 consecutive D registers.
1598 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1599 SDValue V2, SDValue V3) {
1600 SDLoc dl(V0.getNode());
1601 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1602 MVT::i32);
1603 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1604 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1605 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1606 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1607 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1608 V2, SubReg2, V3, SubReg3 };
1609 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1612 /// Form 4 consecutive Q registers.
1613 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1614 SDValue V2, SDValue V3) {
1615 SDLoc dl(V0.getNode());
1616 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1617 MVT::i32);
1618 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1619 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1620 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1621 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1622 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1623 V2, SubReg2, V3, SubReg3 };
1624 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1627 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1628 /// of a NEON VLD or VST instruction. The supported values depend on the
1629 /// number of registers being loaded.
1630 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1631 unsigned NumVecs, bool is64BitVector) {
1632 unsigned NumRegs = NumVecs;
1633 if (!is64BitVector && NumVecs < 3)
1634 NumRegs *= 2;
1636 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1637 if (Alignment >= 32 && NumRegs == 4)
1638 Alignment = 32;
1639 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1640 Alignment = 16;
1641 else if (Alignment >= 8)
1642 Alignment = 8;
1643 else
1644 Alignment = 0;
1646 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1649 static bool isVLDfixed(unsigned Opc)
1651 switch (Opc) {
1652 default: return false;
1653 case ARM::VLD1d8wb_fixed : return true;
1654 case ARM::VLD1d16wb_fixed : return true;
1655 case ARM::VLD1d64Qwb_fixed : return true;
1656 case ARM::VLD1d32wb_fixed : return true;
1657 case ARM::VLD1d64wb_fixed : return true;
1658 case ARM::VLD1d64TPseudoWB_fixed : return true;
1659 case ARM::VLD1d64QPseudoWB_fixed : return true;
1660 case ARM::VLD1q8wb_fixed : return true;
1661 case ARM::VLD1q16wb_fixed : return true;
1662 case ARM::VLD1q32wb_fixed : return true;
1663 case ARM::VLD1q64wb_fixed : return true;
1664 case ARM::VLD1DUPd8wb_fixed : return true;
1665 case ARM::VLD1DUPd16wb_fixed : return true;
1666 case ARM::VLD1DUPd32wb_fixed : return true;
1667 case ARM::VLD1DUPq8wb_fixed : return true;
1668 case ARM::VLD1DUPq16wb_fixed : return true;
1669 case ARM::VLD1DUPq32wb_fixed : return true;
1670 case ARM::VLD2d8wb_fixed : return true;
1671 case ARM::VLD2d16wb_fixed : return true;
1672 case ARM::VLD2d32wb_fixed : return true;
1673 case ARM::VLD2q8PseudoWB_fixed : return true;
1674 case ARM::VLD2q16PseudoWB_fixed : return true;
1675 case ARM::VLD2q32PseudoWB_fixed : return true;
1676 case ARM::VLD2DUPd8wb_fixed : return true;
1677 case ARM::VLD2DUPd16wb_fixed : return true;
1678 case ARM::VLD2DUPd32wb_fixed : return true;
1682 static bool isVSTfixed(unsigned Opc)
1684 switch (Opc) {
1685 default: return false;
1686 case ARM::VST1d8wb_fixed : return true;
1687 case ARM::VST1d16wb_fixed : return true;
1688 case ARM::VST1d32wb_fixed : return true;
1689 case ARM::VST1d64wb_fixed : return true;
1690 case ARM::VST1q8wb_fixed : return true;
1691 case ARM::VST1q16wb_fixed : return true;
1692 case ARM::VST1q32wb_fixed : return true;
1693 case ARM::VST1q64wb_fixed : return true;
1694 case ARM::VST1d64TPseudoWB_fixed : return true;
1695 case ARM::VST1d64QPseudoWB_fixed : return true;
1696 case ARM::VST2d8wb_fixed : return true;
1697 case ARM::VST2d16wb_fixed : return true;
1698 case ARM::VST2d32wb_fixed : return true;
1699 case ARM::VST2q8PseudoWB_fixed : return true;
1700 case ARM::VST2q16PseudoWB_fixed : return true;
1701 case ARM::VST2q32PseudoWB_fixed : return true;
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1763 /// Returns true if the given increment is a Constant known to be equal to the
1764 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1765 /// be used.
1766 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1767 auto C = dyn_cast<ConstantSDNode>(Inc);
1768 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
/// Select a NEON VLD1/VLD2/VLD3/VLD4 node (NumVecs = 1..4), replacing \p N
/// with machine node(s) and extract_subreg nodes for the individual vectors.
///
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors loaded (1-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) element results,
///                    indexed by element size (see OpcodeIndex below).
/// \param QOpcodes0   opcodes for 128-bit (Q-register) results; for quad
///                    VLD3/VLD4 this is the "even registers" first half.
/// \param QOpcodes1   second-half opcodes, used only for quad VLD3/VLD4
///                    which are emitted as two instructions.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  // Intrinsic nodes carry the intrinsic ID in operand 1, so the address is
  // one operand further along.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector results come back as one wide i64-element super-register;
  // a 3-vector result is padded out to 4 registers.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list: super-register, optional i32 writeback value, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs. Result 1 of VLdA is the post-incremented address.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Chain, then (for updating forms) the writeback value.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
/// Select a NEON VST1/VST2/VST3/VST4 node (NumVecs = 1..4), replacing \p N
/// with the matching machine node(s).
///
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors stored (1-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) element sources.
/// \param QOpcodes0   opcodes for 128-bit (Q-register) sources; for quad
///                    VST3/VST4 this is the "even registers" first half.
/// \param QOpcodes1   second-half opcodes, used only for quad VST3/VST4
///                    which are emitted as two instructions.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The stored value type comes from the first vector operand.
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Result list: optional i32 writeback value, then the chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers. Result 0 of VStA is the incremented address.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
/// Select a NEON single-lane load or store (vld2/3/4lane, vst2/3/4lane),
/// replacing \p N with the matching machine node and, for loads, with
/// extract_subreg nodes for the individual result vectors.
///
/// \param IsLoad      true for vldNlane, false for vstNlane.
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors accessed (2-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) vectors.
/// \param QOpcodes    opcodes for 128-bit (Q-register) vectors.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand following the vectors.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to what the instruction can actually encode: at most
  // the number of bytes accessed, at least 8-byte if below the access size,
  // and always a power of two (1 is encoded as "no alignment").
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result list: (loads only) the wide super-register, then an optional i32
  // writeback value, then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // The lane access size is that of a single element per vector.
    bool IsImmUpdate =
      isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Gather the input vectors into one super-register via REG_SEQUENCE.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    // For the 3-vector forms the fourth slot is left undef.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
/// Select a NEON load-and-duplicate node (vld1dup/vld2dup/vld3dup/vld4dup),
/// replacing \p N with the matching machine node(s) and extract_subreg nodes
/// for the individual result vectors.
///
/// \param IsIntrinsic true when \p N is an intrinsic node (address at
///                    operand 2 instead of operand 1).
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors loaded (1-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) results.
/// \param QOpcodes0   first-instruction opcodes for Q-register results.
/// \param QOpcodes1   second-instruction opcodes for Q-register results
///                    (quad forms are emitted as two instructions).
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to what the instruction can encode (see the same
  // logic in SelectVLDSTLane): capped at the access size, power of two,
  // with 1 meaning "no alignment".
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The result is one wide i64-element super-register; a 3-vector result is
  // padded out to 4 registers.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  // Result list: super-register, optional i32 writeback value, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // Quad vld2dup: two instructions chained together.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // Quad vld3/4dup: the second instruction takes the first's result as a
    // tied input, seeded with an IMPLICIT_DEF.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
/// Try to select an ARMv6T2 bitfield extract (UBFX/SBFX, or a plain shift
/// when the field reaches the top bit) for \p N, which is expected to be one
/// of: and(srl X, c1), c2; srl(shl X, c1), c2; srl(and X, mask), c;
/// or sign_extend_inreg(srl/sra X, c).
///
/// \param isSigned selects SBFX (and ASR for the top-bits case) instead of
///                 UBFX/LSR.
/// \returns true if \p N was replaced with a machine node.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB means the field would start below bit 0 - not an
      // extract.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
2451 /// Target-specific DAG combining for ISD::XOR.
2452 /// Target-independent combining lowers SELECT_CC nodes of the form
2453 /// select_cc setg[ge] X, 0, X, -X
2454 /// select_cc setgt X, -1, X, -X
2455 /// select_cc setl[te] X, 0, -X, X
2456 /// select_cc setlt X, 1, -X, X
2457 /// which represent Integer ABS into:
2458 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2459 /// ARM instruction selection detects the latter and matches it to
2460 /// ARM::ABS or ARM::t2ABS machine node.
2461 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2462 SDValue XORSrc0 = N->getOperand(0);
2463 SDValue XORSrc1 = N->getOperand(1);
2464 EVT VT = N->getValueType(0);
2466 if (Subtarget->isThumb1Only())
2467 return false;
2469 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2470 return false;
2472 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2473 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2474 SDValue SRASrc0 = XORSrc1.getOperand(0);
2475 SDValue SRASrc1 = XORSrc1.getOperand(1);
2476 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2477 EVT XType = SRASrc0.getValueType();
2478 unsigned Size = XType.getSizeInBits() - 1;
2480 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2481 XType.isInteger() && SRAConstant != nullptr &&
2482 Size == SRAConstant->getZExtValue()) {
2483 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2484 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2485 return true;
2488 return false;
2491 /// We've got special pseudo-instructions for these
2492 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2493 unsigned Opcode;
2494 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2495 if (MemTy == MVT::i8)
2496 Opcode = ARM::CMP_SWAP_8;
2497 else if (MemTy == MVT::i16)
2498 Opcode = ARM::CMP_SWAP_16;
2499 else if (MemTy == MVT::i32)
2500 Opcode = ARM::CMP_SWAP_32;
2501 else
2502 llvm_unreachable("Unknown AtomicCmpSwap type");
2504 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2505 N->getOperand(0)};
2506 SDNode *CmpSwap = CurDAG->getMachineNode(
2507 Opcode, SDLoc(N),
2508 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2510 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2511 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2513 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2514 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2515 CurDAG->RemoveDeadNode(N);
2518 static Optional<std::pair<unsigned, unsigned>>
2519 getContiguousRangeOfSetBits(const APInt &A) {
2520 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2521 unsigned LastOne = A.countTrailingZeros();
2522 if (A.countPopulation() != (FirstOne - LastOne + 1))
2523 return Optional<std::pair<unsigned,unsigned>>();
2524 return std::make_pair(FirstOne, LastOne);
/// Try to turn the operand of a CMPZ-against-zero into flag-setting shifts.
/// On Thumb targets, (cmpz (and X, C), #0) with a contiguous mask C can be
/// implemented as LSLS/LSRS (or both), which set the flags directly.
///
/// \param[out] SwitchEQNEToPLMI set to true when the caller must rewrite an
///             EQ/NE condition to PL/MI (single-bit masks are tested via the
///             sign bit after shifting the bit to position 31).
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Only contiguous masks can be implemented with (at most two) shifts.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting immediate shift: Thumb2 uses t2LSLri/t2LSRri with the
  // optional-def CPSR operand; Thumb1 shift opcodes take CPSR explicitly.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  // Range->first is the mask's highest set bit, Range->second its lowest.
  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
2599 void ARMDAGToDAGISel::Select(SDNode *N) {
2600 SDLoc dl(N);
2602 if (N->isMachineOpcode()) {
2603 N->setNodeId(-1);
2604 return; // Already selected.
2607 switch (N->getOpcode()) {
2608 default: break;
2609 case ISD::WRITE_REGISTER:
2610 if (tryWriteRegister(N))
2611 return;
2612 break;
2613 case ISD::READ_REGISTER:
2614 if (tryReadRegister(N))
2615 return;
2616 break;
2617 case ISD::INLINEASM:
2618 case ISD::INLINEASM_BR:
2619 if (tryInlineAsm(N))
2620 return;
2621 break;
2622 case ISD::XOR:
2623 // Select special operations if XOR node forms integer ABS pattern
2624 if (tryABSOp(N))
2625 return;
2626 // Other cases are autogenerated.
2627 break;
2628 case ISD::Constant: {
2629 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2630 // If we can't materialize the constant we need to use a literal pool
2631 if (ConstantMaterializationCost(Val) > 2) {
2632 SDValue CPIdx = CurDAG->getTargetConstantPool(
2633 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2634 TLI->getPointerTy(CurDAG->getDataLayout()));
2636 SDNode *ResNode;
2637 if (Subtarget->isThumb()) {
2638 SDValue Ops[] = {
2639 CPIdx,
2640 getAL(CurDAG, dl),
2641 CurDAG->getRegister(0, MVT::i32),
2642 CurDAG->getEntryNode()
2644 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2645 Ops);
2646 } else {
2647 SDValue Ops[] = {
2648 CPIdx,
2649 CurDAG->getTargetConstant(0, dl, MVT::i32),
2650 getAL(CurDAG, dl),
2651 CurDAG->getRegister(0, MVT::i32),
2652 CurDAG->getEntryNode()
2654 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2655 Ops);
2657 // Annotate the Node with memory operand information so that MachineInstr
2658 // queries work properly. This e.g. gives the register allocation the
2659 // required information for rematerialization.
2660 MachineFunction& MF = CurDAG->getMachineFunction();
2661 MachineMemOperand *MemOp =
2662 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2663 MachineMemOperand::MOLoad, 4, 4);
2665 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2667 ReplaceNode(N, ResNode);
2668 return;
2671 // Other cases are autogenerated.
2672 break;
2674 case ISD::FrameIndex: {
2675 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2676 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2677 SDValue TFI = CurDAG->getTargetFrameIndex(
2678 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2679 if (Subtarget->isThumb1Only()) {
2680 // Set the alignment of the frame object to 4, to avoid having to generate
2681 // more than one ADD
2682 MachineFrameInfo &MFI = MF->getFrameInfo();
2683 if (MFI.getObjectAlignment(FI) < 4)
2684 MFI.setObjectAlignment(FI, 4);
2685 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2686 CurDAG->getTargetConstant(0, dl, MVT::i32));
2687 return;
2688 } else {
2689 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2690 ARM::t2ADDri : ARM::ADDri);
2691 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2692 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2693 CurDAG->getRegister(0, MVT::i32) };
2694 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2695 return;
2698 case ISD::SRL:
2699 if (tryV6T2BitfieldExtractOp(N, false))
2700 return;
2701 break;
2702 case ISD::SIGN_EXTEND_INREG:
2703 case ISD::SRA:
2704 if (tryV6T2BitfieldExtractOp(N, true))
2705 return;
2706 break;
2707 case ISD::MUL:
2708 if (Subtarget->isThumb1Only())
2709 break;
2710 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2711 unsigned RHSV = C->getZExtValue();
2712 if (!RHSV) break;
2713 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2714 unsigned ShImm = Log2_32(RHSV-1);
2715 if (ShImm >= 32)
2716 break;
2717 SDValue V = N->getOperand(0);
2718 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2719 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2720 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2721 if (Subtarget->isThumb()) {
2722 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2723 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2724 return;
2725 } else {
2726 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2727 Reg0 };
2728 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2729 return;
2732 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2733 unsigned ShImm = Log2_32(RHSV+1);
2734 if (ShImm >= 32)
2735 break;
2736 SDValue V = N->getOperand(0);
2737 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2738 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2739 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2740 if (Subtarget->isThumb()) {
2741 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2742 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2743 return;
2744 } else {
2745 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2746 Reg0 };
2747 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2748 return;
2752 break;
2753 case ISD::AND: {
2754 // Check for unsigned bitfield extract
2755 if (tryV6T2BitfieldExtractOp(N, false))
2756 return;
2758 // If an immediate is used in an AND node, it is possible that the immediate
2759 // can be more optimally materialized when negated. If this is the case we
2760 // can negate the immediate and use a BIC instead.
2761 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2762 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2763 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2765 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2766 // immediate can be negated and fit in the immediate operand of
2767 // a t2BIC, don't do any manual transform here as this can be
2768 // handled by the generic ISel machinery.
2769 bool PreferImmediateEncoding =
2770 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2771 if (!PreferImmediateEncoding &&
2772 ConstantMaterializationCost(Imm) >
2773 ConstantMaterializationCost(~Imm)) {
2774 // The current immediate costs more to materialize than a negated
2775 // immediate, so negate the immediate and use a BIC.
2776 SDValue NewImm =
2777 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2778 // If the new constant didn't exist before, reposition it in the topological
2779 // ordering so it is just before N. Otherwise, don't touch its location.
2780 if (NewImm->getNodeId() == -1)
2781 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2783 if (!Subtarget->hasThumb2()) {
2784 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2785 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2786 CurDAG->getRegister(0, MVT::i32)};
2787 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2788 return;
2789 } else {
2790 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2791 CurDAG->getRegister(0, MVT::i32),
2792 CurDAG->getRegister(0, MVT::i32)};
2793 ReplaceNode(N,
2794 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2795 return;
2800 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2801 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2802 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2803 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2804 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2805 EVT VT = N->getValueType(0);
2806 if (VT != MVT::i32)
2807 break;
2808 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2809 ? ARM::t2MOVTi16
2810 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2811 if (!Opc)
2812 break;
2813 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2814 N1C = dyn_cast<ConstantSDNode>(N1);
2815 if (!N1C)
2816 break;
2817 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2818 SDValue N2 = N0.getOperand(1);
2819 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2820 if (!N2C)
2821 break;
2822 unsigned N1CVal = N1C->getZExtValue();
2823 unsigned N2CVal = N2C->getZExtValue();
2824 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2825 (N1CVal & 0xffffU) == 0xffffU &&
2826 (N2CVal & 0xffffU) == 0x0U) {
2827 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2828 dl, MVT::i32);
2829 SDValue Ops[] = { N0.getOperand(0), Imm16,
2830 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2831 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2832 return;
2836 break;
2838 case ARMISD::UMAAL: {
2839 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2840 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2841 N->getOperand(2), N->getOperand(3),
2842 getAL(CurDAG, dl),
2843 CurDAG->getRegister(0, MVT::i32) };
2844 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2845 return;
2847 case ARMISD::UMLAL:{
2848 if (Subtarget->isThumb()) {
2849 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2850 N->getOperand(3), getAL(CurDAG, dl),
2851 CurDAG->getRegister(0, MVT::i32)};
2852 ReplaceNode(
2853 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2854 return;
2855 }else{
2856 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2857 N->getOperand(3), getAL(CurDAG, dl),
2858 CurDAG->getRegister(0, MVT::i32),
2859 CurDAG->getRegister(0, MVT::i32) };
2860 ReplaceNode(N, CurDAG->getMachineNode(
2861 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2862 MVT::i32, MVT::i32, Ops));
2863 return;
2866 case ARMISD::SMLAL:{
2867 if (Subtarget->isThumb()) {
2868 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2869 N->getOperand(3), getAL(CurDAG, dl),
2870 CurDAG->getRegister(0, MVT::i32)};
2871 ReplaceNode(
2872 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2873 return;
2874 }else{
2875 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2876 N->getOperand(3), getAL(CurDAG, dl),
2877 CurDAG->getRegister(0, MVT::i32),
2878 CurDAG->getRegister(0, MVT::i32) };
2879 ReplaceNode(N, CurDAG->getMachineNode(
2880 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2881 MVT::i32, MVT::i32, Ops));
2882 return;
2885 case ARMISD::SUBE: {
2886 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2887 break;
2888 // Look for a pattern to match SMMLS
2889 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2890 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2891 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2892 !SDValue(N, 1).use_empty())
2893 break;
2895 if (Subtarget->isThumb())
2896 assert(Subtarget->hasThumb2() &&
2897 "This pattern should not be generated for Thumb");
2899 SDValue SmulLoHi = N->getOperand(1);
2900 SDValue Subc = N->getOperand(2);
2901 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2903 if (!Zero || Zero->getZExtValue() != 0 ||
2904 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2905 N->getOperand(1) != SmulLoHi.getValue(1) ||
2906 N->getOperand(2) != Subc.getValue(1))
2907 break;
2909 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2910 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2911 N->getOperand(0), getAL(CurDAG, dl),
2912 CurDAG->getRegister(0, MVT::i32) };
2913 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2914 return;
2916 case ISD::LOAD: {
2917 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2918 if (tryT2IndexedLoad(N))
2919 return;
2920 } else if (Subtarget->isThumb()) {
2921 if (tryT1IndexedLoad(N))
2922 return;
2923 } else if (tryARMIndexedLoad(N))
2924 return;
2925 // Other cases are autogenerated.
2926 break;
2928 case ARMISD::BRCOND: {
2929 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2930 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2931 // Pattern complexity = 6 cost = 1 size = 0
2933 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2934 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2935 // Pattern complexity = 6 cost = 1 size = 0
2937 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2938 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2939 // Pattern complexity = 6 cost = 1 size = 0
2941 unsigned Opc = Subtarget->isThumb() ?
2942 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2943 SDValue Chain = N->getOperand(0);
2944 SDValue N1 = N->getOperand(1);
2945 SDValue N2 = N->getOperand(2);
2946 SDValue N3 = N->getOperand(3);
2947 SDValue InFlag = N->getOperand(4);
2948 assert(N1.getOpcode() == ISD::BasicBlock);
2949 assert(N2.getOpcode() == ISD::Constant);
2950 assert(N3.getOpcode() == ISD::Register);
2952 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2954 if (InFlag.getOpcode() == ARMISD::CMPZ) {
2955 bool SwitchEQNEToPLMI;
2956 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2957 InFlag = N->getOperand(4);
2959 if (SwitchEQNEToPLMI) {
2960 switch ((ARMCC::CondCodes)CC) {
2961 default: llvm_unreachable("CMPZ must be either NE or EQ!");
2962 case ARMCC::NE:
2963 CC = (unsigned)ARMCC::MI;
2964 break;
2965 case ARMCC::EQ:
2966 CC = (unsigned)ARMCC::PL;
2967 break;
2972 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
2973 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2974 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2975 MVT::Glue, Ops);
2976 Chain = SDValue(ResNode, 0);
2977 if (N->getNumValues() == 2) {
2978 InFlag = SDValue(ResNode, 1);
2979 ReplaceUses(SDValue(N, 1), InFlag);
2981 ReplaceUses(SDValue(N, 0),
2982 SDValue(Chain.getNode(), Chain.getResNo()));
2983 CurDAG->RemoveDeadNode(N);
2984 return;
2987 case ARMISD::CMPZ: {
2988 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
2989 // This allows us to avoid materializing the expensive negative constant.
2990 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
2991 // for its glue output.
2992 SDValue X = N->getOperand(0);
2993 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
2994 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
2995 int64_t Addend = -C->getSExtValue();
2997 SDNode *Add = nullptr;
2998 // ADDS can be better than CMN if the immediate fits in a
2999 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3000 // Outside that range we can just use a CMN which is 32-bit but has a
3001 // 12-bit immediate range.
3002 if (Addend < 1<<8) {
3003 if (Subtarget->isThumb2()) {
3004 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3005 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3006 CurDAG->getRegister(0, MVT::i32) };
3007 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3008 } else {
3009 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3010 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3011 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3012 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3013 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3016 if (Add) {
3017 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3018 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3021 // Other cases are autogenerated.
3022 break;
3025 case ARMISD::CMOV: {
3026 SDValue InFlag = N->getOperand(4);
3028 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3029 bool SwitchEQNEToPLMI;
3030 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3032 if (SwitchEQNEToPLMI) {
3033 SDValue ARMcc = N->getOperand(2);
3034 ARMCC::CondCodes CC =
3035 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3037 switch (CC) {
3038 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3039 case ARMCC::NE:
3040 CC = ARMCC::MI;
3041 break;
3042 case ARMCC::EQ:
3043 CC = ARMCC::PL;
3044 break;
3046 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3047 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3048 N->getOperand(3), N->getOperand(4)};
3049 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3053 // Other cases are autogenerated.
3054 break;
3057 case ARMISD::VZIP: {
3058 unsigned Opc = 0;
3059 EVT VT = N->getValueType(0);
3060 switch (VT.getSimpleVT().SimpleTy) {
3061 default: return;
3062 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3063 case MVT::v4f16:
3064 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3065 case MVT::v2f32:
3066 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3067 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3068 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3069 case MVT::v8f16:
3070 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3071 case MVT::v4f32:
3072 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3074 SDValue Pred = getAL(CurDAG, dl);
3075 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3076 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3077 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3078 return;
3080 case ARMISD::VUZP: {
3081 unsigned Opc = 0;
3082 EVT VT = N->getValueType(0);
3083 switch (VT.getSimpleVT().SimpleTy) {
3084 default: return;
3085 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3086 case MVT::v4f16:
3087 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3088 case MVT::v2f32:
3089 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3090 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3091 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3092 case MVT::v8f16:
3093 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3094 case MVT::v4f32:
3095 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3097 SDValue Pred = getAL(CurDAG, dl);
3098 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3099 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3100 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3101 return;
3103 case ARMISD::VTRN: {
3104 unsigned Opc = 0;
3105 EVT VT = N->getValueType(0);
3106 switch (VT.getSimpleVT().SimpleTy) {
3107 default: return;
3108 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3109 case MVT::v4f16:
3110 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3111 case MVT::v2f32:
3112 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3113 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3114 case MVT::v8f16:
3115 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3116 case MVT::v4f32:
3117 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3119 SDValue Pred = getAL(CurDAG, dl);
3120 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3121 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3122 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3123 return;
3125 case ARMISD::BUILD_VECTOR: {
3126 EVT VecVT = N->getValueType(0);
3127 EVT EltVT = VecVT.getVectorElementType();
3128 unsigned NumElts = VecVT.getVectorNumElements();
3129 if (EltVT == MVT::f64) {
3130 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3131 ReplaceNode(
3132 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3133 return;
3135 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3136 if (NumElts == 2) {
3137 ReplaceNode(
3138 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3139 return;
3141 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3142 ReplaceNode(N,
3143 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3144 N->getOperand(2), N->getOperand(3)));
3145 return;
3148 case ARMISD::VLD1DUP: {
3149 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3150 ARM::VLD1DUPd32 };
3151 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3152 ARM::VLD1DUPq32 };
3153 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3154 return;
3157 case ARMISD::VLD2DUP: {
3158 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3159 ARM::VLD2DUPd32 };
3160 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3161 return;
3164 case ARMISD::VLD3DUP: {
3165 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3166 ARM::VLD3DUPd16Pseudo,
3167 ARM::VLD3DUPd32Pseudo };
3168 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3169 return;
3172 case ARMISD::VLD4DUP: {
3173 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3174 ARM::VLD4DUPd16Pseudo,
3175 ARM::VLD4DUPd32Pseudo };
3176 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3177 return;
3180 case ARMISD::VLD1DUP_UPD: {
3181 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3182 ARM::VLD1DUPd16wb_fixed,
3183 ARM::VLD1DUPd32wb_fixed };
3184 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3185 ARM::VLD1DUPq16wb_fixed,
3186 ARM::VLD1DUPq32wb_fixed };
3187 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3188 return;
3191 case ARMISD::VLD2DUP_UPD: {
3192 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3193 ARM::VLD2DUPd16wb_fixed,
3194 ARM::VLD2DUPd32wb_fixed };
3195 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3196 return;
3199 case ARMISD::VLD3DUP_UPD: {
3200 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3201 ARM::VLD3DUPd16Pseudo_UPD,
3202 ARM::VLD3DUPd32Pseudo_UPD };
3203 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3204 return;
3207 case ARMISD::VLD4DUP_UPD: {
3208 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3209 ARM::VLD4DUPd16Pseudo_UPD,
3210 ARM::VLD4DUPd32Pseudo_UPD };
3211 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3212 return;
3215 case ARMISD::VLD1_UPD: {
3216 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3217 ARM::VLD1d16wb_fixed,
3218 ARM::VLD1d32wb_fixed,
3219 ARM::VLD1d64wb_fixed };
3220 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3221 ARM::VLD1q16wb_fixed,
3222 ARM::VLD1q32wb_fixed,
3223 ARM::VLD1q64wb_fixed };
3224 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3225 return;
3228 case ARMISD::VLD2_UPD: {
3229 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3230 ARM::VLD2d16wb_fixed,
3231 ARM::VLD2d32wb_fixed,
3232 ARM::VLD1q64wb_fixed};
3233 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3234 ARM::VLD2q16PseudoWB_fixed,
3235 ARM::VLD2q32PseudoWB_fixed };
3236 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3237 return;
3240 case ARMISD::VLD3_UPD: {
3241 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3242 ARM::VLD3d16Pseudo_UPD,
3243 ARM::VLD3d32Pseudo_UPD,
3244 ARM::VLD1d64TPseudoWB_fixed};
3245 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3246 ARM::VLD3q16Pseudo_UPD,
3247 ARM::VLD3q32Pseudo_UPD };
3248 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3249 ARM::VLD3q16oddPseudo_UPD,
3250 ARM::VLD3q32oddPseudo_UPD };
3251 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3252 return;
3255 case ARMISD::VLD4_UPD: {
3256 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3257 ARM::VLD4d16Pseudo_UPD,
3258 ARM::VLD4d32Pseudo_UPD,
3259 ARM::VLD1d64QPseudoWB_fixed};
3260 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3261 ARM::VLD4q16Pseudo_UPD,
3262 ARM::VLD4q32Pseudo_UPD };
3263 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3264 ARM::VLD4q16oddPseudo_UPD,
3265 ARM::VLD4q32oddPseudo_UPD };
3266 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3267 return;
3270 case ARMISD::VLD2LN_UPD: {
3271 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3272 ARM::VLD2LNd16Pseudo_UPD,
3273 ARM::VLD2LNd32Pseudo_UPD };
3274 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3275 ARM::VLD2LNq32Pseudo_UPD };
3276 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3277 return;
3280 case ARMISD::VLD3LN_UPD: {
3281 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3282 ARM::VLD3LNd16Pseudo_UPD,
3283 ARM::VLD3LNd32Pseudo_UPD };
3284 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3285 ARM::VLD3LNq32Pseudo_UPD };
3286 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3287 return;
3290 case ARMISD::VLD4LN_UPD: {
3291 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3292 ARM::VLD4LNd16Pseudo_UPD,
3293 ARM::VLD4LNd32Pseudo_UPD };
3294 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3295 ARM::VLD4LNq32Pseudo_UPD };
3296 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3297 return;
3300 case ARMISD::VST1_UPD: {
3301 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3302 ARM::VST1d16wb_fixed,
3303 ARM::VST1d32wb_fixed,
3304 ARM::VST1d64wb_fixed };
3305 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3306 ARM::VST1q16wb_fixed,
3307 ARM::VST1q32wb_fixed,
3308 ARM::VST1q64wb_fixed };
3309 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3310 return;
3313 case ARMISD::VST2_UPD: {
3314 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3315 ARM::VST2d16wb_fixed,
3316 ARM::VST2d32wb_fixed,
3317 ARM::VST1q64wb_fixed};
3318 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3319 ARM::VST2q16PseudoWB_fixed,
3320 ARM::VST2q32PseudoWB_fixed };
3321 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3322 return;
3325 case ARMISD::VST3_UPD: {
3326 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3327 ARM::VST3d16Pseudo_UPD,
3328 ARM::VST3d32Pseudo_UPD,
3329 ARM::VST1d64TPseudoWB_fixed};
3330 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3331 ARM::VST3q16Pseudo_UPD,
3332 ARM::VST3q32Pseudo_UPD };
3333 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3334 ARM::VST3q16oddPseudo_UPD,
3335 ARM::VST3q32oddPseudo_UPD };
3336 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3337 return;
3340 case ARMISD::VST4_UPD: {
3341 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3342 ARM::VST4d16Pseudo_UPD,
3343 ARM::VST4d32Pseudo_UPD,
3344 ARM::VST1d64QPseudoWB_fixed};
3345 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3346 ARM::VST4q16Pseudo_UPD,
3347 ARM::VST4q32Pseudo_UPD };
3348 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3349 ARM::VST4q16oddPseudo_UPD,
3350 ARM::VST4q32oddPseudo_UPD };
3351 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3352 return;
3355 case ARMISD::VST2LN_UPD: {
3356 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3357 ARM::VST2LNd16Pseudo_UPD,
3358 ARM::VST2LNd32Pseudo_UPD };
3359 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3360 ARM::VST2LNq32Pseudo_UPD };
3361 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3362 return;
3365 case ARMISD::VST3LN_UPD: {
3366 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3367 ARM::VST3LNd16Pseudo_UPD,
3368 ARM::VST3LNd32Pseudo_UPD };
3369 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3370 ARM::VST3LNq32Pseudo_UPD };
3371 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3372 return;
3375 case ARMISD::VST4LN_UPD: {
3376 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3377 ARM::VST4LNd16Pseudo_UPD,
3378 ARM::VST4LNd32Pseudo_UPD };
3379 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3380 ARM::VST4LNq32Pseudo_UPD };
3381 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3382 return;
3385 case ISD::INTRINSIC_VOID:
3386 case ISD::INTRINSIC_W_CHAIN: {
3387 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3388 switch (IntNo) {
3389 default:
3390 break;
3392 case Intrinsic::arm_mrrc:
3393 case Intrinsic::arm_mrrc2: {
3394 SDLoc dl(N);
3395 SDValue Chain = N->getOperand(0);
3396 unsigned Opc;
3398 if (Subtarget->isThumb())
3399 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3400 else
3401 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3403 SmallVector<SDValue, 5> Ops;
3404 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3405 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3406 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3408 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3409 // instruction will always be '1111' but it is possible in assembly language to specify
3410 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3411 if (Opc != ARM::MRRC2) {
3412 Ops.push_back(getAL(CurDAG, dl));
3413 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3416 Ops.push_back(Chain);
3418 // Writes to two registers.
3419 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3421 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3422 return;
3424 case Intrinsic::arm_ldaexd:
3425 case Intrinsic::arm_ldrexd: {
3426 SDLoc dl(N);
3427 SDValue Chain = N->getOperand(0);
3428 SDValue MemAddr = N->getOperand(2);
3429 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3431 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3432 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3433 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3435 // arm_ldrexd returns a i64 value in {i32, i32}
3436 std::vector<EVT> ResTys;
3437 if (isThumb) {
3438 ResTys.push_back(MVT::i32);
3439 ResTys.push_back(MVT::i32);
3440 } else
3441 ResTys.push_back(MVT::Untyped);
3442 ResTys.push_back(MVT::Other);
3444 // Place arguments in the right order.
3445 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3446 CurDAG->getRegister(0, MVT::i32), Chain};
3447 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3448 // Transfer memoperands.
3449 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3450 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3452 // Remap uses.
3453 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3454 if (!SDValue(N, 0).use_empty()) {
3455 SDValue Result;
3456 if (isThumb)
3457 Result = SDValue(Ld, 0);
3458 else {
3459 SDValue SubRegIdx =
3460 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3461 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3462 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3463 Result = SDValue(ResNode,0);
3465 ReplaceUses(SDValue(N, 0), Result);
3467 if (!SDValue(N, 1).use_empty()) {
3468 SDValue Result;
3469 if (isThumb)
3470 Result = SDValue(Ld, 1);
3471 else {
3472 SDValue SubRegIdx =
3473 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3474 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3475 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3476 Result = SDValue(ResNode,0);
3478 ReplaceUses(SDValue(N, 1), Result);
3480 ReplaceUses(SDValue(N, 2), OutChain);
3481 CurDAG->RemoveDeadNode(N);
3482 return;
3484 case Intrinsic::arm_stlexd:
3485 case Intrinsic::arm_strexd: {
3486 SDLoc dl(N);
3487 SDValue Chain = N->getOperand(0);
3488 SDValue Val0 = N->getOperand(2);
3489 SDValue Val1 = N->getOperand(3);
3490 SDValue MemAddr = N->getOperand(4);
3492 // Store exclusive double return a i32 value which is the return status
3493 // of the issued store.
3494 const EVT ResTys[] = {MVT::i32, MVT::Other};
3496 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3497 // Place arguments in the right order.
3498 SmallVector<SDValue, 7> Ops;
3499 if (isThumb) {
3500 Ops.push_back(Val0);
3501 Ops.push_back(Val1);
3502 } else
3503 // arm_strexd uses GPRPair.
3504 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3505 Ops.push_back(MemAddr);
3506 Ops.push_back(getAL(CurDAG, dl));
3507 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3508 Ops.push_back(Chain);
3510 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3511 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3512 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3514 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3515 // Transfer memoperands.
3516 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3517 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3519 ReplaceNode(N, St);
3520 return;
3523 case Intrinsic::arm_neon_vld1: {
3524 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3525 ARM::VLD1d32, ARM::VLD1d64 };
3526 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3527 ARM::VLD1q32, ARM::VLD1q64};
3528 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3529 return;
3532 case Intrinsic::arm_neon_vld1x2: {
3533 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3534 ARM::VLD1q32, ARM::VLD1q64 };
3535 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3536 ARM::VLD1d16QPseudo,
3537 ARM::VLD1d32QPseudo,
3538 ARM::VLD1d64QPseudo };
3539 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3540 return;
3543 case Intrinsic::arm_neon_vld1x3: {
3544 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3545 ARM::VLD1d16TPseudo,
3546 ARM::VLD1d32TPseudo,
3547 ARM::VLD1d64TPseudo };
3548 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3549 ARM::VLD1q16LowTPseudo_UPD,
3550 ARM::VLD1q32LowTPseudo_UPD,
3551 ARM::VLD1q64LowTPseudo_UPD };
3552 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3553 ARM::VLD1q16HighTPseudo,
3554 ARM::VLD1q32HighTPseudo,
3555 ARM::VLD1q64HighTPseudo };
3556 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3557 return;
3560 case Intrinsic::arm_neon_vld1x4: {
3561 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3562 ARM::VLD1d16QPseudo,
3563 ARM::VLD1d32QPseudo,
3564 ARM::VLD1d64QPseudo };
3565 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3566 ARM::VLD1q16LowQPseudo_UPD,
3567 ARM::VLD1q32LowQPseudo_UPD,
3568 ARM::VLD1q64LowQPseudo_UPD };
3569 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3570 ARM::VLD1q16HighQPseudo,
3571 ARM::VLD1q32HighQPseudo,
3572 ARM::VLD1q64HighQPseudo };
3573 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3574 return;
3577 case Intrinsic::arm_neon_vld2: {
3578 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3579 ARM::VLD2d32, ARM::VLD1q64 };
3580 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3581 ARM::VLD2q32Pseudo };
3582 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3583 return;
3586 case Intrinsic::arm_neon_vld3: {
3587 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3588 ARM::VLD3d16Pseudo,
3589 ARM::VLD3d32Pseudo,
3590 ARM::VLD1d64TPseudo };
3591 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3592 ARM::VLD3q16Pseudo_UPD,
3593 ARM::VLD3q32Pseudo_UPD };
3594 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3595 ARM::VLD3q16oddPseudo,
3596 ARM::VLD3q32oddPseudo };
3597 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3598 return;
3601 case Intrinsic::arm_neon_vld4: {
3602 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3603 ARM::VLD4d16Pseudo,
3604 ARM::VLD4d32Pseudo,
3605 ARM::VLD1d64QPseudo };
3606 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3607 ARM::VLD4q16Pseudo_UPD,
3608 ARM::VLD4q32Pseudo_UPD };
3609 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3610 ARM::VLD4q16oddPseudo,
3611 ARM::VLD4q32oddPseudo };
3612 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3613 return;
3616 case Intrinsic::arm_neon_vld2dup: {
3617 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3618 ARM::VLD2DUPd32, ARM::VLD1q64 };
3619 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3620 ARM::VLD2DUPq16EvenPseudo,
3621 ARM::VLD2DUPq32EvenPseudo };
3622 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3623 ARM::VLD2DUPq16OddPseudo,
3624 ARM::VLD2DUPq32OddPseudo };
3625 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3626 DOpcodes, QOpcodes0, QOpcodes1);
3627 return;
3630 case Intrinsic::arm_neon_vld3dup: {
3631 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3632 ARM::VLD3DUPd16Pseudo,
3633 ARM::VLD3DUPd32Pseudo,
3634 ARM::VLD1d64TPseudo };
3635 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3636 ARM::VLD3DUPq16EvenPseudo,
3637 ARM::VLD3DUPq32EvenPseudo };
3638 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3639 ARM::VLD3DUPq16OddPseudo,
3640 ARM::VLD3DUPq32OddPseudo };
3641 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3642 DOpcodes, QOpcodes0, QOpcodes1);
3643 return;
3646 case Intrinsic::arm_neon_vld4dup: {
3647 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3648 ARM::VLD4DUPd16Pseudo,
3649 ARM::VLD4DUPd32Pseudo,
3650 ARM::VLD1d64QPseudo };
3651 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3652 ARM::VLD4DUPq16EvenPseudo,
3653 ARM::VLD4DUPq32EvenPseudo };
3654 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3655 ARM::VLD4DUPq16OddPseudo,
3656 ARM::VLD4DUPq32OddPseudo };
3657 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3658 DOpcodes, QOpcodes0, QOpcodes1);
3659 return;
3662 case Intrinsic::arm_neon_vld2lane: {
3663 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3664 ARM::VLD2LNd16Pseudo,
3665 ARM::VLD2LNd32Pseudo };
3666 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3667 ARM::VLD2LNq32Pseudo };
3668 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3669 return;
3672 case Intrinsic::arm_neon_vld3lane: {
3673 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3674 ARM::VLD3LNd16Pseudo,
3675 ARM::VLD3LNd32Pseudo };
3676 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3677 ARM::VLD3LNq32Pseudo };
3678 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3679 return;
3682 case Intrinsic::arm_neon_vld4lane: {
3683 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3684 ARM::VLD4LNd16Pseudo,
3685 ARM::VLD4LNd32Pseudo };
3686 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3687 ARM::VLD4LNq32Pseudo };
3688 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3689 return;
3692 case Intrinsic::arm_neon_vst1: {
3693 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3694 ARM::VST1d32, ARM::VST1d64 };
3695 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3696 ARM::VST1q32, ARM::VST1q64 };
3697 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3698 return;
3701 case Intrinsic::arm_neon_vst1x2: {
3702 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3703 ARM::VST1q32, ARM::VST1q64 };
3704 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3705 ARM::VST1d16QPseudo,
3706 ARM::VST1d32QPseudo,
3707 ARM::VST1d64QPseudo };
3708 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3709 return;
3712 case Intrinsic::arm_neon_vst1x3: {
3713 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3714 ARM::VST1d16TPseudo,
3715 ARM::VST1d32TPseudo,
3716 ARM::VST1d64TPseudo };
3717 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3718 ARM::VST1q16LowTPseudo_UPD,
3719 ARM::VST1q32LowTPseudo_UPD,
3720 ARM::VST1q64LowTPseudo_UPD };
3721 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3722 ARM::VST1q16HighTPseudo,
3723 ARM::VST1q32HighTPseudo,
3724 ARM::VST1q64HighTPseudo };
3725 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3726 return;
3729 case Intrinsic::arm_neon_vst1x4: {
3730 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3731 ARM::VST1d16QPseudo,
3732 ARM::VST1d32QPseudo,
3733 ARM::VST1d64QPseudo };
3734 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3735 ARM::VST1q16LowQPseudo_UPD,
3736 ARM::VST1q32LowQPseudo_UPD,
3737 ARM::VST1q64LowQPseudo_UPD };
3738 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3739 ARM::VST1q16HighQPseudo,
3740 ARM::VST1q32HighQPseudo,
3741 ARM::VST1q64HighQPseudo };
3742 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3743 return;
3746 case Intrinsic::arm_neon_vst2: {
3747 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3748 ARM::VST2d32, ARM::VST1q64 };
3749 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3750 ARM::VST2q32Pseudo };
3751 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3752 return;
3755 case Intrinsic::arm_neon_vst3: {
3756 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3757 ARM::VST3d16Pseudo,
3758 ARM::VST3d32Pseudo,
3759 ARM::VST1d64TPseudo };
3760 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3761 ARM::VST3q16Pseudo_UPD,
3762 ARM::VST3q32Pseudo_UPD };
3763 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3764 ARM::VST3q16oddPseudo,
3765 ARM::VST3q32oddPseudo };
3766 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3767 return;
3770 case Intrinsic::arm_neon_vst4: {
3771 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3772 ARM::VST4d16Pseudo,
3773 ARM::VST4d32Pseudo,
3774 ARM::VST1d64QPseudo };
3775 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3776 ARM::VST4q16Pseudo_UPD,
3777 ARM::VST4q32Pseudo_UPD };
3778 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3779 ARM::VST4q16oddPseudo,
3780 ARM::VST4q32oddPseudo };
3781 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3782 return;
3785 case Intrinsic::arm_neon_vst2lane: {
3786 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3787 ARM::VST2LNd16Pseudo,
3788 ARM::VST2LNd32Pseudo };
3789 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3790 ARM::VST2LNq32Pseudo };
3791 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3792 return;
3795 case Intrinsic::arm_neon_vst3lane: {
3796 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3797 ARM::VST3LNd16Pseudo,
3798 ARM::VST3LNd32Pseudo };
3799 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3800 ARM::VST3LNq32Pseudo };
3801 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3802 return;
3805 case Intrinsic::arm_neon_vst4lane: {
3806 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3807 ARM::VST4LNd16Pseudo,
3808 ARM::VST4LNd32Pseudo };
3809 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3810 ARM::VST4LNq32Pseudo };
3811 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3812 return;
3815 break;
3818 case ISD::ATOMIC_CMP_SWAP:
3819 SelectCMP_SWAP(N);
3820 return;
3823 SelectCode(N);
3826 // Inspect a register string of the form
3827 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3828 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3829 // and obtain the integer operands from them, adding these operands to the
3830 // provided vector.
3831 static void getIntOperandsFromRegisterString(StringRef RegString,
3832 SelectionDAG *CurDAG,
3833 const SDLoc &DL,
3834 std::vector<SDValue> &Ops) {
3835 SmallVector<StringRef, 5> Fields;
3836 RegString.split(Fields, ':');
3838 if (Fields.size() > 1) {
3839 bool AllIntFields = true;
3841 for (StringRef Field : Fields) {
3842 // Need to trim out leading 'cp' characters and get the integer field.
3843 unsigned IntField;
3844 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3845 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3848 assert(AllIntFields &&
3849 "Unexpected non-integer value in special register string.");
3853 // Maps a Banked Register string to its mask value. The mask value returned is
3854 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3855 // mask operand, which expresses which register is to be used, e.g. r8, and in
3856 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3857 // was invalid.
3858 static inline int getBankedRegisterMask(StringRef RegString) {
3859 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3860 if (!TheReg)
3861 return -1;
3862 return TheReg->Encoding;
3865 // The flags here are common to those allowed for apsr in the A class cores and
3866 // those allowed for the special registers in the M class cores. Returns a
3867 // value representing which flags were present, -1 if invalid.
3868 static inline int getMClassFlagsMask(StringRef Flags) {
3869 return StringSwitch<int>(Flags)
3870 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3871 // correct when flags are not permitted
3872 .Case("g", 0x1)
3873 .Case("nzcvq", 0x2)
3874 .Case("nzcvqg", 0x3)
3875 .Default(-1);
3878 // Maps MClass special registers string to its value for use in the
3879 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3880 // Returns -1 to signify that the string was invalid.
3881 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
3882 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
3883 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
3884 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
3885 return -1;
3886 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
3889 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3890 // The mask operand contains the special register (R Bit) in bit 4, whether
3891 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3892 // bits 3-0 contains the fields to be accessed in the special register, set by
3893 // the flags provided with the register.
3894 int Mask = 0;
3895 if (Reg == "apsr") {
3896 // The flags permitted for apsr are the same flags that are allowed in
3897 // M class registers. We get the flag value and then shift the flags into
3898 // the correct place to combine with the mask.
3899 Mask = getMClassFlagsMask(Flags);
3900 if (Mask == -1)
3901 return -1;
3902 return Mask << 2;
3905 if (Reg != "cpsr" && Reg != "spsr") {
3906 return -1;
3909 // This is the same as if the flags were "fc"
3910 if (Flags.empty() || Flags == "all")
3911 return Mask | 0x9;
3913 // Inspect the supplied flags string and set the bits in the mask for
3914 // the relevant and valid flags allowed for cpsr and spsr.
3915 for (char Flag : Flags) {
3916 int FlagVal;
3917 switch (Flag) {
3918 case 'c':
3919 FlagVal = 0x1;
3920 break;
3921 case 'x':
3922 FlagVal = 0x2;
3923 break;
3924 case 's':
3925 FlagVal = 0x4;
3926 break;
3927 case 'f':
3928 FlagVal = 0x8;
3929 break;
3930 default:
3931 FlagVal = 0;
3934 // This avoids allowing strings where the same flag bit appears twice.
3935 if (!FlagVal || (Mask & FlagVal))
3936 return -1;
3937 Mask |= FlagVal;
3940 // If the register is spsr then we need to set the R bit.
3941 if (Reg == "spsr")
3942 Mask |= 0x10;
3944 return Mask;
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced, false if the register string could
// not be matched (selection then falls through to the generic path).
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 0 is the chain; operand 1 carries the register name as metadata.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the always-execute predicate (AL + no CC register) and the
    // incoming chain before building the machine node.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MRSbanked with an encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // Reject registers the subtarget's FP unit does not provide.
    if (!Subtarget->hasVFP2())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: let generic selection handle (or report) it.
  return false;
}
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
// Returns true if the node was replaced, false if the register string could
// not be matched.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 0 is the chain, operand 1 the register-name metadata, operand 2
  // (and 3 for 64-bit writes) the value(s) to be written.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the single 32-bit write value after coprocessor and opc1.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: two i32 halves are inserted as separate operands.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the always-execute predicate and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MSRbanked with an encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    // VFP writes require at least VFP2 support on the subtarget.
    if (!Subtarget->hasVFP2())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split "<reg>_<flags>" (e.g. "cpsr_fc") into the register name and its
  // flag suffix for the A/R-class mask computation below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string.
  return false;
}
// Rewrite an inline-asm node so that any two-register GPR operand pair
// (an i64 value split over two GPRs) is replaced by a single GPRPair
// virtual register, inserting the copies needed to move values between the
// pair and the original registers. Returns true if the node was rewritten
// and replaced, false if no operand needed changing.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
  // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // The glue operand, if present, is always last and is re-appended at the
  // end after the rewrite.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the fixed leading operands (chain, asm string, etc.).
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only constant flag words describe the operands that follow them.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register uses/defs/early-clobbers are candidates for pairing.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Candidates are exactly two GPR-class registers (an i64 split across a
    // pair), or a use tied to an already-rewritten def.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Rebuild the flag word for a single GPRPair register operand.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the inline-asm node with the rewritten operand list.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4331 bool ARMDAGToDAGISel::
4332 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4333 std::vector<SDValue> &OutOps) {
4334 switch(ConstraintID) {
4335 default:
4336 llvm_unreachable("Unexpected asm memory constraint");
4337 case InlineAsm::Constraint_i:
4338 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4339 // be an immediate and not a memory constraint.
4340 LLVM_FALLTHROUGH;
4341 case InlineAsm::Constraint_m:
4342 case InlineAsm::Constraint_o:
4343 case InlineAsm::Constraint_Q:
4344 case InlineAsm::Constraint_Um:
4345 case InlineAsm::Constraint_Un:
4346 case InlineAsm::Constraint_Uq:
4347 case InlineAsm::Constraint_Us:
4348 case InlineAsm::Constraint_Ut:
4349 case InlineAsm::Constraint_Uv:
4350 case InlineAsm::Constraint_Uy:
4351 // Require the address to be in a register. That is safe for all ARM
4352 // variants and it is hard to do anything much smarter without knowing
4353 // how the operand is used.
4354 OutOps.push_back(Op);
4355 return false;
4357 return true;
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
/// \param TM the ARM target machine the pass selects for.
/// \param OptLevel optimization level forwarded to the selector.
/// \returns a heap-allocated pass; ownership passes to the caller
///          (normally the pass manager).
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}