lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines an instruction selector for the ARM target.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "ARM.h"
  14 #include "ARMBaseInstrInfo.h"
  15 #include "ARMTargetMachine.h"
  16 #include "MCTargetDesc/ARMAddressingModes.h"
  17 #include "Utils/ARMBaseInfo.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/CodeGen/TargetLowering.h"
  26 #include "llvm/IR/CallingConv.h"
  27 #include "llvm/IR/Constants.h"
  28 #include "llvm/IR/DerivedTypes.h"
  29 #include "llvm/IR/Function.h"
  30 #include "llvm/IR/Intrinsics.h"
  31 #include "llvm/IR/LLVMContext.h"
  32 #include "llvm/Support/CommandLine.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 class ARMDAGToDAGISel : public SelectionDAGISel {
  53   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  54   /// make the right decision when generating code for different targets.
  55   const ARMSubtarget *Subtarget;
  56
  57 public:
  58   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  59       : SelectionDAGISel(tm, OptLevel) {}
  60
  61   bool runOnMachineFunction(MachineFunction &MF) override {
  62     // Reset the subtarget each time through.
  63     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  64     SelectionDAGISel::runOnMachineFunction(MF);
  65     return true;
  66   }
  67
  68   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  69
  70   void PreprocessISelDAG() override;
  71
  72   /// getI32Imm - Return a target constant of type i32 with the specified
  73   /// value.
  74   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  75     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  76   }
  77
  78   void Select(SDNode *N) override;
  79
  80   bool hasNoVMLxHazardUse(SDNode *N) const;
  81   bool isShifterOpProfitable(const SDValue &Shift,
  82                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  83   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  84                                SDValue &B, SDValue &C,
  85                                bool CheckProfitability = true);
  86   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  87                                SDValue &B, bool CheckProfitability = true);
  88   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  89                                     SDValue &B, SDValue &C) {
  90     // Don't apply the profitability check
  91     return SelectRegShifterOperand(N, A, B, C, false);
  92   }
  93   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B) {
  95     // Don't apply the profitability check
  96     return SelectImmShifterOperand(N, A, B, false);
  97   }
  98
  99   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
 100
 101   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 102   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 103
 104   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 105     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 106     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 107     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 108     return true;
 109   }
 110
 111   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 112                              SDValue &Offset, SDValue &Opc);
 113   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 114                              SDValue &Offset, SDValue &Opc);
 115   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 116                              SDValue &Offset, SDValue &Opc);
 117   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 118   bool SelectAddrMode3(SDValue N, SDValue &Base,
 119                        SDValue &Offset, SDValue &Opc);
 120   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 121                              SDValue &Offset, SDValue &Opc);
 122   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
 123   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
 124   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
 125   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 126   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 127
 128   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 129
 130   // Thumb Addressing Modes:
 131   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 132   bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
 133   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 134                                 SDValue &OffImm);
 135   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 136                                  SDValue &OffImm);
 137   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 138                                  SDValue &OffImm);
 139   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 140                                  SDValue &OffImm);
 141   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 142
 143   // Thumb 2 Addressing Modes:
 144   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 145   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 146                             SDValue &OffImm);
 147   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 148                                  SDValue &OffImm);
 149   template<unsigned Shift>
 150   bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
 151                             SDValue &OffImm);
 152   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 153                              SDValue &OffReg, SDValue &ShImm);
 154   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 155
 156   inline bool is_so_imm(unsigned Imm) const {
 157     return ARM_AM::getSOImmVal(Imm) != -1;
 158   }
 159
 160   inline bool is_so_imm_not(unsigned Imm) const {
 161     return ARM_AM::getSOImmVal(~Imm) != -1;
 162   }
 163
 164   inline bool is_t2_so_imm(unsigned Imm) const {
 165     return ARM_AM::getT2SOImmVal(Imm) != -1;
 166   }
 167
 168   inline bool is_t2_so_imm_not(unsigned Imm) const {
 169     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 170   }
 171
 172   // Include the pieces autogenerated from the target description.
 173 #include "ARMGenDAGISel.inc"
 174
 175 private:
 176   void transferMemOperands(SDNode *Src, SDNode *Dst);
 177
 178   /// Indexed (pre/post inc/dec) load matching code for ARM.
 179   bool tryARMIndexedLoad(SDNode *N);
 180   bool tryT1IndexedLoad(SDNode *N);
 181   bool tryT2IndexedLoad(SDNode *N);
 182
 183   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 184   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 185   /// loads of D registers and even subregs and odd subregs of Q registers.
 186   /// For NumVecs <= 2, QOpcodes1 is not used.
 187   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 188                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 189                  const uint16_t *QOpcodes1);
 190
 191   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 192   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 193   /// stores of D registers and even subregs and odd subregs of Q registers.
 194   /// For NumVecs <= 2, QOpcodes1 is not used.
 195   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 196                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 197                  const uint16_t *QOpcodes1);
 198
 199   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 200   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 201   /// load/store of D registers and Q registers.
 202   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 203                        unsigned NumVecs, const uint16_t *DOpcodes,
 204                        const uint16_t *QOpcodes);
 205
 206   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 207   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 208   /// for loading D registers.
 209   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
 210                     unsigned NumVecs, const uint16_t *DOpcodes,
 211                     const uint16_t *QOpcodes0 = nullptr,
 212                     const uint16_t *QOpcodes1 = nullptr);
 213
 214   /// Try to select SBFX/UBFX instructions for ARM.
 215   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 216
 217   // Select special operations if node forms integer ABS pattern
 218   bool tryABSOp(SDNode *N);
 219
 220   bool tryReadRegister(SDNode *N);
 221   bool tryWriteRegister(SDNode *N);
 222
 223   bool tryInlineAsm(SDNode *N);
 224
 225   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 226
 227   void SelectCMP_SWAP(SDNode *N);
 228
 229   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 230   /// inline asm expressions.
 231   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 232                                     std::vector<SDValue> &OutOps) override;
 233
 234   // Form pairs of consecutive R, S, D, or Q registers.
 235   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 236   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 237   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 238   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 239
 240   // Form sequences of 4 consecutive S, D, or Q registers.
 241   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 242   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 243   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 244
 245   // Get the alignment operand for a NEON VLD or VST instruction.
 246   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 247                         bool is64BitVector);
 248
 249   /// Returns the number of instructions required to materialize the given
 250   /// constant in a register, or 3 if a literal pool load is needed.
 251   unsigned ConstantMaterializationCost(unsigned Val) const;
 252
 253   /// Checks if N is a multiplication by a constant where we can extract out a
 254   /// power of two from the constant so that it can be used in a shift, but only
 255   /// if it simplifies the materialization of the constant. Returns true if it
 256   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 257   /// out and to NewMulConst the new constant to be multiplied by.
 258   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 259                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 260
 261   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 262   /// selected when N would have been selected.
 263   void replaceDAGValue(const SDValue &N, SDValue M);
 264 };
 265 }
 266
 267 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 268 /// operand. If so Imm will receive the 32-bit value.
 269 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 270   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 271     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 272     return true;
 273   }
 274   return false;
 275 }
 276
 277 // isInt32Immediate - This method tests to see if a constant operand.
 278 // If so Imm will receive the 32 bit value.
 279 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 280   return isInt32Immediate(N.getNode(), Imm);
 281 }
 282
 283 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 284 // opcode and that it has a immediate integer right operand.
 285 // If so Imm will receive the 32 bit value.
 286 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 287   return N->getOpcode() == Opc &&
 288          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 289 }
 290
 291 /// Check whether a particular node is a constant value representable as
 292 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 293 ///
 294 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 295 static bool isScaledConstantInRange(SDValue Node, int Scale,
 296                                     int RangeMin, int RangeMax,
 297                                     int &ScaledConstant) {
 298   assert(Scale > 0 && "Invalid scale!");
 299
 300   // Check that this is a constant.
 301   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 302   if (!C)
 303     return false;
 304
 305   ScaledConstant = (int) C->getZExtValue();
 306   if ((ScaledConstant % Scale) != 0)
 307     return false;
 308
 309   ScaledConstant /= Scale;
 310   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 311 }
 312
 313 void ARMDAGToDAGISel::PreprocessISelDAG() {
 314   if (!Subtarget->hasV6T2Ops())
 315     return;
 316
 317   bool isThumb2 = Subtarget->isThumb();
 318   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 319        E = CurDAG->allnodes_end(); I != E; ) {
 320     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 321
 322     if (N->getOpcode() != ISD::ADD)
 323       continue;
 324
 325     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 326     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 327     // trailing zeros, e.g. 1020.
 328     // Transform the expression to
 329     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 330     // of trailing zeros of c2. The left shift would be folded as an shifter
 331     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 332     // node (UBFX).
 333
 334     SDValue N0 = N->getOperand(0);
 335     SDValue N1 = N->getOperand(1);
 336     unsigned And_imm = 0;
 337     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 338       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 339         std::swap(N0, N1);
 340     }
 341     if (!And_imm)
 342       continue;
 343
 344     // Check if the AND mask is an immediate of the form: 000.....1111111100
 345     unsigned TZ = countTrailingZeros(And_imm);
 346     if (TZ != 1 && TZ != 2)
 347       // Be conservative here. Shifter operands aren't always free. e.g. On
 348       // Swift, left shifter operand of 1 / 2 for free but others are not.
 349       // e.g.
 350       //  ubfx   r3, r1, #16, #8
 351       //  ldr.w  r3, [r0, r3, lsl #2]
 352       // vs.
 353       //  mov.w  r9, #1020
 354       //  and.w  r2, r9, r1, lsr #14
 355       //  ldr    r2, [r0, r2]
 356       continue;
 357     And_imm >>= TZ;
 358     if (And_imm & (And_imm + 1))
 359       continue;
 360
 361     // Look for (and (srl X, c1), c2).
 362     SDValue Srl = N1.getOperand(0);
 363     unsigned Srl_imm = 0;
 364     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 365         (Srl_imm <= 2))
 366       continue;
 367
 368     // Make sure first operand is not a shifter operand which would prevent
 369     // folding of the left shift.
 370     SDValue CPTmp0;
 371     SDValue CPTmp1;
 372     SDValue CPTmp2;
 373     if (isThumb2) {
 374       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 375         continue;
 376     } else {
 377       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 378           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 379         continue;
 380     }
 381
 382     // Now make the transformation.
 383     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 384                           Srl.getOperand(0),
 385                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 386                                               MVT::i32));
 387     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 388                          Srl,
 389                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 390     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 391                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 392     CurDAG->UpdateNodeOperands(N, N0, N1);
 393   }
 394 }
 395
 396 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 397 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 398 /// least on current ARM implementations) which should be avoidded.
 399 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 400   if (OptLevel == CodeGenOpt::None)
 401     return true;
 402
 403   if (!Subtarget->hasVMLxHazards())
 404     return true;
 405
 406   if (!N->hasOneUse())
 407     return false;
 408
 409   SDNode *Use = *N->use_begin();
 410   if (Use->getOpcode() == ISD::CopyToReg)
 411     return true;
 412   if (Use->isMachineOpcode()) {
 413     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 414         CurDAG->getSubtarget().getInstrInfo());
 415
 416     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 417     if (MCID.mayStore())
 418       return true;
 419     unsigned Opcode = MCID.getOpcode();
 420     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 421       return true;
 422     // vmlx feeding into another vmlx. We actually want to unfold
 423     // the use later in the MLxExpansion pass. e.g.
 424     // vmla
 425     // vmla (stall 8 cycles)
 426     //
 427     // vmul (5 cycles)
 428     // vadd (5 cycles)
 429     // vmla
 430     // This adds up to about 18 - 19 cycles.
 431     //
 432     // vmla
 433     // vmul (stall 4 cycles)
 434     // vadd adds up to about 14 cycles.
 435     return TII->isFpMLxInstruction(Opcode);
 436   }
 437
 438   return false;
 439 }
 440
 441 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 442                                             ARM_AM::ShiftOpc ShOpcVal,
 443                                             unsigned ShAmt) {
 444   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 445     return true;
 446   if (Shift.hasOneUse())
 447     return true;
 448   // R << 2 is free.
 449   return ShOpcVal == ARM_AM::lsl &&
 450          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 451 }
 452
 453 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 454   if (Subtarget->isThumb()) {
 455     if (Val <= 255) return 1;                               // MOV
 456     if (Subtarget->hasV6T2Ops() &&
 457         (Val <= 0xffff ||                                   // MOV
 458          ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
 459          ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
 460       return 1;
 461     if (Val <= 510) return 2;                               // MOV + ADDi8
 462     if (~Val <= 255) return 2;                              // MOV + MVN
 463     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 464   } else {
 465     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 466     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 467     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 468     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 469   }
 470   if (Subtarget->useMovt()) return 2; // MOVW + MOVT
 471   return 3; // Literal pool load
 472 }
 473
 474 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 475                                              unsigned MaxShift,
 476                                              unsigned &PowerOfTwo,
 477                                              SDValue &NewMulConst) const {
 478   assert(N.getOpcode() == ISD::MUL);
 479   assert(MaxShift > 0);
 480
 481   // If the multiply is used in more than one place then changing the constant
 482   // will make other uses incorrect, so don't.
 483   if (!N.hasOneUse()) return false;
 484   // Check if the multiply is by a constant
 485   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 486   if (!MulConst) return false;
 487   // If the constant is used in more than one place then modifying it will mean
 488   // we need to materialize two constants instead of one, which is a bad idea.
 489   if (!MulConst->hasOneUse()) return false;
 490   unsigned MulConstVal = MulConst->getZExtValue();
 491   if (MulConstVal == 0) return false;
 492
 493   // Find the largest power of 2 that MulConstVal is a multiple of
 494   PowerOfTwo = MaxShift;
 495   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 496     --PowerOfTwo;
 497     if (PowerOfTwo == 0) return false;
 498   }
 499
 500   // Only optimise if the new cost is better
 501   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 502   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 503   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 504   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 505   return NewCost < OldCost;
 506 }
 507
 508 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 509   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 510   ReplaceUses(N, M);
 511 }
 512
 513 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 514                                               SDValue &BaseReg,
 515                                               SDValue &Opc,
 516                                               bool CheckProfitability) {
 517   if (DisableShifterOp)
 518     return false;
 519
 520   // If N is a multiply-by-constant and it's profitable to extract a shift and
 521   // use it in a shifted operand do so.
 522   if (N.getOpcode() == ISD::MUL) {
 523     unsigned PowerOfTwo = 0;
 524     SDValue NewMulConst;
 525     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 526       HandleSDNode Handle(N);
 527       SDLoc Loc(N);
 528       replaceDAGValue(N.getOperand(1), NewMulConst);
 529       BaseReg = Handle.getValue();
 530       Opc = CurDAG->getTargetConstant(
 531           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 532       return true;
 533     }
 534   }
 535
 536   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 537
 538   // Don't match base register only case. That is matched to a separate
 539   // lower complexity pattern with explicit register operand.
 540   if (ShOpcVal == ARM_AM::no_shift) return false;
 541
 542   BaseReg = N.getOperand(0);
 543   unsigned ShImmVal = 0;
 544   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 545   if (!RHS) return false;
 546   ShImmVal = RHS->getZExtValue() & 31;
 547   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 548                                   SDLoc(N), MVT::i32);
 549   return true;
 550 }
 551
 552 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 553                                               SDValue &BaseReg,
 554                                               SDValue &ShReg,
 555                                               SDValue &Opc,
 556                                               bool CheckProfitability) {
 557   if (DisableShifterOp)
 558     return false;
 559
 560   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 561
 562   // Don't match base register only case. That is matched to a separate
 563   // lower complexity pattern with explicit register operand.
 564   if (ShOpcVal == ARM_AM::no_shift) return false;
 565
 566   BaseReg = N.getOperand(0);
 567   unsigned ShImmVal = 0;
 568   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 569   if (RHS) return false;
 570
 571   ShReg = N.getOperand(1);
 572   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 573     return false;
 574   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 575                                   SDLoc(N), MVT::i32);
 576   return true;
 577 }
 578
 579 // Determine whether an ISD::OR's operands are suitable to turn the operation
 580 // into an addition, which often has more compact encodings.
 581 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
 582   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
 583   Out = N;
 584   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
 585 }
 586
 587
 588 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 589                                           SDValue &Base,
 590                                           SDValue &OffImm) {
 591   // Match simple R + imm12 operands.
 592
 593   // Base only.
 594   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 595       !CurDAG->isBaseWithConstantOffset(N)) {
 596     if (N.getOpcode() == ISD::FrameIndex) {
 597       // Match frame index.
 598       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 599       Base = CurDAG->getTargetFrameIndex(
 600           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 601       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 602       return true;
 603     }
 604
 605     if (N.getOpcode() == ARMISD::Wrapper &&
 606         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 607         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 608         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 609       Base = N.getOperand(0);
 610     } else
 611       Base = N;
 612     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 613     return true;
 614   }
 615
 616   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 617     int RHSC = (int)RHS->getSExtValue();
 618     if (N.getOpcode() == ISD::SUB)
 619       RHSC = -RHSC;
 620
 621     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 622       Base   = N.getOperand(0);
 623       if (Base.getOpcode() == ISD::FrameIndex) {
 624         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 625         Base = CurDAG->getTargetFrameIndex(
 626             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 627       }
 628       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 629       return true;
 630     }
 631   }
 632
 633   // Base only.
 634   Base = N;
 635   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 636   return true;
 637 }
 638
 639
 640
 641 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 642                                       SDValue &Opc) {
 643   if (N.getOpcode() == ISD::MUL &&
 644       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 645     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 646       // X * [3,5,9] -> X + X * [2,4,8] etc.
 647       int RHSC = (int)RHS->getZExtValue();
 648       if (RHSC & 1) {
 649         RHSC = RHSC & ~1;
 650         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 651         if (RHSC < 0) {
 652           AddSub = ARM_AM::sub;
 653           RHSC = - RHSC;
 654         }
 655         if (isPowerOf2_32(RHSC)) {
 656           unsigned ShAmt = Log2_32(RHSC);
 657           Base = Offset = N.getOperand(0);
 658           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 659                                                             ARM_AM::lsl),
 660                                           SDLoc(N), MVT::i32);
 661           return true;
 662         }
 663       }
 664     }
 665   }
 666
 667   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 668       // ISD::OR that is equivalent to an ISD::ADD.
 669       !CurDAG->isBaseWithConstantOffset(N))
 670     return false;
 671
 672   // Leave simple R +/- imm12 operands for LDRi12
 673   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 674     int RHSC;
 675     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 676                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 677       return false;
 678   }
 679
 680   // Otherwise this is R +/- [possibly shifted] R.
 681   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 682   ARM_AM::ShiftOpc ShOpcVal =
 683     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 684   unsigned ShAmt = 0;
 685
 686   Base   = N.getOperand(0);
 687   Offset = N.getOperand(1);
 688
 689   if (ShOpcVal != ARM_AM::no_shift) {
 690     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 691     // it.
 692     if (ConstantSDNode *Sh =
 693            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 694       ShAmt = Sh->getZExtValue();
 695       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 696         Offset = N.getOperand(1).getOperand(0);
 697       else {
 698         ShAmt = 0;
 699         ShOpcVal = ARM_AM::no_shift;
 700       }
 701     } else {
 702       ShOpcVal = ARM_AM::no_shift;
 703     }
 704   }
 705
 706   // Try matching (R shl C) + (R).
 707   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 708       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 709         N.getOperand(0).hasOneUse())) {
 710     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 711     if (ShOpcVal != ARM_AM::no_shift) {
 712       // Check to see if the RHS of the shift is a constant, if not, we can't
 713       // fold it.
 714       if (ConstantSDNode *Sh =
 715           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 716         ShAmt = Sh->getZExtValue();
 717         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 718           Offset = N.getOperand(0).getOperand(0);
 719           Base = N.getOperand(1);
 720         } else {
 721           ShAmt = 0;
 722           ShOpcVal = ARM_AM::no_shift;
 723         }
 724       } else {
 725         ShOpcVal = ARM_AM::no_shift;
 726       }
 727     }
 728   }
 729
 730   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 731   // and use it in a shifted operand do so.
 732   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 733     unsigned PowerOfTwo = 0;
 734     SDValue NewMulConst;
 735     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 736       HandleSDNode Handle(Offset);
 737       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 738       Offset = Handle.getValue();
 739       ShAmt = PowerOfTwo;
 740       ShOpcVal = ARM_AM::lsl;
 741     }
 742   }
 743
 744   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 745                                   SDLoc(N), MVT::i32);
 746   return true;
 747 }
 748
 749 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 750                                             SDValue &Offset, SDValue &Opc) {
 751   unsigned Opcode = Op->getOpcode();
 752   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 753     ? cast<LoadSDNode>(Op)->getAddressingMode()
 754     : cast<StoreSDNode>(Op)->getAddressingMode();
 755   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 756     ? ARM_AM::add : ARM_AM::sub;
 757   int Val;
 758   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 759     return false;
 760
 761   Offset = N;
 762   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 763   unsigned ShAmt = 0;
 764   if (ShOpcVal != ARM_AM::no_shift) {
 765     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 766     // it.
 767     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 768       ShAmt = Sh->getZExtValue();
 769       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 770         Offset = N.getOperand(0);
 771       else {
 772         ShAmt = 0;
 773         ShOpcVal = ARM_AM::no_shift;
 774       }
 775     } else {
 776       ShOpcVal = ARM_AM::no_shift;
 777     }
 778   }
 779
 780   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 781                                   SDLoc(N), MVT::i32);
 782   return true;
 783 }
 784
 785 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 786                                             SDValue &Offset, SDValue &Opc) {
 787   unsigned Opcode = Op->getOpcode();
 788   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 789     ? cast<LoadSDNode>(Op)->getAddressingMode()
 790     : cast<StoreSDNode>(Op)->getAddressingMode();
 791   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 792     ? ARM_AM::add : ARM_AM::sub;
 793   int Val;
 794   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 795     if (AddSub == ARM_AM::sub) Val *= -1;
 796     Offset = CurDAG->getRegister(0, MVT::i32);
 797     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 798     return true;
 799   }
 800
 801   return false;
 802 }
 803
 804
 805 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 806                                             SDValue &Offset, SDValue &Opc) {
 807   unsigned Opcode = Op->getOpcode();
 808   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 809     ? cast<LoadSDNode>(Op)->getAddressingMode()
 810     : cast<StoreSDNode>(Op)->getAddressingMode();
 811   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 812     ? ARM_AM::add : ARM_AM::sub;
 813   int Val;
 814   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 815     Offset = CurDAG->getRegister(0, MVT::i32);
 816     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 817                                                       ARM_AM::no_shift),
 818                                     SDLoc(Op), MVT::i32);
 819     return true;
 820   }
 821
 822   return false;
 823 }
 824
 825 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 826   Base = N;
 827   return true;
 828 }
 829
 830 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 831                                       SDValue &Base, SDValue &Offset,
 832                                       SDValue &Opc) {
 833   if (N.getOpcode() == ISD::SUB) {
 834     // X - C  is canonicalize to X + -C, no need to handle it here.
 835     Base = N.getOperand(0);
 836     Offset = N.getOperand(1);
 837     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 838                                     MVT::i32);
 839     return true;
 840   }
 841
 842   if (!CurDAG->isBaseWithConstantOffset(N)) {
 843     Base = N;
 844     if (N.getOpcode() == ISD::FrameIndex) {
 845       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 846       Base = CurDAG->getTargetFrameIndex(
 847           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 848     }
 849     Offset = CurDAG->getRegister(0, MVT::i32);
 850     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 851                                     MVT::i32);
 852     return true;
 853   }
 854
 855   // If the RHS is +/- imm8, fold into addr mode.
 856   int RHSC;
 857   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 858                               -256 + 1, 256, RHSC)) { // 8 bits.
 859     Base = N.getOperand(0);
 860     if (Base.getOpcode() == ISD::FrameIndex) {
 861       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 862       Base = CurDAG->getTargetFrameIndex(
 863           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 864     }
 865     Offset = CurDAG->getRegister(0, MVT::i32);
 866
 867     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 868     if (RHSC < 0) {
 869       AddSub = ARM_AM::sub;
 870       RHSC = -RHSC;
 871     }
 872     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 873                                     MVT::i32);
 874     return true;
 875   }
 876
 877   Base = N.getOperand(0);
 878   Offset = N.getOperand(1);
 879   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 880                                   MVT::i32);
 881   return true;
 882 }
 883
 884 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 885                                             SDValue &Offset, SDValue &Opc) {
 886   unsigned Opcode = Op->getOpcode();
 887   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 888     ? cast<LoadSDNode>(Op)->getAddressingMode()
 889     : cast<StoreSDNode>(Op)->getAddressingMode();
 890   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 891     ? ARM_AM::add : ARM_AM::sub;
 892   int Val;
 893   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 894     Offset = CurDAG->getRegister(0, MVT::i32);
 895     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 896                                     MVT::i32);
 897     return true;
 898   }
 899
 900   Offset = N;
 901   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 902                                   MVT::i32);
 903   return true;
 904 }
 905
 906 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
 907                                         bool FP16) {
 908   if (!CurDAG->isBaseWithConstantOffset(N)) {
 909     Base = N;
 910     if (N.getOpcode() == ISD::FrameIndex) {
 911       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 912       Base = CurDAG->getTargetFrameIndex(
 913           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 914     } else if (N.getOpcode() == ARMISD::Wrapper &&
 915                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 916                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 917                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 918       Base = N.getOperand(0);
 919     }
 920     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 921                                        SDLoc(N), MVT::i32);
 922     return true;
 923   }
 924
 925   // If the RHS is +/- imm8, fold into addr mode.
 926   int RHSC;
 927   const int Scale = FP16 ? 2 : 4;
 928
 929   if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
 930     Base = N.getOperand(0);
 931     if (Base.getOpcode() == ISD::FrameIndex) {
 932       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 933       Base = CurDAG->getTargetFrameIndex(
 934           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 935     }
 936
 937     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 938     if (RHSC < 0) {
 939       AddSub = ARM_AM::sub;
 940       RHSC = -RHSC;
 941     }
 942
 943     if (FP16)
 944       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
 945                                          SDLoc(N), MVT::i32);
 946     else
 947       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 948                                          SDLoc(N), MVT::i32);
 949
 950     return true;
 951   }
 952
 953   Base = N;
 954
 955   if (FP16)
 956     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
 957                                        SDLoc(N), MVT::i32);
 958   else
 959     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 960                                        SDLoc(N), MVT::i32);
 961
 962   return true;
 963 }
 964
 965 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 966                                       SDValue &Base, SDValue &Offset) {
 967   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
 968 }
 969
 970 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
 971                                           SDValue &Base, SDValue &Offset) {
 972   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
 973 }
 974
 975 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 976                                       SDValue &Align) {
 977   Addr = N;
 978
 979   unsigned Alignment = 0;
 980
 981   MemSDNode *MemN = cast<MemSDNode>(Parent);
 982
 983   if (isa<LSBaseSDNode>(MemN) ||
 984       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
 985         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
 986        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
 987     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 988     // The maximum alignment is equal to the memory size being referenced.
 989     unsigned MMOAlign = MemN->getAlignment();
 990     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
 991     if (MMOAlign >= MemSize && MemSize > 1)
 992       Alignment = MemSize;
 993   } else {
 994     // All other uses of addrmode6 are for intrinsics.  For now just record
 995     // the raw alignment value; it will be refined later based on the legal
 996     // alignment operands for the intrinsic.
 997     Alignment = MemN->getAlignment();
 998   }
 999
1000   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1001   return true;
1002 }
1003
1004 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1005                                             SDValue &Offset) {
1006   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1007   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1008   if (AM != ISD::POST_INC)
1009     return false;
1010   Offset = N;
1011   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1012     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1013       Offset = CurDAG->getRegister(0, MVT::i32);
1014   }
1015   return true;
1016 }
1017
1018 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1019                                        SDValue &Offset, SDValue &Label) {
1020   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1021     Offset = N.getOperand(0);
1022     SDValue N1 = N.getOperand(1);
1023     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1024                                       SDLoc(N), MVT::i32);
1025     return true;
1026   }
1027
1028   return false;
1029 }
1030
1031
1032 //===----------------------------------------------------------------------===//
1033 //                         Thumb Addressing Modes
1034 //===----------------------------------------------------------------------===//
1035
1036 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1037   // Negative numbers are difficult to materialise in thumb1. If we are
1038   // selecting the add of a negative, instead try to select ri with a zero
1039   // offset, so create the add node directly which will become a sub.
1040   if (N.getOpcode() != ISD::ADD)
1041     return false;
1042
1043   // Look for an imm which is not legal for ld/st, but is legal for sub.
1044   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1045     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1046
1047   return false;
1048 }
1049
1050 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1051                                                 SDValue &Offset) {
1052   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1053     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1054     if (!NC || !NC->isNullValue())
1055       return false;
1056
1057     Base = Offset = N;
1058     return true;
1059   }
1060
1061   Base = N.getOperand(0);
1062   Offset = N.getOperand(1);
1063   return true;
1064 }
1065
1066 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1067                                             SDValue &Offset) {
1068   if (shouldUseZeroOffsetLdSt(N))
1069     return false; // Select ri instead
1070   return SelectThumbAddrModeRRSext(N, Base, Offset);
1071 }
1072
1073 bool
1074 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1075                                           SDValue &Base, SDValue &OffImm) {
1076   if (shouldUseZeroOffsetLdSt(N)) {
1077     Base = N;
1078     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1079     return true;
1080   }
1081
1082   if (!CurDAG->isBaseWithConstantOffset(N)) {
1083     if (N.getOpcode() == ISD::ADD) {
1084       return false; // We want to select register offset instead
1085     } else if (N.getOpcode() == ARMISD::Wrapper &&
1086         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1087         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1088         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1089         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1090       Base = N.getOperand(0);
1091     } else {
1092       Base = N;
1093     }
1094
1095     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1096     return true;
1097   }
1098
1099   // If the RHS is + imm5 * scale, fold into addr mode.
1100   int RHSC;
1101   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1102     Base = N.getOperand(0);
1103     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1104     return true;
1105   }
1106
1107   // Offset is too large, so use register offset instead.
1108   return false;
1109 }
1110
1111 bool
1112 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1113                                            SDValue &OffImm) {
1114   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1115 }
1116
1117 bool
1118 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1119                                            SDValue &OffImm) {
1120   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1121 }
1122
1123 bool
1124 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1125                                            SDValue &OffImm) {
1126   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1127 }
1128
1129 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1130                                             SDValue &Base, SDValue &OffImm) {
1131   if (N.getOpcode() == ISD::FrameIndex) {
1132     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1133     // Only multiples of 4 are allowed for the offset, so the frame object
1134     // alignment must be at least 4.
1135     MachineFrameInfo &MFI = MF->getFrameInfo();
1136     if (MFI.getObjectAlignment(FI) < 4)
1137       MFI.setObjectAlignment(FI, 4);
1138     Base = CurDAG->getTargetFrameIndex(
1139         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1140     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1141     return true;
1142   }
1143
1144   if (!CurDAG->isBaseWithConstantOffset(N))
1145     return false;
1146
1147   if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1148     // If the RHS is + imm8 * scale, fold into addr mode.
1149     int RHSC;
1150     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1151       Base = N.getOperand(0);
1152       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1153       // Make sure the offset is inside the object, or we might fail to
1154       // allocate an emergency spill slot. (An out-of-range access is UB, but
1155       // it could show up anyway.)
1156       MachineFrameInfo &MFI = MF->getFrameInfo();
1157       if (RHSC * 4 < MFI.getObjectSize(FI)) {
1158         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1159         // indexed by the LHS must be 4-byte aligned.
1160         if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1161           MFI.setObjectAlignment(FI, 4);
1162         if (MFI.getObjectAlignment(FI) >= 4) {
1163           Base = CurDAG->getTargetFrameIndex(
1164               FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1165           OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1166           return true;
1167         }
1168       }
1169     }
1170   }
1171
1172   return false;
1173 }
1174
1175
1176 //===----------------------------------------------------------------------===//
1177 //                        Thumb 2 Addressing Modes
1178 //===----------------------------------------------------------------------===//
1179
1180
1181 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1182                                             SDValue &Base, SDValue &OffImm) {
1183   // Match simple R + imm12 operands.
1184
1185   // Base only.
1186   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1187       !CurDAG->isBaseWithConstantOffset(N)) {
1188     if (N.getOpcode() == ISD::FrameIndex) {
1189       // Match frame index.
1190       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1191       Base = CurDAG->getTargetFrameIndex(
1192           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1193       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1194       return true;
1195     }
1196
1197     if (N.getOpcode() == ARMISD::Wrapper &&
1198         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1199         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1200         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1201       Base = N.getOperand(0);
1202       if (Base.getOpcode() == ISD::TargetConstantPool)
1203         return false;  // We want to select t2LDRpci instead.
1204     } else
1205       Base = N;
1206     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1207     return true;
1208   }
1209
1210   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1211     if (SelectT2AddrModeImm8(N, Base, OffImm))
1212       // Let t2LDRi8 handle (R - imm8).
1213       return false;
1214
1215     int RHSC = (int)RHS->getZExtValue();
1216     if (N.getOpcode() == ISD::SUB)
1217       RHSC = -RHSC;
1218
1219     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1220       Base   = N.getOperand(0);
1221       if (Base.getOpcode() == ISD::FrameIndex) {
1222         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1223         Base = CurDAG->getTargetFrameIndex(
1224             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1225       }
1226       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1227       return true;
1228     }
1229   }
1230
1231   // Base only.
1232   Base = N;
1233   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1234   return true;
1235 }
1236
1237 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1238                                            SDValue &Base, SDValue &OffImm) {
1239   // Match simple R - imm8 operands.
1240   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1241       !CurDAG->isBaseWithConstantOffset(N))
1242     return false;
1243
1244   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1245     int RHSC = (int)RHS->getSExtValue();
1246     if (N.getOpcode() == ISD::SUB)
1247       RHSC = -RHSC;
1248
1249     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1250       Base = N.getOperand(0);
1251       if (Base.getOpcode() == ISD::FrameIndex) {
1252         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253         Base = CurDAG->getTargetFrameIndex(
1254             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1255       }
1256       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1257       return true;
1258     }
1259   }
1260
1261   return false;
1262 }
1263
1264 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1265                                                  SDValue &OffImm){
1266   unsigned Opcode = Op->getOpcode();
1267   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1268     ? cast<LoadSDNode>(Op)->getAddressingMode()
1269     : cast<StoreSDNode>(Op)->getAddressingMode();
1270   int RHSC;
1271   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1272     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1273       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1274       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1275     return true;
1276   }
1277
1278   return false;
1279 }
1280
1281 template<unsigned Shift>
1282 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
1283                                            SDValue &Base, SDValue &OffImm) {
1284   if (N.getOpcode() == ISD::SUB ||
1285       CurDAG->isBaseWithConstantOffset(N)) {
1286     if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1287       int RHSC = (int)RHS->getZExtValue();
1288       if (N.getOpcode() == ISD::SUB)
1289         RHSC = -RHSC;
1290
1291       if (isShiftedInt<7, Shift>(RHSC)) {
1292         Base = N.getOperand(0);
1293         if (Base.getOpcode() == ISD::FrameIndex) {
1294           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1295           Base = CurDAG->getTargetFrameIndex(
1296             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1297         }
1298         OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1299         return true;
1300       }
1301     }
1302   }
1303
1304   // Base only.
1305   Base = N;
1306   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1307   return true;
1308 }
1309
1310 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1311                                             SDValue &Base,
1312                                             SDValue &OffReg, SDValue &ShImm) {
1313   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1314   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1315     return false;
1316
1317   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1318   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1319     int RHSC = (int)RHS->getZExtValue();
1320     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1321       return false;
1322     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1323       return false;
1324   }
1325
1326   // Look for (R + R) or (R + (R << [1,2,3])).
1327   unsigned ShAmt = 0;
1328   Base   = N.getOperand(0);
1329   OffReg = N.getOperand(1);
1330
1331   // Swap if it is ((R << c) + R).
1332   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1333   if (ShOpcVal != ARM_AM::lsl) {
1334     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1335     if (ShOpcVal == ARM_AM::lsl)
1336       std::swap(Base, OffReg);
1337   }
1338
1339   if (ShOpcVal == ARM_AM::lsl) {
1340     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1341     // it.
1342     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1343       ShAmt = Sh->getZExtValue();
1344       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1345         OffReg = OffReg.getOperand(0);
1346       else {
1347         ShAmt = 0;
1348       }
1349     }
1350   }
1351
1352   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1353   // and use it in a shifted operand do so.
1354   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1355     unsigned PowerOfTwo = 0;
1356     SDValue NewMulConst;
1357     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1358       HandleSDNode Handle(OffReg);
1359       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1360       OffReg = Handle.getValue();
1361       ShAmt = PowerOfTwo;
1362     }
1363   }
1364
1365   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1366
1367   return true;
1368 }
1369
1370 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1371                                                 SDValue &OffImm) {
1372   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1373   // instructions.
1374   Base = N;
1375   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1376
1377   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1378     return true;
1379
1380   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1381   if (!RHS)
1382     return true;
1383
1384   uint32_t RHSC = (int)RHS->getZExtValue();
1385   if (RHSC > 1020 || RHSC % 4 != 0)
1386     return true;
1387
1388   Base = N.getOperand(0);
1389   if (Base.getOpcode() == ISD::FrameIndex) {
1390     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1391     Base = CurDAG->getTargetFrameIndex(
1392         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1393   }
1394
1395   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1396   return true;
1397 }
1398
1399 //===--------------------------------------------------------------------===//
1400
1401 /// getAL - Returns a ARMCC::AL immediate node.
1402 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1403   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1404 }
1405
1406 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1407   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1408   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1409 }
1410
1411 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1412   LoadSDNode *LD = cast<LoadSDNode>(N);
1413   ISD::MemIndexedMode AM = LD->getAddressingMode();
1414   if (AM == ISD::UNINDEXED)
1415     return false;
1416
1417   EVT LoadedVT = LD->getMemoryVT();
1418   SDValue Offset, AMOpc;
1419   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1420   unsigned Opcode = 0;
1421   bool Match = false;
1422   if (LoadedVT == MVT::i32 && isPre &&
1423       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1424     Opcode = ARM::LDR_PRE_IMM;
1425     Match = true;
1426   } else if (LoadedVT == MVT::i32 && !isPre &&
1427       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1428     Opcode = ARM::LDR_POST_IMM;
1429     Match = true;
1430   } else if (LoadedVT == MVT::i32 &&
1431       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1432     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1433     Match = true;
1434
1435   } else if (LoadedVT == MVT::i16 &&
1436              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1437     Match = true;
1438     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1439       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1440       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1441   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1442     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1443       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1444         Match = true;
1445         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1446       }
1447     } else {
1448       if (isPre &&
1449           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1450         Match = true;
1451         Opcode = ARM::LDRB_PRE_IMM;
1452       } else if (!isPre &&
1453                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1454         Match = true;
1455         Opcode = ARM::LDRB_POST_IMM;
1456       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1457         Match = true;
1458         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1459       }
1460     }
1461   }
1462
1463   if (Match) {
1464     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1465       SDValue Chain = LD->getChain();
1466       SDValue Base = LD->getBasePtr();
1467       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1468                        CurDAG->getRegister(0, MVT::i32), Chain };
1469       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1470                                            MVT::Other, Ops);
1471       transferMemOperands(N, New);
1472       ReplaceNode(N, New);
1473       return true;
1474     } else {
1475       SDValue Chain = LD->getChain();
1476       SDValue Base = LD->getBasePtr();
1477       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1478                        CurDAG->getRegister(0, MVT::i32), Chain };
1479       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1480                                            MVT::Other, Ops);
1481       transferMemOperands(N, New);
1482       ReplaceNode(N, New);
1483       return true;
1484     }
1485   }
1486
1487   return false;
1488 }
1489
1490 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1491   LoadSDNode *LD = cast<LoadSDNode>(N);
1492   EVT LoadedVT = LD->getMemoryVT();
1493   ISD::MemIndexedMode AM = LD->getAddressingMode();
1494   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1495       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1496     return false;
1497
1498   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1499   if (!COffs || COffs->getZExtValue() != 4)
1500     return false;
1501
1502   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1503   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1504   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1505   // ISel.
1506   SDValue Chain = LD->getChain();
1507   SDValue Base = LD->getBasePtr();
1508   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1509                    CurDAG->getRegister(0, MVT::i32), Chain };
1510   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1511                                        MVT::i32, MVT::Other, Ops);
1512   transferMemOperands(N, New);
1513   ReplaceNode(N, New);
1514   return true;
1515 }
1516
1517 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1518   LoadSDNode *LD = cast<LoadSDNode>(N);
1519   ISD::MemIndexedMode AM = LD->getAddressingMode();
1520   if (AM == ISD::UNINDEXED)
1521     return false;
1522
1523   EVT LoadedVT = LD->getMemoryVT();
1524   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1525   SDValue Offset;
1526   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1527   unsigned Opcode = 0;
1528   bool Match = false;
1529   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1530     switch (LoadedVT.getSimpleVT().SimpleTy) {
1531     case MVT::i32:
1532       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1533       break;
1534     case MVT::i16:
1535       if (isSExtLd)
1536         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1537       else
1538         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1539       break;
1540     case MVT::i8:
1541     case MVT::i1:
1542       if (isSExtLd)
1543         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1544       else
1545         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1546       break;
1547     default:
1548       return false;
1549     }
1550     Match = true;
1551   }
1552
1553   if (Match) {
1554     SDValue Chain = LD->getChain();
1555     SDValue Base = LD->getBasePtr();
1556     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1557                      CurDAG->getRegister(0, MVT::i32), Chain };
1558     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1559                                          MVT::Other, Ops);
1560     transferMemOperands(N, New);
1561     ReplaceNode(N, New);
1562     return true;
1563   }
1564
1565   return false;
1566 }
1567
1568 /// Form a GPRPair pseudo register from a pair of GPR regs.
1569 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1570   SDLoc dl(V0.getNode());
1571   SDValue RegClass =
1572     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1573   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1574   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1575   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1576   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1577 }
1578
1579 /// Form a D register from a pair of S registers.
1580 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1581   SDLoc dl(V0.getNode());
1582   SDValue RegClass =
1583     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1584   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1585   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1586   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1587   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1588 }
1589
1590 /// Form a quad register from a pair of D registers.
1591 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1592   SDLoc dl(V0.getNode());
1593   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1594                                                MVT::i32);
1595   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1596   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1597   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1598   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1599 }
1600
1601 /// Form 4 consecutive D registers from a pair of Q registers.
1602 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1603   SDLoc dl(V0.getNode());
1604   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1605                                                MVT::i32);
1606   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1607   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1608   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1609   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1610 }
1611
1612 /// Form 4 consecutive S registers.
1613 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1614                                    SDValue V2, SDValue V3) {
1615   SDLoc dl(V0.getNode());
1616   SDValue RegClass =
1617     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1618   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1619   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1620   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1621   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1622   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1623                                     V2, SubReg2, V3, SubReg3 };
1624   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1625 }
1626
1627 /// Form 4 consecutive D registers.
1628 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1629                                    SDValue V2, SDValue V3) {
1630   SDLoc dl(V0.getNode());
1631   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1632                                                MVT::i32);
1633   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1634   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1635   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1636   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1637   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1638                                     V2, SubReg2, V3, SubReg3 };
1639   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1640 }
1641
1642 /// Form 4 consecutive Q registers.
1643 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1644                                    SDValue V2, SDValue V3) {
1645   SDLoc dl(V0.getNode());
1646   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1647                                                MVT::i32);
1648   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1649   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1650   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1651   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1652   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1653                                     V2, SubReg2, V3, SubReg3 };
1654   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1655 }
1656
1657 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1658 /// of a NEON VLD or VST instruction.  The supported values depend on the
1659 /// number of registers being loaded.
1660 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1661                                        unsigned NumVecs, bool is64BitVector) {
1662   unsigned NumRegs = NumVecs;
1663   if (!is64BitVector && NumVecs < 3)
1664     NumRegs *= 2;
1665
1666   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1667   if (Alignment >= 32 && NumRegs == 4)
1668     Alignment = 32;
1669   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1670     Alignment = 16;
1671   else if (Alignment >= 8)
1672     Alignment = 8;
1673   else
1674     Alignment = 0;
1675
1676   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1677 }
1678
1679 static bool isVLDfixed(unsigned Opc)
1680 {
1681   switch (Opc) {
1682   default: return false;
1683   case ARM::VLD1d8wb_fixed : return true;
1684   case ARM::VLD1d16wb_fixed : return true;
1685   case ARM::VLD1d64Qwb_fixed : return true;
1686   case ARM::VLD1d32wb_fixed : return true;
1687   case ARM::VLD1d64wb_fixed : return true;
1688   case ARM::VLD1d64TPseudoWB_fixed : return true;
1689   case ARM::VLD1d64QPseudoWB_fixed : return true;
1690   case ARM::VLD1q8wb_fixed : return true;
1691   case ARM::VLD1q16wb_fixed : return true;
1692   case ARM::VLD1q32wb_fixed : return true;
1693   case ARM::VLD1q64wb_fixed : return true;
1694   case ARM::VLD1DUPd8wb_fixed : return true;
1695   case ARM::VLD1DUPd16wb_fixed : return true;
1696   case ARM::VLD1DUPd32wb_fixed : return true;
1697   case ARM::VLD1DUPq8wb_fixed : return true;
1698   case ARM::VLD1DUPq16wb_fixed : return true;
1699   case ARM::VLD1DUPq32wb_fixed : return true;
1700   case ARM::VLD2d8wb_fixed : return true;
1701   case ARM::VLD2d16wb_fixed : return true;
1702   case ARM::VLD2d32wb_fixed : return true;
1703   case ARM::VLD2q8PseudoWB_fixed : return true;
1704   case ARM::VLD2q16PseudoWB_fixed : return true;
1705   case ARM::VLD2q32PseudoWB_fixed : return true;
1706   case ARM::VLD2DUPd8wb_fixed : return true;
1707   case ARM::VLD2DUPd16wb_fixed : return true;
1708   case ARM::VLD2DUPd32wb_fixed : return true;
1709   }
1710 }
1711
1712 static bool isVSTfixed(unsigned Opc)
1713 {
1714   switch (Opc) {
1715   default: return false;
1716   case ARM::VST1d8wb_fixed : return true;
1717   case ARM::VST1d16wb_fixed : return true;
1718   case ARM::VST1d32wb_fixed : return true;
1719   case ARM::VST1d64wb_fixed : return true;
1720   case ARM::VST1q8wb_fixed : return true;
1721   case ARM::VST1q16wb_fixed : return true;
1722   case ARM::VST1q32wb_fixed : return true;
1723   case ARM::VST1q64wb_fixed : return true;
1724   case ARM::VST1d64TPseudoWB_fixed : return true;
1725   case ARM::VST1d64QPseudoWB_fixed : return true;
1726   case ARM::VST2d8wb_fixed : return true;
1727   case ARM::VST2d16wb_fixed : return true;
1728   case ARM::VST2d32wb_fixed : return true;
1729   case ARM::VST2q8PseudoWB_fixed : return true;
1730   case ARM::VST2q16PseudoWB_fixed : return true;
1731   case ARM::VST2q32PseudoWB_fixed : return true;
1732   }
1733 }
1734
1735 // Get the register stride update opcode of a VLD/VST instruction that
1736 // is otherwise equivalent to the given fixed stride updating instruction.
1737 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1738   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1739     && "Incorrect fixed stride updating instruction.");
1740   switch (Opc) {
1741   default: break;
1742   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1743   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1744   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1745   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1746   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1747   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1748   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1749   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1750   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1751   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1752   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1753   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1754   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1755   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1756   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1757   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1758   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1759   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1760
1761   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1762   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1763   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1764   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1765   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1766   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1767   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1768   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1769   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1770   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1771
1772   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1773   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1774   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1775   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1776   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1777   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1778
1779   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1780   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1781   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1782   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1783   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1784   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1785
1786   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1787   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1788   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1789   }
1790   return Opc; // If not one we handle, return it unchanged.
1791 }
1792
1793 /// Returns true if the given increment is a Constant known to be equal to the
1794 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1795 /// be used.
1796 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1797   auto C = dyn_cast<ConstantSDNode>(Inc);
1798   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1799 }
1800
1801 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1802                                 const uint16_t *DOpcodes,
1803                                 const uint16_t *QOpcodes0,
1804                                 const uint16_t *QOpcodes1) {
1805   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1806   SDLoc dl(N);
1807
1808   SDValue MemAddr, Align;
1809   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
1810                                    // nodes are not intrinsics.
1811   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1812   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1813     return;
1814
1815   SDValue Chain = N->getOperand(0);
1816   EVT VT = N->getValueType(0);
1817   bool is64BitVector = VT.is64BitVector();
1818   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1819
1820   unsigned OpcodeIndex;
1821   switch (VT.getSimpleVT().SimpleTy) {
1822   default: llvm_unreachable("unhandled vld type");
1823     // Double-register operations:
1824   case MVT::v8i8:  OpcodeIndex = 0; break;
1825   case MVT::v4f16:
1826   case MVT::v4i16: OpcodeIndex = 1; break;
1827   case MVT::v2f32:
1828   case MVT::v2i32: OpcodeIndex = 2; break;
1829   case MVT::v1i64: OpcodeIndex = 3; break;
1830     // Quad-register operations:
1831   case MVT::v16i8: OpcodeIndex = 0; break;
1832   case MVT::v8f16:
1833   case MVT::v8i16: OpcodeIndex = 1; break;
1834   case MVT::v4f32:
1835   case MVT::v4i32: OpcodeIndex = 2; break;
1836   case MVT::v2f64:
1837   case MVT::v2i64: OpcodeIndex = 3; break;
1838   }
1839
1840   EVT ResTy;
1841   if (NumVecs == 1)
1842     ResTy = VT;
1843   else {
1844     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1845     if (!is64BitVector)
1846       ResTyElts *= 2;
1847     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1848   }
1849   std::vector<EVT> ResTys;
1850   ResTys.push_back(ResTy);
1851   if (isUpdating)
1852     ResTys.push_back(MVT::i32);
1853   ResTys.push_back(MVT::Other);
1854
1855   SDValue Pred = getAL(CurDAG, dl);
1856   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1857   SDNode *VLd;
1858   SmallVector<SDValue, 7> Ops;
1859
1860   // Double registers and VLD1/VLD2 quad registers are directly supported.
1861   if (is64BitVector || NumVecs <= 2) {
1862     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1863                     QOpcodes0[OpcodeIndex]);
1864     Ops.push_back(MemAddr);
1865     Ops.push_back(Align);
1866     if (isUpdating) {
1867       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1868       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1869       if (!IsImmUpdate) {
1870         // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1871         // check for the opcode rather than the number of vector elements.
1872         if (isVLDfixed(Opc))
1873           Opc = getVLDSTRegisterUpdateOpcode(Opc);
1874         Ops.push_back(Inc);
1875       // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1876       // the operands if not such an opcode.
1877       } else if (!isVLDfixed(Opc))
1878         Ops.push_back(Reg0);
1879     }
1880     Ops.push_back(Pred);
1881     Ops.push_back(Reg0);
1882     Ops.push_back(Chain);
1883     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1884
1885   } else {
1886     // Otherwise, quad registers are loaded with two separate instructions,
1887     // where one loads the even registers and the other loads the odd registers.
1888     EVT AddrTy = MemAddr.getValueType();
1889
1890     // Load the even subregs.  This is always an updating load, so that it
1891     // provides the address to the second load for the odd subregs.
1892     SDValue ImplDef =
1893       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1894     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1895     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1896                                           ResTy, AddrTy, MVT::Other, OpsA);
1897     Chain = SDValue(VLdA, 2);
1898
1899     // Load the odd subregs.
1900     Ops.push_back(SDValue(VLdA, 1));
1901     Ops.push_back(Align);
1902     if (isUpdating) {
1903       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1904       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1905              "only constant post-increment update allowed for VLD3/4");
1906       (void)Inc;
1907       Ops.push_back(Reg0);
1908     }
1909     Ops.push_back(SDValue(VLdA, 0));
1910     Ops.push_back(Pred);
1911     Ops.push_back(Reg0);
1912     Ops.push_back(Chain);
1913     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1914   }
1915
1916   // Transfer memoperands.
1917   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1918   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
1919
1920   if (NumVecs == 1) {
1921     ReplaceNode(N, VLd);
1922     return;
1923   }
1924
1925   // Extract out the subregisters.
1926   SDValue SuperReg = SDValue(VLd, 0);
1927   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1928                     ARM::qsub_3 == ARM::qsub_0 + 3,
1929                 "Unexpected subreg numbering");
1930   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1931   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1932     ReplaceUses(SDValue(N, Vec),
1933                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1934   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1935   if (isUpdating)
1936     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1937   CurDAG->RemoveDeadNode(N);
1938 }
1939
1940 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1941                                 const uint16_t *DOpcodes,
1942                                 const uint16_t *QOpcodes0,
1943                                 const uint16_t *QOpcodes1) {
1944   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1945   SDLoc dl(N);
1946
1947   SDValue MemAddr, Align;
1948   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
1949                                    // nodes are not intrinsics.
1950   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1951   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1952   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1953     return;
1954
1955   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1956
1957   SDValue Chain = N->getOperand(0);
1958   EVT VT = N->getOperand(Vec0Idx).getValueType();
1959   bool is64BitVector = VT.is64BitVector();
1960   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1961
1962   unsigned OpcodeIndex;
1963   switch (VT.getSimpleVT().SimpleTy) {
1964   default: llvm_unreachable("unhandled vst type");
1965     // Double-register operations:
1966   case MVT::v8i8:  OpcodeIndex = 0; break;
1967   case MVT::v4f16:
1968   case MVT::v4i16: OpcodeIndex = 1; break;
1969   case MVT::v2f32:
1970   case MVT::v2i32: OpcodeIndex = 2; break;
1971   case MVT::v1i64: OpcodeIndex = 3; break;
1972     // Quad-register operations:
1973   case MVT::v16i8: OpcodeIndex = 0; break;
1974   case MVT::v8f16:
1975   case MVT::v8i16: OpcodeIndex = 1; break;
1976   case MVT::v4f32:
1977   case MVT::v4i32: OpcodeIndex = 2; break;
1978   case MVT::v2f64:
1979   case MVT::v2i64: OpcodeIndex = 3; break;
1980   }
1981
1982   std::vector<EVT> ResTys;
1983   if (isUpdating)
1984     ResTys.push_back(MVT::i32);
1985   ResTys.push_back(MVT::Other);
1986
1987   SDValue Pred = getAL(CurDAG, dl);
1988   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1989   SmallVector<SDValue, 7> Ops;
1990
1991   // Double registers and VST1/VST2 quad registers are directly supported.
1992   if (is64BitVector || NumVecs <= 2) {
1993     SDValue SrcReg;
1994     if (NumVecs == 1) {
1995       SrcReg = N->getOperand(Vec0Idx);
1996     } else if (is64BitVector) {
1997       // Form a REG_SEQUENCE to force register allocation.
1998       SDValue V0 = N->getOperand(Vec0Idx + 0);
1999       SDValue V1 = N->getOperand(Vec0Idx + 1);
2000       if (NumVecs == 2)
2001         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2002       else {
2003         SDValue V2 = N->getOperand(Vec0Idx + 2);
2004         // If it's a vst3, form a quad D-register and leave the last part as
2005         // an undef.
2006         SDValue V3 = (NumVecs == 3)
2007           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2008           : N->getOperand(Vec0Idx + 3);
2009         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2010       }
2011     } else {
2012       // Form a QQ register.
2013       SDValue Q0 = N->getOperand(Vec0Idx);
2014       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2015       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2016     }
2017
2018     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2019                     QOpcodes0[OpcodeIndex]);
2020     Ops.push_back(MemAddr);
2021     Ops.push_back(Align);
2022     if (isUpdating) {
2023       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2024       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2025       if (!IsImmUpdate) {
2026         // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2027         // check for the opcode rather than the number of vector elements.
2028         if (isVSTfixed(Opc))
2029           Opc = getVLDSTRegisterUpdateOpcode(Opc);
2030         Ops.push_back(Inc);
2031       }
2032       // VST1/VST2 fixed increment does not need Reg0 so only include it in
2033       // the operands if not such an opcode.
2034       else if (!isVSTfixed(Opc))
2035         Ops.push_back(Reg0);
2036     }
2037     Ops.push_back(SrcReg);
2038     Ops.push_back(Pred);
2039     Ops.push_back(Reg0);
2040     Ops.push_back(Chain);
2041     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2042
2043     // Transfer memoperands.
2044     CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2045
2046     ReplaceNode(N, VSt);
2047     return;
2048   }
2049
2050   // Otherwise, quad registers are stored with two separate instructions,
2051   // where one stores the even registers and the other stores the odd registers.
2052
2053   // Form the QQQQ REG_SEQUENCE.
2054   SDValue V0 = N->getOperand(Vec0Idx + 0);
2055   SDValue V1 = N->getOperand(Vec0Idx + 1);
2056   SDValue V2 = N->getOperand(Vec0Idx + 2);
2057   SDValue V3 = (NumVecs == 3)
2058     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2059     : N->getOperand(Vec0Idx + 3);
2060   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2061
2062   // Store the even D registers.  This is always an updating store, so that it
2063   // provides the address to the second store for the odd subregs.
2064   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2065   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2066                                         MemAddr.getValueType(),
2067                                         MVT::Other, OpsA);
2068   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2069   Chain = SDValue(VStA, 1);
2070
2071   // Store the odd D registers.
2072   Ops.push_back(SDValue(VStA, 0));
2073   Ops.push_back(Align);
2074   if (isUpdating) {
2075     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2076     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2077            "only constant post-increment update allowed for VST3/4");
2078     (void)Inc;
2079     Ops.push_back(Reg0);
2080   }
2081   Ops.push_back(RegSeq);
2082   Ops.push_back(Pred);
2083   Ops.push_back(Reg0);
2084   Ops.push_back(Chain);
2085   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2086                                         Ops);
2087   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2088   ReplaceNode(N, VStB);
2089 }
2090
2091 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2092                                       unsigned NumVecs,
2093                                       const uint16_t *DOpcodes,
2094                                       const uint16_t *QOpcodes) {
2095   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2096   SDLoc dl(N);
2097
2098   SDValue MemAddr, Align;
2099   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2100                                    // nodes are not intrinsics.
2101   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2102   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2103   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2104     return;
2105
2106   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2107
2108   SDValue Chain = N->getOperand(0);
2109   unsigned Lane =
2110     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2111   EVT VT = N->getOperand(Vec0Idx).getValueType();
2112   bool is64BitVector = VT.is64BitVector();
2113
2114   unsigned Alignment = 0;
2115   if (NumVecs != 3) {
2116     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2117     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2118     if (Alignment > NumBytes)
2119       Alignment = NumBytes;
2120     if (Alignment < 8 && Alignment < NumBytes)
2121       Alignment = 0;
2122     // Alignment must be a power of two; make sure of that.
2123     Alignment = (Alignment & -Alignment);
2124     if (Alignment == 1)
2125       Alignment = 0;
2126   }
2127   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2128
2129   unsigned OpcodeIndex;
2130   switch (VT.getSimpleVT().SimpleTy) {
2131   default: llvm_unreachable("unhandled vld/vst lane type");
2132     // Double-register operations:
2133   case MVT::v8i8:  OpcodeIndex = 0; break;
2134   case MVT::v4f16:
2135   case MVT::v4i16: OpcodeIndex = 1; break;
2136   case MVT::v2f32:
2137   case MVT::v2i32: OpcodeIndex = 2; break;
2138     // Quad-register operations:
2139   case MVT::v8f16:
2140   case MVT::v8i16: OpcodeIndex = 0; break;
2141   case MVT::v4f32:
2142   case MVT::v4i32: OpcodeIndex = 1; break;
2143   }
2144
2145   std::vector<EVT> ResTys;
2146   if (IsLoad) {
2147     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2148     if (!is64BitVector)
2149       ResTyElts *= 2;
2150     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2151                                       MVT::i64, ResTyElts));
2152   }
2153   if (isUpdating)
2154     ResTys.push_back(MVT::i32);
2155   ResTys.push_back(MVT::Other);
2156
2157   SDValue Pred = getAL(CurDAG, dl);
2158   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2159
2160   SmallVector<SDValue, 8> Ops;
2161   Ops.push_back(MemAddr);
2162   Ops.push_back(Align);
2163   if (isUpdating) {
2164     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2165     bool IsImmUpdate =
2166         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2167     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2168   }
2169
2170   SDValue SuperReg;
2171   SDValue V0 = N->getOperand(Vec0Idx + 0);
2172   SDValue V1 = N->getOperand(Vec0Idx + 1);
2173   if (NumVecs == 2) {
2174     if (is64BitVector)
2175       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2176     else
2177       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2178   } else {
2179     SDValue V2 = N->getOperand(Vec0Idx + 2);
2180     SDValue V3 = (NumVecs == 3)
2181       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2182       : N->getOperand(Vec0Idx + 3);
2183     if (is64BitVector)
2184       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2185     else
2186       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2187   }
2188   Ops.push_back(SuperReg);
2189   Ops.push_back(getI32Imm(Lane, dl));
2190   Ops.push_back(Pred);
2191   Ops.push_back(Reg0);
2192   Ops.push_back(Chain);
2193
2194   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2195                                   QOpcodes[OpcodeIndex]);
2196   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2197   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2198   if (!IsLoad) {
2199     ReplaceNode(N, VLdLn);
2200     return;
2201   }
2202
2203   // Extract the subregisters.
2204   SuperReg = SDValue(VLdLn, 0);
2205   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2206                     ARM::qsub_3 == ARM::qsub_0 + 3,
2207                 "Unexpected subreg numbering");
2208   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2209   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2210     ReplaceUses(SDValue(N, Vec),
2211                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2212   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2213   if (isUpdating)
2214     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2215   CurDAG->RemoveDeadNode(N);
2216 }
2217
2218 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2219                                    bool isUpdating, unsigned NumVecs,
2220                                    const uint16_t *DOpcodes,
2221                                    const uint16_t *QOpcodes0,
2222                                    const uint16_t *QOpcodes1) {
2223   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2224   SDLoc dl(N);
2225
2226   SDValue MemAddr, Align;
2227   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2228   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2229     return;
2230
2231   SDValue Chain = N->getOperand(0);
2232   EVT VT = N->getValueType(0);
2233   bool is64BitVector = VT.is64BitVector();
2234
2235   unsigned Alignment = 0;
2236   if (NumVecs != 3) {
2237     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2238     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2239     if (Alignment > NumBytes)
2240       Alignment = NumBytes;
2241     if (Alignment < 8 && Alignment < NumBytes)
2242       Alignment = 0;
2243     // Alignment must be a power of two; make sure of that.
2244     Alignment = (Alignment & -Alignment);
2245     if (Alignment == 1)
2246       Alignment = 0;
2247   }
2248   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2249
2250   unsigned OpcodeIndex;
2251   switch (VT.getSimpleVT().SimpleTy) {
2252   default: llvm_unreachable("unhandled vld-dup type");
2253   case MVT::v8i8:
2254   case MVT::v16i8: OpcodeIndex = 0; break;
2255   case MVT::v4i16:
2256   case MVT::v8i16:
2257   case MVT::v4f16:
2258   case MVT::v8f16:
2259                   OpcodeIndex = 1; break;
2260   case MVT::v2f32:
2261   case MVT::v2i32:
2262   case MVT::v4f32:
2263   case MVT::v4i32: OpcodeIndex = 2; break;
2264   case MVT::v1f64:
2265   case MVT::v1i64: OpcodeIndex = 3; break;
2266   }
2267
2268   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2269   if (!is64BitVector)
2270     ResTyElts *= 2;
2271   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2272
2273   std::vector<EVT> ResTys;
2274   ResTys.push_back(ResTy);
2275   if (isUpdating)
2276     ResTys.push_back(MVT::i32);
2277   ResTys.push_back(MVT::Other);
2278
2279   SDValue Pred = getAL(CurDAG, dl);
2280   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2281
2282   SDNode *VLdDup;
2283   if (is64BitVector || NumVecs == 1) {
2284     SmallVector<SDValue, 6> Ops;
2285     Ops.push_back(MemAddr);
2286     Ops.push_back(Align);
2287     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2288                                    QOpcodes0[OpcodeIndex];
2289     if (isUpdating) {
2290       // fixed-stride update instructions don't have an explicit writeback
2291       // operand. It's implicit in the opcode itself.
2292       SDValue Inc = N->getOperand(2);
2293       bool IsImmUpdate =
2294           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2295       if (NumVecs <= 2 && !IsImmUpdate)
2296         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2297       if (!IsImmUpdate)
2298         Ops.push_back(Inc);
2299       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2300       else if (NumVecs > 2)
2301         Ops.push_back(Reg0);
2302     }
2303     Ops.push_back(Pred);
2304     Ops.push_back(Reg0);
2305     Ops.push_back(Chain);
2306     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2307   } else if (NumVecs == 2) {
2308     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2309     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2310                                           dl, ResTys, OpsA);
2311
2312     Chain = SDValue(VLdA, 1);
2313     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2314     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2315   } else {
2316     SDValue ImplDef =
2317       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2318     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2319     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2320                                           dl, ResTys, OpsA);
2321
2322     SDValue SuperReg = SDValue(VLdA, 0);
2323     Chain = SDValue(VLdA, 1);
2324     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2325     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2326   }
2327
2328   // Transfer memoperands.
2329   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2330   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2331
2332   // Extract the subregisters.
2333   if (NumVecs == 1) {
2334     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2335   } else {
2336     SDValue SuperReg = SDValue(VLdDup, 0);
2337     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2338     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2339     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2340       ReplaceUses(SDValue(N, Vec),
2341                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2342     }
2343   }
2344   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2345   if (isUpdating)
2346     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2347   CurDAG->RemoveDeadNode(N);
2348 }
2349
2350 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2351   if (!Subtarget->hasV6T2Ops())
2352     return false;
2353
2354   unsigned Opc = isSigned
2355     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2356     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2357   SDLoc dl(N);
2358
2359   // For unsigned extracts, check for a shift right and mask
2360   unsigned And_imm = 0;
2361   if (N->getOpcode() == ISD::AND) {
2362     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2363
2364       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2365       if (And_imm & (And_imm + 1))
2366         return false;
2367
2368       unsigned Srl_imm = 0;
2369       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2370                                 Srl_imm)) {
2371         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2372
2373         // Mask off the unnecessary bits of the AND immediate; normally
2374         // DAGCombine will do this, but that might not happen if
2375         // targetShrinkDemandedConstant chooses a different immediate.
2376         And_imm &= -1U >> Srl_imm;
2377
2378         // Note: The width operand is encoded as width-1.
2379         unsigned Width = countTrailingOnes(And_imm) - 1;
2380         unsigned LSB = Srl_imm;
2381
2382         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2383
2384         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2385           // It's cheaper to use a right shift to extract the top bits.
2386           if (Subtarget->isThumb()) {
2387             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2388             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2389                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2390                               getAL(CurDAG, dl), Reg0, Reg0 };
2391             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2392             return true;
2393           }
2394
2395           // ARM models shift instructions as MOVsi with shifter operand.
2396           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2397           SDValue ShOpc =
2398             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2399                                       MVT::i32);
2400           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2401                             getAL(CurDAG, dl), Reg0, Reg0 };
2402           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2403           return true;
2404         }
2405
2406         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2407         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2408                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2409                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2410                           getAL(CurDAG, dl), Reg0 };
2411         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2412         return true;
2413       }
2414     }
2415     return false;
2416   }
2417
2418   // Otherwise, we're looking for a shift of a shift
2419   unsigned Shl_imm = 0;
2420   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2421     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2422     unsigned Srl_imm = 0;
2423     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2424       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2425       // Note: The width operand is encoded as width-1.
2426       unsigned Width = 32 - Srl_imm - 1;
2427       int LSB = Srl_imm - Shl_imm;
2428       if (LSB < 0)
2429         return false;
2430       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2431       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2432       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2433                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2434                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2435                         getAL(CurDAG, dl), Reg0 };
2436       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2437       return true;
2438     }
2439   }
2440
2441   // Or we are looking for a shift of an and, with a mask operand
2442   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2443       isShiftedMask_32(And_imm)) {
2444     unsigned Srl_imm = 0;
2445     unsigned LSB = countTrailingZeros(And_imm);
2446     // Shift must be the same as the ands lsb
2447     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2448       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2449       unsigned MSB = 31 - countLeadingZeros(And_imm);
2450       // Note: The width operand is encoded as width-1.
2451       unsigned Width = MSB - LSB;
2452       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2453       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2454       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2455                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2456                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2457                         getAL(CurDAG, dl), Reg0 };
2458       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2459       return true;
2460     }
2461   }
2462
2463   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2464     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2465     unsigned LSB = 0;
2466     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2467         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2468       return false;
2469
2470     if (LSB + Width > 32)
2471       return false;
2472
2473     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2474     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2475     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2476                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2477                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2478                       getAL(CurDAG, dl), Reg0 };
2479     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2480     return true;
2481   }
2482
2483   return false;
2484 }
2485
2486 /// Target-specific DAG combining for ISD::XOR.
2487 /// Target-independent combining lowers SELECT_CC nodes of the form
2488 /// select_cc setg[ge] X,  0,  X, -X
2489 /// select_cc setgt    X, -1,  X, -X
2490 /// select_cc setl[te] X,  0, -X,  X
2491 /// select_cc setlt    X,  1, -X,  X
2492 /// which represent Integer ABS into:
2493 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2494 /// ARM instruction selection detects the latter and matches it to
2495 /// ARM::ABS or ARM::t2ABS machine node.
2496 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2497   SDValue XORSrc0 = N->getOperand(0);
2498   SDValue XORSrc1 = N->getOperand(1);
2499   EVT VT = N->getValueType(0);
2500
2501   if (Subtarget->isThumb1Only())
2502     return false;
2503
2504   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2505     return false;
2506
2507   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2508   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2509   SDValue SRASrc0 = XORSrc1.getOperand(0);
2510   SDValue SRASrc1 = XORSrc1.getOperand(1);
2511   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2512   EVT XType = SRASrc0.getValueType();
2513   unsigned Size = XType.getSizeInBits() - 1;
2514
2515   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2516       XType.isInteger() && SRAConstant != nullptr &&
2517       Size == SRAConstant->getZExtValue()) {
2518     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2519     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2520     return true;
2521   }
2522
2523   return false;
2524 }
2525
2526 /// We've got special pseudo-instructions for these
2527 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2528   unsigned Opcode;
2529   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2530   if (MemTy == MVT::i8)
2531     Opcode = ARM::CMP_SWAP_8;
2532   else if (MemTy == MVT::i16)
2533     Opcode = ARM::CMP_SWAP_16;
2534   else if (MemTy == MVT::i32)
2535     Opcode = ARM::CMP_SWAP_32;
2536   else
2537     llvm_unreachable("Unknown AtomicCmpSwap type");
2538
2539   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2540                    N->getOperand(0)};
2541   SDNode *CmpSwap = CurDAG->getMachineNode(
2542       Opcode, SDLoc(N),
2543       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2544
2545   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2546   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2547
2548   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2549   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2550   CurDAG->RemoveDeadNode(N);
2551 }
2552
2553 static Optional<std::pair<unsigned, unsigned>>
2554 getContiguousRangeOfSetBits(const APInt &A) {
2555   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2556   unsigned LastOne = A.countTrailingZeros();
2557   if (A.countPopulation() != (FirstOne - LastOne + 1))
2558     return Optional<std::pair<unsigned,unsigned>>();
2559   return std::make_pair(FirstOne, LastOne);
2560 }
2561
2562 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2563   assert(N->getOpcode() == ARMISD::CMPZ);
2564   SwitchEQNEToPLMI = false;
2565
2566   if (!Subtarget->isThumb())
2567     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2568     // LSR don't exist as standalone instructions - they need the barrel shifter.
2569     return;
2570
2571   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2572   SDValue And = N->getOperand(0);
2573   if (!And->hasOneUse())
2574     return;
2575
2576   SDValue Zero = N->getOperand(1);
2577   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2578       And->getOpcode() != ISD::AND)
2579     return;
2580   SDValue X = And.getOperand(0);
2581   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2582
2583   if (!C)
2584     return;
2585   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2586   if (!Range)
2587     return;
2588
2589   // There are several ways to lower this:
2590   SDNode *NewN;
2591   SDLoc dl(N);
2592
2593   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2594     if (Subtarget->isThumb2()) {
2595       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2596       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2597                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2598                         CurDAG->getRegister(0, MVT::i32) };
2599       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2600     } else {
2601       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2602                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2603                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2604       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2605     }
2606   };
2607
2608   if (Range->second == 0) {
2609     //  1. Mask includes the LSB -> Simply shift the top N bits off
2610     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2611     ReplaceNode(And.getNode(), NewN);
2612   } else if (Range->first == 31) {
2613     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2614     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2615     ReplaceNode(And.getNode(), NewN);
2616   } else if (Range->first == Range->second) {
2617     //  3. Only one bit is set. We can shift this into the sign bit and use a
2618     //     PL/MI comparison.
2619     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2620     ReplaceNode(And.getNode(), NewN);
2621
2622     SwitchEQNEToPLMI = true;
2623   } else if (!Subtarget->hasV6T2Ops()) {
2624     //  4. Do a double shift to clear bottom and top bits, but only in
2625     //     thumb-1 mode as in thumb-2 we can use UBFX.
2626     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2627     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2628                      Range->second + (31 - Range->first));
2629     ReplaceNode(And.getNode(), NewN);
2630   }
2631
2632 }
2633
2634 void ARMDAGToDAGISel::Select(SDNode *N) {
2635   SDLoc dl(N);
2636
2637   if (N->isMachineOpcode()) {
2638     N->setNodeId(-1);
2639     return;   // Already selected.
2640   }
2641
2642   switch (N->getOpcode()) {
2643   default: break;
2644   case ISD::STORE: {
2645     // For Thumb1, match an sp-relative store in C++. This is a little
2646     // unfortunate, but I don't think I can make the chain check work
2647     // otherwise.  (The chain of the store has to be the same as the chain
2648     // of the CopyFromReg, or else we can't replace the CopyFromReg with
2649     // a direct reference to "SP".)
2650     //
2651     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2652     // a different addressing mode from other four-byte stores.
2653     //
2654     // This pattern usually comes up with call arguments.
2655     StoreSDNode *ST = cast<StoreSDNode>(N);
2656     SDValue Ptr = ST->getBasePtr();
2657     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2658       int RHSC = 0;
2659       if (Ptr.getOpcode() == ISD::ADD &&
2660           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2661         Ptr = Ptr.getOperand(0);
2662
2663       if (Ptr.getOpcode() == ISD::CopyFromReg &&
2664           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2665           Ptr.getOperand(0) == ST->getChain()) {
2666         SDValue Ops[] = {ST->getValue(),
2667                          CurDAG->getRegister(ARM::SP, MVT::i32),
2668                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2669                          getAL(CurDAG, dl),
2670                          CurDAG->getRegister(0, MVT::i32),
2671                          ST->getChain()};
2672         MachineSDNode *ResNode =
2673             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2674         MachineMemOperand *MemOp = ST->getMemOperand();
2675         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2676         ReplaceNode(N, ResNode);
2677         return;
2678       }
2679     }
2680     break;
2681   }
2682   case ISD::WRITE_REGISTER:
2683     if (tryWriteRegister(N))
2684       return;
2685     break;
2686   case ISD::READ_REGISTER:
2687     if (tryReadRegister(N))
2688       return;
2689     break;
2690   case ISD::INLINEASM:
2691   case ISD::INLINEASM_BR:
2692     if (tryInlineAsm(N))
2693       return;
2694     break;
2695   case ISD::XOR:
2696     // Select special operations if XOR node forms integer ABS pattern
2697     if (tryABSOp(N))
2698       return;
2699     // Other cases are autogenerated.
2700     break;
2701   case ISD::Constant: {
2702     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2703     // If we can't materialize the constant we need to use a literal pool
2704     if (ConstantMaterializationCost(Val) > 2) {
2705       SDValue CPIdx = CurDAG->getTargetConstantPool(
2706           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2707           TLI->getPointerTy(CurDAG->getDataLayout()));
2708
2709       SDNode *ResNode;
2710       if (Subtarget->isThumb()) {
2711         SDValue Ops[] = {
2712           CPIdx,
2713           getAL(CurDAG, dl),
2714           CurDAG->getRegister(0, MVT::i32),
2715           CurDAG->getEntryNode()
2716         };
2717         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2718                                          Ops);
2719       } else {
2720         SDValue Ops[] = {
2721           CPIdx,
2722           CurDAG->getTargetConstant(0, dl, MVT::i32),
2723           getAL(CurDAG, dl),
2724           CurDAG->getRegister(0, MVT::i32),
2725           CurDAG->getEntryNode()
2726         };
2727         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2728                                          Ops);
2729       }
2730       // Annotate the Node with memory operand information so that MachineInstr
2731       // queries work properly. This e.g. gives the register allocation the
2732       // required information for rematerialization.
2733       MachineFunction& MF = CurDAG->getMachineFunction();
2734       MachineMemOperand *MemOp =
2735           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2736                                   MachineMemOperand::MOLoad, 4, 4);
2737
2738       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2739
2740       ReplaceNode(N, ResNode);
2741       return;
2742     }
2743
2744     // Other cases are autogenerated.
2745     break;
2746   }
2747   case ISD::FrameIndex: {
2748     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2749     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2750     SDValue TFI = CurDAG->getTargetFrameIndex(
2751         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2752     if (Subtarget->isThumb1Only()) {
2753       // Set the alignment of the frame object to 4, to avoid having to generate
2754       // more than one ADD
2755       MachineFrameInfo &MFI = MF->getFrameInfo();
2756       if (MFI.getObjectAlignment(FI) < 4)
2757         MFI.setObjectAlignment(FI, 4);
2758       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2759                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2760       return;
2761     } else {
2762       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2763                       ARM::t2ADDri : ARM::ADDri);
2764       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2765                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2766                         CurDAG->getRegister(0, MVT::i32) };
2767       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2768       return;
2769     }
2770   }
2771   case ISD::SRL:
2772     if (tryV6T2BitfieldExtractOp(N, false))
2773       return;
2774     break;
2775   case ISD::SIGN_EXTEND_INREG:
2776   case ISD::SRA:
2777     if (tryV6T2BitfieldExtractOp(N, true))
2778       return;
2779     break;
2780   case ISD::MUL:
2781     if (Subtarget->isThumb1Only())
2782       break;
2783     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2784       unsigned RHSV = C->getZExtValue();
2785       if (!RHSV) break;
2786       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2787         unsigned ShImm = Log2_32(RHSV-1);
2788         if (ShImm >= 32)
2789           break;
2790         SDValue V = N->getOperand(0);
2791         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2792         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2793         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2794         if (Subtarget->isThumb()) {
2795           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2796           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2797           return;
2798         } else {
2799           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2800                             Reg0 };
2801           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2802           return;
2803         }
2804       }
2805       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2806         unsigned ShImm = Log2_32(RHSV+1);
2807         if (ShImm >= 32)
2808           break;
2809         SDValue V = N->getOperand(0);
2810         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2811         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2812         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2813         if (Subtarget->isThumb()) {
2814           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2815           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2816           return;
2817         } else {
2818           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2819                             Reg0 };
2820           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2821           return;
2822         }
2823       }
2824     }
2825     break;
2826   case ISD::AND: {
2827     // Check for unsigned bitfield extract
2828     if (tryV6T2BitfieldExtractOp(N, false))
2829       return;
2830
2831     // If an immediate is used in an AND node, it is possible that the immediate
2832     // can be more optimally materialized when negated. If this is the case we
2833     // can negate the immediate and use a BIC instead.
2834     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2835     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2836       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2837
2838       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2839       // immediate can be negated and fit in the immediate operand of
2840       // a t2BIC, don't do any manual transform here as this can be
2841       // handled by the generic ISel machinery.
2842       bool PreferImmediateEncoding =
2843         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2844       if (!PreferImmediateEncoding &&
2845           ConstantMaterializationCost(Imm) >
2846               ConstantMaterializationCost(~Imm)) {
2847         // The current immediate costs more to materialize than a negated
2848         // immediate, so negate the immediate and use a BIC.
2849         SDValue NewImm =
2850           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2851         // If the new constant didn't exist before, reposition it in the topological
2852         // ordering so it is just before N. Otherwise, don't touch its location.
2853         if (NewImm->getNodeId() == -1)
2854           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2855
2856         if (!Subtarget->hasThumb2()) {
2857           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2858                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2859                            CurDAG->getRegister(0, MVT::i32)};
2860           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2861           return;
2862         } else {
2863           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2864                            CurDAG->getRegister(0, MVT::i32),
2865                            CurDAG->getRegister(0, MVT::i32)};
2866           ReplaceNode(N,
2867                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2868           return;
2869         }
2870       }
2871     }
2872
2873     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2874     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2875     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2876     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2877     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2878     EVT VT = N->getValueType(0);
2879     if (VT != MVT::i32)
2880       break;
2881     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2882       ? ARM::t2MOVTi16
2883       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2884     if (!Opc)
2885       break;
2886     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2887     N1C = dyn_cast<ConstantSDNode>(N1);
2888     if (!N1C)
2889       break;
2890     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2891       SDValue N2 = N0.getOperand(1);
2892       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2893       if (!N2C)
2894         break;
2895       unsigned N1CVal = N1C->getZExtValue();
2896       unsigned N2CVal = N2C->getZExtValue();
2897       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2898           (N1CVal & 0xffffU) == 0xffffU &&
2899           (N2CVal & 0xffffU) == 0x0U) {
2900         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2901                                                   dl, MVT::i32);
2902         SDValue Ops[] = { N0.getOperand(0), Imm16,
2903                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2904         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2905         return;
2906       }
2907     }
2908
2909     break;
2910   }
2911   case ARMISD::UMAAL: {
2912     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2913     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2914                       N->getOperand(2), N->getOperand(3),
2915                       getAL(CurDAG, dl),
2916                       CurDAG->getRegister(0, MVT::i32) };
2917     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2918     return;
2919   }
2920   case ARMISD::UMLAL:{
2921     if (Subtarget->isThumb()) {
2922       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2923                         N->getOperand(3), getAL(CurDAG, dl),
2924                         CurDAG->getRegister(0, MVT::i32)};
2925       ReplaceNode(
2926           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2927       return;
2928     }else{
2929       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2930                         N->getOperand(3), getAL(CurDAG, dl),
2931                         CurDAG->getRegister(0, MVT::i32),
2932                         CurDAG->getRegister(0, MVT::i32) };
2933       ReplaceNode(N, CurDAG->getMachineNode(
2934                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2935                          MVT::i32, MVT::i32, Ops));
2936       return;
2937     }
2938   }
2939   case ARMISD::SMLAL:{
2940     if (Subtarget->isThumb()) {
2941       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2942                         N->getOperand(3), getAL(CurDAG, dl),
2943                         CurDAG->getRegister(0, MVT::i32)};
2944       ReplaceNode(
2945           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2946       return;
2947     }else{
2948       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2949                         N->getOperand(3), getAL(CurDAG, dl),
2950                         CurDAG->getRegister(0, MVT::i32),
2951                         CurDAG->getRegister(0, MVT::i32) };
2952       ReplaceNode(N, CurDAG->getMachineNode(
2953                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2954                          MVT::i32, MVT::i32, Ops));
2955       return;
2956     }
2957   }
2958   case ARMISD::SUBE: {
2959     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2960       break;
2961     // Look for a pattern to match SMMLS
2962     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2963     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2964         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2965         !SDValue(N, 1).use_empty())
2966       break;
2967
2968     if (Subtarget->isThumb())
2969       assert(Subtarget->hasThumb2() &&
2970              "This pattern should not be generated for Thumb");
2971
2972     SDValue SmulLoHi = N->getOperand(1);
2973     SDValue Subc = N->getOperand(2);
2974     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2975
2976     if (!Zero || Zero->getZExtValue() != 0 ||
2977         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2978         N->getOperand(1) != SmulLoHi.getValue(1) ||
2979         N->getOperand(2) != Subc.getValue(1))
2980       break;
2981
2982     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2983     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2984                       N->getOperand(0), getAL(CurDAG, dl),
2985                       CurDAG->getRegister(0, MVT::i32) };
2986     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2987     return;
2988   }
2989   case ISD::LOAD: {
2990     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2991       if (tryT2IndexedLoad(N))
2992         return;
2993     } else if (Subtarget->isThumb()) {
2994       if (tryT1IndexedLoad(N))
2995         return;
2996     } else if (tryARMIndexedLoad(N))
2997       return;
2998     // Other cases are autogenerated.
2999     break;
3000   }
3001   case ARMISD::WLS:
3002   case ARMISD::LE: {
3003     SDValue Ops[] = { N->getOperand(1),
3004                       N->getOperand(2),
3005                       N->getOperand(0) };
3006     unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3007       ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3008     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3009     ReplaceUses(N, New);
3010     CurDAG->RemoveDeadNode(N);
3011     return;
3012   }
3013   case ARMISD::LOOP_DEC: {
3014     SDValue Ops[] = { N->getOperand(1),
3015                       N->getOperand(2),
3016                       N->getOperand(0) };
3017     SDNode *Dec =
3018       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3019                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3020     ReplaceUses(N, Dec);
3021     CurDAG->RemoveDeadNode(N);
3022     return;
3023   }
3024   case ARMISD::BRCOND: {
3025     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3026     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3027     // Pattern complexity = 6  cost = 1  size = 0
3028
3029     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3030     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3031     // Pattern complexity = 6  cost = 1  size = 0
3032
3033     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3034     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3035     // Pattern complexity = 6  cost = 1  size = 0
3036
3037     unsigned Opc = Subtarget->isThumb() ?
3038       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3039     SDValue Chain = N->getOperand(0);
3040     SDValue N1 = N->getOperand(1);
3041     SDValue N2 = N->getOperand(2);
3042     SDValue N3 = N->getOperand(3);
3043     SDValue InFlag = N->getOperand(4);
3044     assert(N1.getOpcode() == ISD::BasicBlock);
3045     assert(N2.getOpcode() == ISD::Constant);
3046     assert(N3.getOpcode() == ISD::Register);
3047
3048     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3049
3050     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3051       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3052         SDValue Int = InFlag.getOperand(0);
3053         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3054
3055         // Handle low-overhead loops.
3056         if (ID == Intrinsic::loop_decrement_reg) {
3057           SDValue Elements = Int.getOperand(2);
3058           SDValue Size = CurDAG->getTargetConstant(
3059             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3060                                  MVT::i32);
3061
3062           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3063           SDNode *LoopDec =
3064             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3065                                    CurDAG->getVTList(MVT::i32, MVT::Other),
3066                                    Args);
3067           ReplaceUses(Int.getNode(), LoopDec);
3068
3069           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3070           SDNode *LoopEnd =
3071             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3072
3073           ReplaceUses(N, LoopEnd);
3074           CurDAG->RemoveDeadNode(N);
3075           CurDAG->RemoveDeadNode(InFlag.getNode());
3076           CurDAG->RemoveDeadNode(Int.getNode());
3077           return;
3078         }
3079       }
3080
3081       bool SwitchEQNEToPLMI;
3082       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3083       InFlag = N->getOperand(4);
3084
3085       if (SwitchEQNEToPLMI) {
3086         switch ((ARMCC::CondCodes)CC) {
3087         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3088         case ARMCC::NE:
3089           CC = (unsigned)ARMCC::MI;
3090           break;
3091         case ARMCC::EQ:
3092           CC = (unsigned)ARMCC::PL;
3093           break;
3094         }
3095       }
3096     }
3097
3098     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3099     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3100     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3101                                              MVT::Glue, Ops);
3102     Chain = SDValue(ResNode, 0);
3103     if (N->getNumValues() == 2) {
3104       InFlag = SDValue(ResNode, 1);
3105       ReplaceUses(SDValue(N, 1), InFlag);
3106     }
3107     ReplaceUses(SDValue(N, 0),
3108                 SDValue(Chain.getNode(), Chain.getResNo()));
3109     CurDAG->RemoveDeadNode(N);
3110     return;
3111   }
3112
3113   case ARMISD::CMPZ: {
3114     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3115     //   This allows us to avoid materializing the expensive negative constant.
3116     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3117     //   for its glue output.
3118     SDValue X = N->getOperand(0);
3119     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3120     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3121       int64_t Addend = -C->getSExtValue();
3122
3123       SDNode *Add = nullptr;
3124       // ADDS can be better than CMN if the immediate fits in a
3125       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3126       // Outside that range we can just use a CMN which is 32-bit but has a
3127       // 12-bit immediate range.
3128       if (Addend < 1<<8) {
3129         if (Subtarget->isThumb2()) {
3130           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3131                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3132                             CurDAG->getRegister(0, MVT::i32) };
3133           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3134         } else {
3135           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3136           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3137                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3138                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3139           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3140         }
3141       }
3142       if (Add) {
3143         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3144         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3145       }
3146     }
3147     // Other cases are autogenerated.
3148     break;
3149   }
3150
3151   case ARMISD::CMOV: {
3152     SDValue InFlag = N->getOperand(4);
3153
3154     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3155       bool SwitchEQNEToPLMI;
3156       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3157
3158       if (SwitchEQNEToPLMI) {
3159         SDValue ARMcc = N->getOperand(2);
3160         ARMCC::CondCodes CC =
3161           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3162
3163         switch (CC) {
3164         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3165         case ARMCC::NE:
3166           CC = ARMCC::MI;
3167           break;
3168         case ARMCC::EQ:
3169           CC = ARMCC::PL;
3170           break;
3171         }
3172         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3173         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3174                          N->getOperand(3), N->getOperand(4)};
3175         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3176       }
3177
3178     }
3179     // Other cases are autogenerated.
3180     break;
3181   }
3182
3183   case ARMISD::VZIP: {
3184     unsigned Opc = 0;
3185     EVT VT = N->getValueType(0);
3186     switch (VT.getSimpleVT().SimpleTy) {
3187     default: return;
3188     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3189     case MVT::v4f16:
3190     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3191     case MVT::v2f32:
3192     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3193     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3194     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3195     case MVT::v8f16:
3196     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3197     case MVT::v4f32:
3198     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3199     }
3200     SDValue Pred = getAL(CurDAG, dl);
3201     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3202     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3203     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3204     return;
3205   }
3206   case ARMISD::VUZP: {
3207     unsigned Opc = 0;
3208     EVT VT = N->getValueType(0);
3209     switch (VT.getSimpleVT().SimpleTy) {
3210     default: return;
3211     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3212     case MVT::v4f16:
3213     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3214     case MVT::v2f32:
3215     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3216     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3217     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3218     case MVT::v8f16:
3219     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3220     case MVT::v4f32:
3221     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3222     }
3223     SDValue Pred = getAL(CurDAG, dl);
3224     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3225     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3226     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3227     return;
3228   }
3229   case ARMISD::VTRN: {
3230     unsigned Opc = 0;
3231     EVT VT = N->getValueType(0);
3232     switch (VT.getSimpleVT().SimpleTy) {
3233     default: return;
3234     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3235     case MVT::v4f16:
3236     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3237     case MVT::v2f32:
3238     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3239     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3240     case MVT::v8f16:
3241     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3242     case MVT::v4f32:
3243     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3244     }
3245     SDValue Pred = getAL(CurDAG, dl);
3246     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3247     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3248     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3249     return;
3250   }
3251   case ARMISD::BUILD_VECTOR: {
3252     EVT VecVT = N->getValueType(0);
3253     EVT EltVT = VecVT.getVectorElementType();
3254     unsigned NumElts = VecVT.getVectorNumElements();
3255     if (EltVT == MVT::f64) {
3256       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3257       ReplaceNode(
3258           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3259       return;
3260     }
3261     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3262     if (NumElts == 2) {
3263       ReplaceNode(
3264           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3265       return;
3266     }
3267     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3268     ReplaceNode(N,
3269                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3270                                     N->getOperand(2), N->getOperand(3)));
3271     return;
3272   }
3273
3274   case ARMISD::VLD1DUP: {
3275     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3276                                          ARM::VLD1DUPd32 };
3277     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3278                                          ARM::VLD1DUPq32 };
3279     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3280     return;
3281   }
3282
3283   case ARMISD::VLD2DUP: {
3284     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3285                                         ARM::VLD2DUPd32 };
3286     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3287     return;
3288   }
3289
3290   case ARMISD::VLD3DUP: {
3291     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3292                                         ARM::VLD3DUPd16Pseudo,
3293                                         ARM::VLD3DUPd32Pseudo };
3294     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3295     return;
3296   }
3297
3298   case ARMISD::VLD4DUP: {
3299     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3300                                         ARM::VLD4DUPd16Pseudo,
3301                                         ARM::VLD4DUPd32Pseudo };
3302     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3303     return;
3304   }
3305
3306   case ARMISD::VLD1DUP_UPD: {
3307     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3308                                          ARM::VLD1DUPd16wb_fixed,
3309                                          ARM::VLD1DUPd32wb_fixed };
3310     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3311                                          ARM::VLD1DUPq16wb_fixed,
3312                                          ARM::VLD1DUPq32wb_fixed };
3313     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3314     return;
3315   }
3316
3317   case ARMISD::VLD2DUP_UPD: {
3318     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3319                                         ARM::VLD2DUPd16wb_fixed,
3320                                         ARM::VLD2DUPd32wb_fixed };
3321     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3322     return;
3323   }
3324
3325   case ARMISD::VLD3DUP_UPD: {
3326     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3327                                         ARM::VLD3DUPd16Pseudo_UPD,
3328                                         ARM::VLD3DUPd32Pseudo_UPD };
3329     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3330     return;
3331   }
3332
3333   case ARMISD::VLD4DUP_UPD: {
3334     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3335                                         ARM::VLD4DUPd16Pseudo_UPD,
3336                                         ARM::VLD4DUPd32Pseudo_UPD };
3337     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3338     return;
3339   }
3340
3341   case ARMISD::VLD1_UPD: {
3342     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3343                                          ARM::VLD1d16wb_fixed,
3344                                          ARM::VLD1d32wb_fixed,
3345                                          ARM::VLD1d64wb_fixed };
3346     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3347                                          ARM::VLD1q16wb_fixed,
3348                                          ARM::VLD1q32wb_fixed,
3349                                          ARM::VLD1q64wb_fixed };
3350     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3351     return;
3352   }
3353
3354   case ARMISD::VLD2_UPD: {
3355     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3356                                          ARM::VLD2d16wb_fixed,
3357                                          ARM::VLD2d32wb_fixed,
3358                                          ARM::VLD1q64wb_fixed};
3359     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3360                                          ARM::VLD2q16PseudoWB_fixed,
3361                                          ARM::VLD2q32PseudoWB_fixed };
3362     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3363     return;
3364   }
3365
3366   case ARMISD::VLD3_UPD: {
3367     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3368                                          ARM::VLD3d16Pseudo_UPD,
3369                                          ARM::VLD3d32Pseudo_UPD,
3370                                          ARM::VLD1d64TPseudoWB_fixed};
3371     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3372                                           ARM::VLD3q16Pseudo_UPD,
3373                                           ARM::VLD3q32Pseudo_UPD };
3374     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3375                                           ARM::VLD3q16oddPseudo_UPD,
3376                                           ARM::VLD3q32oddPseudo_UPD };
3377     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3378     return;
3379   }
3380
3381   case ARMISD::VLD4_UPD: {
3382     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3383                                          ARM::VLD4d16Pseudo_UPD,
3384                                          ARM::VLD4d32Pseudo_UPD,
3385                                          ARM::VLD1d64QPseudoWB_fixed};
3386     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3387                                           ARM::VLD4q16Pseudo_UPD,
3388                                           ARM::VLD4q32Pseudo_UPD };
3389     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3390                                           ARM::VLD4q16oddPseudo_UPD,
3391                                           ARM::VLD4q32oddPseudo_UPD };
3392     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3393     return;
3394   }
3395
3396   case ARMISD::VLD2LN_UPD: {
3397     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3398                                          ARM::VLD2LNd16Pseudo_UPD,
3399                                          ARM::VLD2LNd32Pseudo_UPD };
3400     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3401                                          ARM::VLD2LNq32Pseudo_UPD };
3402     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3403     return;
3404   }
3405
3406   case ARMISD::VLD3LN_UPD: {
3407     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3408                                          ARM::VLD3LNd16Pseudo_UPD,
3409                                          ARM::VLD3LNd32Pseudo_UPD };
3410     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3411                                          ARM::VLD3LNq32Pseudo_UPD };
3412     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3413     return;
3414   }
3415
3416   case ARMISD::VLD4LN_UPD: {
3417     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3418                                          ARM::VLD4LNd16Pseudo_UPD,
3419                                          ARM::VLD4LNd32Pseudo_UPD };
3420     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3421                                          ARM::VLD4LNq32Pseudo_UPD };
3422     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3423     return;
3424   }
3425
3426   case ARMISD::VST1_UPD: {
3427     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3428                                          ARM::VST1d16wb_fixed,
3429                                          ARM::VST1d32wb_fixed,
3430                                          ARM::VST1d64wb_fixed };
3431     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3432                                          ARM::VST1q16wb_fixed,
3433                                          ARM::VST1q32wb_fixed,
3434                                          ARM::VST1q64wb_fixed };
3435     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3436     return;
3437   }
3438
3439   case ARMISD::VST2_UPD: {
3440     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3441                                          ARM::VST2d16wb_fixed,
3442                                          ARM::VST2d32wb_fixed,
3443                                          ARM::VST1q64wb_fixed};
3444     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3445                                          ARM::VST2q16PseudoWB_fixed,
3446                                          ARM::VST2q32PseudoWB_fixed };
3447     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3448     return;
3449   }
3450
3451   case ARMISD::VST3_UPD: {
3452     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3453                                          ARM::VST3d16Pseudo_UPD,
3454                                          ARM::VST3d32Pseudo_UPD,
3455                                          ARM::VST1d64TPseudoWB_fixed};
3456     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3457                                           ARM::VST3q16Pseudo_UPD,
3458                                           ARM::VST3q32Pseudo_UPD };
3459     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3460                                           ARM::VST3q16oddPseudo_UPD,
3461                                           ARM::VST3q32oddPseudo_UPD };
3462     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3463     return;
3464   }
3465
3466   case ARMISD::VST4_UPD: {
3467     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3468                                          ARM::VST4d16Pseudo_UPD,
3469                                          ARM::VST4d32Pseudo_UPD,
3470                                          ARM::VST1d64QPseudoWB_fixed};
3471     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3472                                           ARM::VST4q16Pseudo_UPD,
3473                                           ARM::VST4q32Pseudo_UPD };
3474     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3475                                           ARM::VST4q16oddPseudo_UPD,
3476                                           ARM::VST4q32oddPseudo_UPD };
3477     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3478     return;
3479   }
3480
3481   case ARMISD::VST2LN_UPD: {
3482     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3483                                          ARM::VST2LNd16Pseudo_UPD,
3484                                          ARM::VST2LNd32Pseudo_UPD };
3485     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3486                                          ARM::VST2LNq32Pseudo_UPD };
3487     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3488     return;
3489   }
3490
3491   case ARMISD::VST3LN_UPD: {
3492     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3493                                          ARM::VST3LNd16Pseudo_UPD,
3494                                          ARM::VST3LNd32Pseudo_UPD };
3495     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3496                                          ARM::VST3LNq32Pseudo_UPD };
3497     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3498     return;
3499   }
3500
3501   case ARMISD::VST4LN_UPD: {
3502     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3503                                          ARM::VST4LNd16Pseudo_UPD,
3504                                          ARM::VST4LNd32Pseudo_UPD };
3505     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3506                                          ARM::VST4LNq32Pseudo_UPD };
3507     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3508     return;
3509   }
3510
3511   case ISD::INTRINSIC_VOID:
3512   case ISD::INTRINSIC_W_CHAIN: {
3513     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3514     switch (IntNo) {
3515     default:
3516       break;
3517
3518     case Intrinsic::arm_mrrc:
3519     case Intrinsic::arm_mrrc2: {
3520       SDLoc dl(N);
3521       SDValue Chain = N->getOperand(0);
3522       unsigned Opc;
3523
3524       if (Subtarget->isThumb())
3525         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3526       else
3527         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3528
3529       SmallVector<SDValue, 5> Ops;
3530       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3531       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3532       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3533
3534       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3535       // instruction will always be '1111' but it is possible in assembly language to specify
3536       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3537       if (Opc != ARM::MRRC2) {
3538         Ops.push_back(getAL(CurDAG, dl));
3539         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3540       }
3541
3542       Ops.push_back(Chain);
3543
3544       // Writes to two registers.
3545       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3546
3547       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3548       return;
3549     }
3550     case Intrinsic::arm_ldaexd:
3551     case Intrinsic::arm_ldrexd: {
3552       SDLoc dl(N);
3553       SDValue Chain = N->getOperand(0);
3554       SDValue MemAddr = N->getOperand(2);
3555       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3556
3557       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3558       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3559                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3560
3561       // arm_ldrexd returns a i64 value in {i32, i32}
3562       std::vector<EVT> ResTys;
3563       if (isThumb) {
3564         ResTys.push_back(MVT::i32);
3565         ResTys.push_back(MVT::i32);
3566       } else
3567         ResTys.push_back(MVT::Untyped);
3568       ResTys.push_back(MVT::Other);
3569
3570       // Place arguments in the right order.
3571       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3572                        CurDAG->getRegister(0, MVT::i32), Chain};
3573       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3574       // Transfer memoperands.
3575       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3576       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3577
3578       // Remap uses.
3579       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3580       if (!SDValue(N, 0).use_empty()) {
3581         SDValue Result;
3582         if (isThumb)
3583           Result = SDValue(Ld, 0);
3584         else {
3585           SDValue SubRegIdx =
3586             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3587           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3588               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3589           Result = SDValue(ResNode,0);
3590         }
3591         ReplaceUses(SDValue(N, 0), Result);
3592       }
3593       if (!SDValue(N, 1).use_empty()) {
3594         SDValue Result;
3595         if (isThumb)
3596           Result = SDValue(Ld, 1);
3597         else {
3598           SDValue SubRegIdx =
3599             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3600           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3601               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3602           Result = SDValue(ResNode,0);
3603         }
3604         ReplaceUses(SDValue(N, 1), Result);
3605       }
3606       ReplaceUses(SDValue(N, 2), OutChain);
3607       CurDAG->RemoveDeadNode(N);
3608       return;
3609     }
3610     case Intrinsic::arm_stlexd:
3611     case Intrinsic::arm_strexd: {
3612       SDLoc dl(N);
3613       SDValue Chain = N->getOperand(0);
3614       SDValue Val0 = N->getOperand(2);
3615       SDValue Val1 = N->getOperand(3);
3616       SDValue MemAddr = N->getOperand(4);
3617
3618       // Store exclusive double return a i32 value which is the return status
3619       // of the issued store.
3620       const EVT ResTys[] = {MVT::i32, MVT::Other};
3621
3622       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3623       // Place arguments in the right order.
3624       SmallVector<SDValue, 7> Ops;
3625       if (isThumb) {
3626         Ops.push_back(Val0);
3627         Ops.push_back(Val1);
3628       } else
3629         // arm_strexd uses GPRPair.
3630         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3631       Ops.push_back(MemAddr);
3632       Ops.push_back(getAL(CurDAG, dl));
3633       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3634       Ops.push_back(Chain);
3635
3636       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3637       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3638                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3639
3640       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3641       // Transfer memoperands.
3642       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3643       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3644
3645       ReplaceNode(N, St);
3646       return;
3647     }
3648
3649     case Intrinsic::arm_neon_vld1: {
3650       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3651                                            ARM::VLD1d32, ARM::VLD1d64 };
3652       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3653                                            ARM::VLD1q32, ARM::VLD1q64};
3654       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3655       return;
3656     }
3657
3658     case Intrinsic::arm_neon_vld1x2: {
3659       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3660                                            ARM::VLD1q32, ARM::VLD1q64 };
3661       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3662                                            ARM::VLD1d16QPseudo,
3663                                            ARM::VLD1d32QPseudo,
3664                                            ARM::VLD1d64QPseudo };
3665       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3666       return;
3667     }
3668
3669     case Intrinsic::arm_neon_vld1x3: {
3670       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3671                                            ARM::VLD1d16TPseudo,
3672                                            ARM::VLD1d32TPseudo,
3673                                            ARM::VLD1d64TPseudo };
3674       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3675                                             ARM::VLD1q16LowTPseudo_UPD,
3676                                             ARM::VLD1q32LowTPseudo_UPD,
3677                                             ARM::VLD1q64LowTPseudo_UPD };
3678       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3679                                             ARM::VLD1q16HighTPseudo,
3680                                             ARM::VLD1q32HighTPseudo,
3681                                             ARM::VLD1q64HighTPseudo };
3682       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3683       return;
3684     }
3685
3686     case Intrinsic::arm_neon_vld1x4: {
3687       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3688                                            ARM::VLD1d16QPseudo,
3689                                            ARM::VLD1d32QPseudo,
3690                                            ARM::VLD1d64QPseudo };
3691       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3692                                             ARM::VLD1q16LowQPseudo_UPD,
3693                                             ARM::VLD1q32LowQPseudo_UPD,
3694                                             ARM::VLD1q64LowQPseudo_UPD };
3695       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3696                                             ARM::VLD1q16HighQPseudo,
3697                                             ARM::VLD1q32HighQPseudo,
3698                                             ARM::VLD1q64HighQPseudo };
3699       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3700       return;
3701     }
3702
3703     case Intrinsic::arm_neon_vld2: {
3704       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3705                                            ARM::VLD2d32, ARM::VLD1q64 };
3706       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3707                                            ARM::VLD2q32Pseudo };
3708       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3709       return;
3710     }
3711
3712     case Intrinsic::arm_neon_vld3: {
3713       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3714                                            ARM::VLD3d16Pseudo,
3715                                            ARM::VLD3d32Pseudo,
3716                                            ARM::VLD1d64TPseudo };
3717       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3718                                             ARM::VLD3q16Pseudo_UPD,
3719                                             ARM::VLD3q32Pseudo_UPD };
3720       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3721                                             ARM::VLD3q16oddPseudo,
3722                                             ARM::VLD3q32oddPseudo };
3723       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3724       return;
3725     }
3726
3727     case Intrinsic::arm_neon_vld4: {
3728       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3729                                            ARM::VLD4d16Pseudo,
3730                                            ARM::VLD4d32Pseudo,
3731                                            ARM::VLD1d64QPseudo };
3732       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3733                                             ARM::VLD4q16Pseudo_UPD,
3734                                             ARM::VLD4q32Pseudo_UPD };
3735       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3736                                             ARM::VLD4q16oddPseudo,
3737                                             ARM::VLD4q32oddPseudo };
3738       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3739       return;
3740     }
3741
3742     case Intrinsic::arm_neon_vld2dup: {
3743       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3744                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3745       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3746                                             ARM::VLD2DUPq16EvenPseudo,
3747                                             ARM::VLD2DUPq32EvenPseudo };
3748       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3749                                             ARM::VLD2DUPq16OddPseudo,
3750                                             ARM::VLD2DUPq32OddPseudo };
3751       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3752                    DOpcodes, QOpcodes0, QOpcodes1);
3753       return;
3754     }
3755
3756     case Intrinsic::arm_neon_vld3dup: {
3757       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3758                                            ARM::VLD3DUPd16Pseudo,
3759                                            ARM::VLD3DUPd32Pseudo,
3760                                            ARM::VLD1d64TPseudo };
3761       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3762                                             ARM::VLD3DUPq16EvenPseudo,
3763                                             ARM::VLD3DUPq32EvenPseudo };
3764       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3765                                             ARM::VLD3DUPq16OddPseudo,
3766                                             ARM::VLD3DUPq32OddPseudo };
3767       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3768                    DOpcodes, QOpcodes0, QOpcodes1);
3769       return;
3770     }
3771
3772     case Intrinsic::arm_neon_vld4dup: {
3773       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3774                                            ARM::VLD4DUPd16Pseudo,
3775                                            ARM::VLD4DUPd32Pseudo,
3776                                            ARM::VLD1d64QPseudo };
3777       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3778                                             ARM::VLD4DUPq16EvenPseudo,
3779                                             ARM::VLD4DUPq32EvenPseudo };
3780       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3781                                             ARM::VLD4DUPq16OddPseudo,
3782                                             ARM::VLD4DUPq32OddPseudo };
3783       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3784                    DOpcodes, QOpcodes0, QOpcodes1);
3785       return;
3786     }
3787
3788     case Intrinsic::arm_neon_vld2lane: {
3789       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3790                                            ARM::VLD2LNd16Pseudo,
3791                                            ARM::VLD2LNd32Pseudo };
3792       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3793                                            ARM::VLD2LNq32Pseudo };
3794       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3795       return;
3796     }
3797
3798     case Intrinsic::arm_neon_vld3lane: {
3799       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3800                                            ARM::VLD3LNd16Pseudo,
3801                                            ARM::VLD3LNd32Pseudo };
3802       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3803                                            ARM::VLD3LNq32Pseudo };
3804       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3805       return;
3806     }
3807
3808     case Intrinsic::arm_neon_vld4lane: {
3809       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3810                                            ARM::VLD4LNd16Pseudo,
3811                                            ARM::VLD4LNd32Pseudo };
3812       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3813                                            ARM::VLD4LNq32Pseudo };
3814       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3815       return;
3816     }
3817
3818     case Intrinsic::arm_neon_vst1: {
3819       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3820                                            ARM::VST1d32, ARM::VST1d64 };
3821       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3822                                            ARM::VST1q32, ARM::VST1q64 };
3823       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3824       return;
3825     }
3826
3827     case Intrinsic::arm_neon_vst1x2: {
3828       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3829                                            ARM::VST1q32, ARM::VST1q64 };
3830       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3831                                            ARM::VST1d16QPseudo,
3832                                            ARM::VST1d32QPseudo,
3833                                            ARM::VST1d64QPseudo };
3834       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3835       return;
3836     }
3837
3838     case Intrinsic::arm_neon_vst1x3: {
3839       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3840                                            ARM::VST1d16TPseudo,
3841                                            ARM::VST1d32TPseudo,
3842                                            ARM::VST1d64TPseudo };
3843       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3844                                             ARM::VST1q16LowTPseudo_UPD,
3845                                             ARM::VST1q32LowTPseudo_UPD,
3846                                             ARM::VST1q64LowTPseudo_UPD };
3847       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3848                                             ARM::VST1q16HighTPseudo,
3849                                             ARM::VST1q32HighTPseudo,
3850                                             ARM::VST1q64HighTPseudo };
3851       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3852       return;
3853     }
3854
3855     case Intrinsic::arm_neon_vst1x4: {
3856       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3857                                            ARM::VST1d16QPseudo,
3858                                            ARM::VST1d32QPseudo,
3859                                            ARM::VST1d64QPseudo };
3860       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3861                                             ARM::VST1q16LowQPseudo_UPD,
3862                                             ARM::VST1q32LowQPseudo_UPD,
3863                                             ARM::VST1q64LowQPseudo_UPD };
3864       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3865                                             ARM::VST1q16HighQPseudo,
3866                                             ARM::VST1q32HighQPseudo,
3867                                             ARM::VST1q64HighQPseudo };
3868       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3869       return;
3870     }
3871
3872     case Intrinsic::arm_neon_vst2: {
3873       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3874                                            ARM::VST2d32, ARM::VST1q64 };
3875       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3876                                            ARM::VST2q32Pseudo };
3877       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3878       return;
3879     }
3880
3881     case Intrinsic::arm_neon_vst3: {
3882       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3883                                            ARM::VST3d16Pseudo,
3884                                            ARM::VST3d32Pseudo,
3885                                            ARM::VST1d64TPseudo };
3886       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3887                                             ARM::VST3q16Pseudo_UPD,
3888                                             ARM::VST3q32Pseudo_UPD };
3889       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3890                                             ARM::VST3q16oddPseudo,
3891                                             ARM::VST3q32oddPseudo };
3892       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3893       return;
3894     }
3895
3896     case Intrinsic::arm_neon_vst4: {
3897       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3898                                            ARM::VST4d16Pseudo,
3899                                            ARM::VST4d32Pseudo,
3900                                            ARM::VST1d64QPseudo };
3901       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3902                                             ARM::VST4q16Pseudo_UPD,
3903                                             ARM::VST4q32Pseudo_UPD };
3904       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3905                                             ARM::VST4q16oddPseudo,
3906                                             ARM::VST4q32oddPseudo };
3907       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3908       return;
3909     }
3910
3911     case Intrinsic::arm_neon_vst2lane: {
3912       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3913                                            ARM::VST2LNd16Pseudo,
3914                                            ARM::VST2LNd32Pseudo };
3915       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3916                                            ARM::VST2LNq32Pseudo };
3917       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3918       return;
3919     }
3920
3921     case Intrinsic::arm_neon_vst3lane: {
3922       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3923                                            ARM::VST3LNd16Pseudo,
3924                                            ARM::VST3LNd32Pseudo };
3925       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3926                                            ARM::VST3LNq32Pseudo };
3927       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3928       return;
3929     }
3930
3931     case Intrinsic::arm_neon_vst4lane: {
3932       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3933                                            ARM::VST4LNd16Pseudo,
3934                                            ARM::VST4LNd32Pseudo };
3935       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3936                                            ARM::VST4LNq32Pseudo };
3937       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3938       return;
3939     }
3940     }
3941     break;
3942   }
3943
3944   case ISD::ATOMIC_CMP_SWAP:
3945     SelectCMP_SWAP(N);
3946     return;
3947   }
3948
3949   SelectCode(N);
3950 }
3951
3952 // Inspect a register string of the form
3953 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3954 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3955 // and obtain the integer operands from them, adding these operands to the
3956 // provided vector.
3957 static void getIntOperandsFromRegisterString(StringRef RegString,
3958                                              SelectionDAG *CurDAG,
3959                                              const SDLoc &DL,
3960                                              std::vector<SDValue> &Ops) {
3961   SmallVector<StringRef, 5> Fields;
3962   RegString.split(Fields, ':');
3963
3964   if (Fields.size() > 1) {
3965     bool AllIntFields = true;
3966
3967     for (StringRef Field : Fields) {
3968       // Need to trim out leading 'cp' characters and get the integer field.
3969       unsigned IntField;
3970       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3971       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3972     }
3973
3974     assert(AllIntFields &&
3975             "Unexpected non-integer value in special register string.");
3976   }
3977 }
3978
3979 // Maps a Banked Register string to its mask value. The mask value returned is
3980 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3981 // mask operand, which expresses which register is to be used, e.g. r8, and in
3982 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3983 // was invalid.
3984 static inline int getBankedRegisterMask(StringRef RegString) {
3985   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3986   if (!TheReg)
3987      return -1;
3988   return TheReg->Encoding;
3989 }
3990
3991 // The flags here are common to those allowed for apsr in the A class cores and
3992 // those allowed for the special registers in the M class cores. Returns a
3993 // value representing which flags were present, -1 if invalid.
3994 static inline int getMClassFlagsMask(StringRef Flags) {
3995   return StringSwitch<int>(Flags)
3996           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3997                          // correct when flags are not permitted
3998           .Case("g", 0x1)
3999           .Case("nzcvq", 0x2)
4000           .Case("nzcvqg", 0x3)
4001           .Default(-1);
4002 }
4003
4004 // Maps MClass special registers string to its value for use in the
4005 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4006 // Returns -1 to signify that the string was invalid.
4007 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4008   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4009   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4010   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4011     return -1;
4012   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4013 }
4014
4015 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4016   // The mask operand contains the special register (R Bit) in bit 4, whether
4017   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4018   // bits 3-0 contains the fields to be accessed in the special register, set by
4019   // the flags provided with the register.
4020   int Mask = 0;
4021   if (Reg == "apsr") {
4022     // The flags permitted for apsr are the same flags that are allowed in
4023     // M class registers. We get the flag value and then shift the flags into
4024     // the correct place to combine with the mask.
4025     Mask = getMClassFlagsMask(Flags);
4026     if (Mask == -1)
4027       return -1;
4028     return Mask << 2;
4029   }
4030
4031   if (Reg != "cpsr" && Reg != "spsr") {
4032     return -1;
4033   }
4034
4035   // This is the same as if the flags were "fc"
4036   if (Flags.empty() || Flags == "all")
4037     return Mask | 0x9;
4038
4039   // Inspect the supplied flags string and set the bits in the mask for
4040   // the relevant and valid flags allowed for cpsr and spsr.
4041   for (char Flag : Flags) {
4042     int FlagVal;
4043     switch (Flag) {
4044       case 'c':
4045         FlagVal = 0x1;
4046         break;
4047       case 'x':
4048         FlagVal = 0x2;
4049         break;
4050       case 's':
4051         FlagVal = 0x4;
4052         break;
4053       case 'f':
4054         FlagVal = 0x8;
4055         break;
4056       default:
4057         FlagVal = 0;
4058     }
4059
4060     // This avoids allowing strings where the same flag bit appears twice.
4061     if (!FlagVal || (Mask & FlagVal))
4062       return -1;
4063     Mask |= FlagVal;
4064   }
4065
4066   // If the register is spsr then we need to set the R bit.
4067   if (Reg == "spsr")
4068     Mask |= 0x10;
4069
4070   return Mask;
4071 }
4072
4073 // Lower the read_register intrinsic to ARM specific DAG nodes
4074 // using the supplied metadata string to select the instruction node to use
4075 // and the registers/masks to construct as operands for the node.
4076 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4077   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4078   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4079   bool IsThumb2 = Subtarget->isThumb2();
4080   SDLoc DL(N);
4081
4082   std::vector<SDValue> Ops;
4083   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4084
4085   if (!Ops.empty()) {
4086     // If the special register string was constructed of fields (as defined
4087     // in the ACLE) then need to lower to MRC node (32 bit) or
4088     // MRRC node(64 bit), we can make the distinction based on the number of
4089     // operands we have.
4090     unsigned Opcode;
4091     SmallVector<EVT, 3> ResTypes;
4092     if (Ops.size() == 5){
4093       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4094       ResTypes.append({ MVT::i32, MVT::Other });
4095     } else {
4096       assert(Ops.size() == 3 &&
4097               "Invalid number of fields in special register string.");
4098       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4099       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4100     }
4101
4102     Ops.push_back(getAL(CurDAG, DL));
4103     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4104     Ops.push_back(N->getOperand(0));
4105     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4106     return true;
4107   }
4108
4109   std::string SpecialReg = RegString->getString().lower();
4110
4111   int BankedReg = getBankedRegisterMask(SpecialReg);
4112   if (BankedReg != -1) {
4113     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4114             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4115             N->getOperand(0) };
4116     ReplaceNode(
4117         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4118                                   DL, MVT::i32, MVT::Other, Ops));
4119     return true;
4120   }
4121
4122   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4123   // corresponding to the register that is being read from. So we switch on the
4124   // string to find which opcode we need to use.
4125   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4126                     .Case("fpscr", ARM::VMRS)
4127                     .Case("fpexc", ARM::VMRS_FPEXC)
4128                     .Case("fpsid", ARM::VMRS_FPSID)
4129                     .Case("mvfr0", ARM::VMRS_MVFR0)
4130                     .Case("mvfr1", ARM::VMRS_MVFR1)
4131                     .Case("mvfr2", ARM::VMRS_MVFR2)
4132                     .Case("fpinst", ARM::VMRS_FPINST)
4133                     .Case("fpinst2", ARM::VMRS_FPINST2)
4134                     .Default(0);
4135
4136   // If an opcode was found then we can lower the read to a VFP instruction.
4137   if (Opcode) {
4138     if (!Subtarget->hasVFP2Base())
4139       return false;
4140     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4141       return false;
4142
4143     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4144             N->getOperand(0) };
4145     ReplaceNode(N,
4146                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4147     return true;
4148   }
4149
4150   // If the target is M Class then need to validate that the register string
4151   // is an acceptable value, so check that a mask can be constructed from the
4152   // string.
4153   if (Subtarget->isMClass()) {
4154     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4155     if (SYSmValue == -1)
4156       return false;
4157
4158     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4159                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4160                       N->getOperand(0) };
4161     ReplaceNode(
4162         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4163     return true;
4164   }
4165
4166   // Here we know the target is not M Class so we need to check if it is one
4167   // of the remaining possible values which are apsr, cpsr or spsr.
4168   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4169     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4170             N->getOperand(0) };
4171     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4172                                           DL, MVT::i32, MVT::Other, Ops));
4173     return true;
4174   }
4175
4176   if (SpecialReg == "spsr") {
4177     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4178             N->getOperand(0) };
4179     ReplaceNode(
4180         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4181                                   MVT::i32, MVT::Other, Ops));
4182     return true;
4183   }
4184
4185   return false;
4186 }
4187
4188 // Lower the write_register intrinsic to ARM specific DAG nodes
4189 // using the supplied metadata string to select the instruction node to use
4190 // and the registers/masks to use in the nodes
4191 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4192   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4193   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4194   bool IsThumb2 = Subtarget->isThumb2();
4195   SDLoc DL(N);
4196
4197   std::vector<SDValue> Ops;
4198   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4199
4200   if (!Ops.empty()) {
4201     // If the special register string was constructed of fields (as defined
4202     // in the ACLE) then need to lower to MCR node (32 bit) or
4203     // MCRR node(64 bit), we can make the distinction based on the number of
4204     // operands we have.
4205     unsigned Opcode;
4206     if (Ops.size() == 5) {
4207       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4208       Ops.insert(Ops.begin()+2, N->getOperand(2));
4209     } else {
4210       assert(Ops.size() == 3 &&
4211               "Invalid number of fields in special register string.");
4212       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4213       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4214       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4215     }
4216
4217     Ops.push_back(getAL(CurDAG, DL));
4218     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4219     Ops.push_back(N->getOperand(0));
4220
4221     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4222     return true;
4223   }
4224
4225   std::string SpecialReg = RegString->getString().lower();
4226   int BankedReg = getBankedRegisterMask(SpecialReg);
4227   if (BankedReg != -1) {
4228     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4229             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4230             N->getOperand(0) };
4231     ReplaceNode(
4232         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4233                                   DL, MVT::Other, Ops));
4234     return true;
4235   }
4236
4237   // The VFP registers are written to by creating SelectionDAG nodes with
4238   // opcodes corresponding to the register that is being written. So we switch
4239   // on the string to find which opcode we need to use.
4240   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4241                     .Case("fpscr", ARM::VMSR)
4242                     .Case("fpexc", ARM::VMSR_FPEXC)
4243                     .Case("fpsid", ARM::VMSR_FPSID)
4244                     .Case("fpinst", ARM::VMSR_FPINST)
4245                     .Case("fpinst2", ARM::VMSR_FPINST2)
4246                     .Default(0);
4247
4248   if (Opcode) {
4249     if (!Subtarget->hasVFP2Base())
4250       return false;
4251     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4252             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4253     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4254     return true;
4255   }
4256
4257   std::pair<StringRef, StringRef> Fields;
4258   Fields = StringRef(SpecialReg).rsplit('_');
4259   std::string Reg = Fields.first.str();
4260   StringRef Flags = Fields.second;
4261
4262   // If the target was M Class then need to validate the special register value
4263   // and retrieve the mask for use in the instruction node.
4264   if (Subtarget->isMClass()) {
4265     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4266     if (SYSmValue == -1)
4267       return false;
4268
4269     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4270                       N->getOperand(2), getAL(CurDAG, DL),
4271                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4272     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4273     return true;
4274   }
4275
4276   // We then check to see if a valid mask can be constructed for one of the
4277   // register string values permitted for the A and R class cores. These values
4278   // are apsr, spsr and cpsr; these are also valid on older cores.
4279   int Mask = getARClassRegisterMask(Reg, Flags);
4280   if (Mask != -1) {
4281     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4282             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4283             N->getOperand(0) };
4284     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4285                                           DL, MVT::Other, Ops));
4286     return true;
4287   }
4288
4289   return false;
4290 }
4291
4292 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4293   std::vector<SDValue> AsmNodeOperands;
4294   unsigned Flag, Kind;
4295   bool Changed = false;
4296   unsigned NumOps = N->getNumOperands();
4297
4298   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4299   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4300   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4301   // respectively. Since there is no constraint to explicitly specify a
4302   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4303   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4304   // them into a GPRPair.
4305
4306   SDLoc dl(N);
4307   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4308                                    : SDValue(nullptr,0);
4309
4310   SmallVector<bool, 8> OpChanged;
4311   // Glue node will be appended late.
4312   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4313     SDValue op = N->getOperand(i);
4314     AsmNodeOperands.push_back(op);
4315
4316     if (i < InlineAsm::Op_FirstOperand)
4317       continue;
4318
4319     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4320       Flag = C->getZExtValue();
4321       Kind = InlineAsm::getKind(Flag);
4322     }
4323     else
4324       continue;
4325
4326     // Immediate operands to inline asm in the SelectionDAG are modeled with
4327     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4328     // the second is a constant with the value of the immediate. If we get here
4329     // and we have a Kind_Imm, skip the next operand, and continue.
4330     if (Kind == InlineAsm::Kind_Imm) {
4331       SDValue op = N->getOperand(++i);
4332       AsmNodeOperands.push_back(op);
4333       continue;
4334     }
4335
4336     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4337     if (NumRegs)
4338       OpChanged.push_back(false);
4339
4340     unsigned DefIdx = 0;
4341     bool IsTiedToChangedOp = false;
4342     // If it's a use that is tied with a previous def, it has no
4343     // reg class constraint.
4344     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4345       IsTiedToChangedOp = OpChanged[DefIdx];
4346
4347     // Memory operands to inline asm in the SelectionDAG are modeled with two
4348     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4349     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4350     // it doesn't get misinterpreted), and continue. We do this here because
4351     // it's important to update the OpChanged array correctly before moving on.
4352     if (Kind == InlineAsm::Kind_Mem) {
4353       SDValue op = N->getOperand(++i);
4354       AsmNodeOperands.push_back(op);
4355       continue;
4356     }
4357
4358     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4359         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4360       continue;
4361
4362     unsigned RC;
4363     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4364     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4365         || NumRegs != 2)
4366       continue;
4367
4368     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4369     SDValue V0 = N->getOperand(i+1);
4370     SDValue V1 = N->getOperand(i+2);
4371     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4372     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4373     SDValue PairedReg;
4374     MachineRegisterInfo &MRI = MF->getRegInfo();
4375
4376     if (Kind == InlineAsm::Kind_RegDef ||
4377         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4378       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4379       // the original GPRs.
4380
4381       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4382       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4383       SDValue Chain = SDValue(N,0);
4384
4385       SDNode *GU = N->getGluedUser();
4386       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4387                                                Chain.getValue(1));
4388
4389       // Extract values from a GPRPair reg and copy to the original GPR reg.
4390       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4391                                                     RegCopy);
4392       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4393                                                     RegCopy);
4394       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4395                                         RegCopy.getValue(1));
4396       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4397
4398       // Update the original glue user.
4399       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4400       Ops.push_back(T1.getValue(1));
4401       CurDAG->UpdateNodeOperands(GU, Ops);
4402     }
4403     else {
4404       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4405       // GPRPair and then pass the GPRPair to the inline asm.
4406       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4407
4408       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4409       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4410                                           Chain.getValue(1));
4411       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4412                                           T0.getValue(1));
4413       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4414
4415       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4416       // i32 VRs of inline asm with it.
4417       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4418       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4419       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4420
4421       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4422       Glue = Chain.getValue(1);
4423     }
4424
4425     Changed = true;
4426
4427     if(PairedReg.getNode()) {
4428       OpChanged[OpChanged.size() -1 ] = true;
4429       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4430       if (IsTiedToChangedOp)
4431         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4432       else
4433         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4434       // Replace the current flag.
4435       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4436           Flag, dl, MVT::i32);
4437       // Add the new register node and skip the original two GPRs.
4438       AsmNodeOperands.push_back(PairedReg);
4439       // Skip the next two GPRs.
4440       i += 2;
4441     }
4442   }
4443
4444   if (Glue.getNode())
4445     AsmNodeOperands.push_back(Glue);
4446   if (!Changed)
4447     return false;
4448
4449   SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4450       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4451   New->setNodeId(-1);
4452   ReplaceNode(N, New.getNode());
4453   return true;
4454 }
4455
4456
4457 bool ARMDAGToDAGISel::
4458 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4459                              std::vector<SDValue> &OutOps) {
4460   switch(ConstraintID) {
4461   default:
4462     llvm_unreachable("Unexpected asm memory constraint");
4463   case InlineAsm::Constraint_i:
4464     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4465     //        be an immediate and not a memory constraint.
4466     LLVM_FALLTHROUGH;
4467   case InlineAsm::Constraint_m:
4468   case InlineAsm::Constraint_o:
4469   case InlineAsm::Constraint_Q:
4470   case InlineAsm::Constraint_Um:
4471   case InlineAsm::Constraint_Un:
4472   case InlineAsm::Constraint_Uq:
4473   case InlineAsm::Constraint_Us:
4474   case InlineAsm::Constraint_Ut:
4475   case InlineAsm::Constraint_Uv:
4476   case InlineAsm::Constraint_Uy:
4477     // Require the address to be in a register.  That is safe for all ARM
4478     // variants and it is hard to do anything much smarter without knowing
4479     // how the operand is used.
4480     OutOps.push_back(Op);
4481     return false;
4482   }
4483   return true;
4484 }
4485
4486 /// createARMISelDag - This pass converts a legalized DAG into a
4487 /// ARM-specific DAG, ready for instruction scheduling.
4488 ///
4489 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4490                                      CodeGenOpt::Level OptLevel) {
4491   return new ARMDAGToDAGISel(TM, OptLevel);
4492 }