[ARM] DLS/LE low-overhead loop code generation
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

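// For illustration (not part of the original source): with Scale = 4 and the
// half-open range [0, 256), a constant of 1020 yields ScaledConstant = 255
// and succeeds, 1022 fails the divisibility check, and 1024 (scaled to 256)
// falls just outside the range.
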
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, a left-shift amount of 1 or 2 is free but other amounts are
      // not. e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

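// Worked instance of the PreprocessISelDAG rewrite above (illustrative, not
// part of the original source): with c1 = 16 and c2 = 1020 (tz = 2),
//   (add X1, (and (srl X2, 16), 1020))
// becomes
//   (add X1, (shl (and (srl X2, 18), 255), 2))
// where the srl/and pair selects to a UBFX of 8 bits starting at bit 18, and
// the shl folds into the add as an LSL #2 shifter operand.
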
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                                     // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                         // MOVW
         ARM_AM::getT2SOImmVal(Val) != -1 ||                      // MOV
         ARM_AM::getT2SOImmVal(~Val) != -1))                      // MVN
      return 1;
    if (Val <= 510) return 2;                                     // MOV + ADDi8
    if (~Val <= 255) return 2;                                    // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;              // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;                 // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;                // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1;       // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;                 // two instrs
  }
  if (Subtarget->useMovt()) return 2;                             // MOVW + MOVT
  return 3;                                                       // Literal pool load
}

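// Illustrative costs under this model (not part of the original source): in
// ARM mode, 0xFF00 is a single MOV (an 8-bit value rotated by 8); a constant
// like 0x12345678 costs 2 when MOVW/MOVT are available and otherwise falls
// back to a literal pool load with cost 3; on Thumb1, 510 costs 2 (MOV #255
// followed by ADDS #255).
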
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}

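// For illustration (not part of the original source): on Thumb1, (mul x, 510)
// passes this check with PowerOfTwo = 1 and NewMulConst = 255, since
// materializing 255 takes one instruction while 510 takes two; the extracted
// shift-by-1 can then ride along for free as an LSL shifter operand.
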
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

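// Illustrative note (not part of the original source): given
//   (or (shl x, 4), 3)
// the low four bits of (shl x, 4) are known zero, so the OR shares no set
// bits with 3 and is equivalent to an ADD; selecting it as one unlocks
// add-style immediates and addressing-mode folds.
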
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

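// For illustration (not part of the original source): a C-level access such
// as p[2] for an int pointer produces (add p, 8), which this routine matches
// as Base = p, OffImm = 8, so a load of that address can select to something
// like "ldr r0, [r1, #8]". An offset of 5000 falls outside the signed 12-bit
// window (-4095..4095) and is rejected, leaving the address to be
// materialized with a separate add.
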
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

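// Illustrative note (not part of the original source): for a Thumb1 access to
// p - 4, the negative offset is not encodable in the immediate load forms, so
// the add of -4 is kept as a separate instruction (which becomes a SUBS) and
// the load selects with a zero offset, roughly "subs r0, #4" followed by
// "ldr r1, [r0]".
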
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // For LHS+RHS to result in an offset that's a multiple of 4 the object
      // indexed by the LHS must be 4-byte aligned.
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlignment(FI) < 4)
        MFI.setObjectAlignment(FI, 4);
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

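// Illustrative note (not part of the original source): (add r0, (shl r1, 2))
// matches here as Base = r0, OffReg = r1, ShImm = 2, so a load of that
// address can select to something like "ldr.w r2, [r0, r1, lsl #2]". Shift
// amounts of 4 or more do not fit the T2 register-offset encoding and the
// shift is left unfolded.
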
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
             ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
             : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

1529 /// Form a GPRPair pseudo register from a pair of GPR regs.
1530 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1531 SDLoc dl(V0.getNode());
1532 SDValue RegClass =
1533 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1534 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1535 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1536 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1537 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1540 /// Form a D register from a pair of S registers.
1541 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1542 SDLoc dl(V0.getNode());
1543 SDValue RegClass =
1544 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1545 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1546 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1547 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1548 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1551 /// Form a quad register from a pair of D registers.
1552 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1553 SDLoc dl(V0.getNode());
1554 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1555 MVT::i32);
1556 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1557 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1558 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1559 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1562 /// Form 4 consecutive D registers from a pair of Q registers.
1563 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1564 SDLoc dl(V0.getNode());
1565 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1566 MVT::i32);
1567 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1568 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1569 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1570 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1573 /// Form 4 consecutive S registers.
1574 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1575 SDValue V2, SDValue V3) {
1576 SDLoc dl(V0.getNode());
1577 SDValue RegClass =
1578 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1579 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1580 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1581 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1582 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1583 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1584 V2, SubReg2, V3, SubReg3 };
1585 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1588 /// Form 4 consecutive D registers.
1589 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1590 SDValue V2, SDValue V3) {
1591 SDLoc dl(V0.getNode());
1592 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1593 MVT::i32);
1594 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1595 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1596 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1597 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1598 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1599 V2, SubReg2, V3, SubReg3 };
1600 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1603 /// Form 4 consecutive Q registers.
1604 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1605 SDValue V2, SDValue V3) {
1606 SDLoc dl(V0.getNode());
1607 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1608 MVT::i32);
1609 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1610 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1611 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1612 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1613 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1614 V2, SubReg2, V3, SubReg3 };
1615 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1616 }
1618 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1619 /// of a NEON VLD or VST instruction. The supported values depend on the
1620 /// number of registers being loaded.
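/// For example, a load of two Q registers (NumRegs == 4) from a 64-byte
/// aligned address is emitted with the maximum encodable 32-byte (256-bit)
/// alignment, while a single D register (NumRegs == 1) at 16-byte alignment
/// is clamped down to the 8-byte encoding.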
1621 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1622 unsigned NumVecs, bool is64BitVector) {
1623 unsigned NumRegs = NumVecs;
1624 if (!is64BitVector && NumVecs < 3)
1625 NumRegs *= 2;
1627 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1628 if (Alignment >= 32 && NumRegs == 4)
1629 Alignment = 32;
1630 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1631 Alignment = 16;
1632 else if (Alignment >= 8)
1633 Alignment = 8;
1634 else
1635 Alignment = 0;
1637 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1638 }
1640 static bool isVLDfixed(unsigned Opc)
1641 {
1642 switch (Opc) {
1643 default: return false;
1644 case ARM::VLD1d8wb_fixed : return true;
1645 case ARM::VLD1d16wb_fixed : return true;
1646 case ARM::VLD1d64Qwb_fixed : return true;
1647 case ARM::VLD1d32wb_fixed : return true;
1648 case ARM::VLD1d64wb_fixed : return true;
1649 case ARM::VLD1d64TPseudoWB_fixed : return true;
1650 case ARM::VLD1d64QPseudoWB_fixed : return true;
1651 case ARM::VLD1q8wb_fixed : return true;
1652 case ARM::VLD1q16wb_fixed : return true;
1653 case ARM::VLD1q32wb_fixed : return true;
1654 case ARM::VLD1q64wb_fixed : return true;
1655 case ARM::VLD1DUPd8wb_fixed : return true;
1656 case ARM::VLD1DUPd16wb_fixed : return true;
1657 case ARM::VLD1DUPd32wb_fixed : return true;
1658 case ARM::VLD1DUPq8wb_fixed : return true;
1659 case ARM::VLD1DUPq16wb_fixed : return true;
1660 case ARM::VLD1DUPq32wb_fixed : return true;
1661 case ARM::VLD2d8wb_fixed : return true;
1662 case ARM::VLD2d16wb_fixed : return true;
1663 case ARM::VLD2d32wb_fixed : return true;
1664 case ARM::VLD2q8PseudoWB_fixed : return true;
1665 case ARM::VLD2q16PseudoWB_fixed : return true;
1666 case ARM::VLD2q32PseudoWB_fixed : return true;
1667 case ARM::VLD2DUPd8wb_fixed : return true;
1668 case ARM::VLD2DUPd16wb_fixed : return true;
1669 case ARM::VLD2DUPd32wb_fixed : return true;
1670 }
1671 }
1673 static bool isVSTfixed(unsigned Opc)
1674 {
1675 switch (Opc) {
1676 default: return false;
1677 case ARM::VST1d8wb_fixed : return true;
1678 case ARM::VST1d16wb_fixed : return true;
1679 case ARM::VST1d32wb_fixed : return true;
1680 case ARM::VST1d64wb_fixed : return true;
1681 case ARM::VST1q8wb_fixed : return true;
1682 case ARM::VST1q16wb_fixed : return true;
1683 case ARM::VST1q32wb_fixed : return true;
1684 case ARM::VST1q64wb_fixed : return true;
1685 case ARM::VST1d64TPseudoWB_fixed : return true;
1686 case ARM::VST1d64QPseudoWB_fixed : return true;
1687 case ARM::VST2d8wb_fixed : return true;
1688 case ARM::VST2d16wb_fixed : return true;
1689 case ARM::VST2d32wb_fixed : return true;
1690 case ARM::VST2q8PseudoWB_fixed : return true;
1691 case ARM::VST2q16PseudoWB_fixed : return true;
1692 case ARM::VST2q32PseudoWB_fixed : return true;
1693 }
1694 }
1696 // Get the register stride update opcode of a VLD/VST instruction that
1697 // is otherwise equivalent to the given fixed stride updating instruction.
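// For example, VLD1d8wb_fixed ("vld1.8 {d0}, [r0]!", base advanced by the
// access size) maps to VLD1d8wb_register ("vld1.8 {d0}, [r0], r1", base
// advanced by a general-purpose register).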
1698 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1699 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1700 && "Incorrect fixed stride updating instruction.");
1701 switch (Opc) {
1702 default: break;
1703 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1704 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1705 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1706 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1707 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1708 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1709 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1710 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1711 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1712 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1713 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1714 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1715 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1716 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1717 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1718 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1719 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1720 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1722 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1723 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1724 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1725 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1726 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1727 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1728 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1729 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1730 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1731 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1733 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1734 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1735 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1736 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1737 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1738 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1740 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1741 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1742 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1743 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1744 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1745 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1747 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1748 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1749 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1750 }
1751 return Opc; // If not one we handle, return it unchanged.
1752 }
1754 /// Returns true if the given increment is a Constant known to be equal to the
1755 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1756 /// be used.
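/// For example, a vld2 of two 64-bit vectors accesses 2 * 8 == 16 bytes, so
/// only a constant increment of exactly 16 allows the post-increment form.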
1757 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1758 auto C = dyn_cast<ConstantSDNode>(Inc);
1759 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1760 }
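/// SelectVLD - Select a NEON structured load (vld1-vld4). As a rough sketch,
/// a non-updating vld2 intrinsic on two v4i16 vectors becomes:
///   %vld:qpr = VLD2d16 %addr, %align, pred, %noreg, %chain
///   %r0 = EXTRACT_SUBREG %vld, dsub_0   ; first result vector
///   %r1 = EXTRACT_SUBREG %vld, dsub_1   ; second result vector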
1762 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1763 const uint16_t *DOpcodes,
1764 const uint16_t *QOpcodes0,
1765 const uint16_t *QOpcodes1) {
1766 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1767 SDLoc dl(N);
1769 SDValue MemAddr, Align;
1770 bool IsIntrinsic = !isUpdating; // By coincidence, none of the supported
1771 // updating nodes are intrinsics.
1772 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1773 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1774 return;
1776 SDValue Chain = N->getOperand(0);
1777 EVT VT = N->getValueType(0);
1778 bool is64BitVector = VT.is64BitVector();
1779 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1781 unsigned OpcodeIndex;
1782 switch (VT.getSimpleVT().SimpleTy) {
1783 default: llvm_unreachable("unhandled vld type");
1784 // Double-register operations:
1785 case MVT::v8i8: OpcodeIndex = 0; break;
1786 case MVT::v4f16:
1787 case MVT::v4i16: OpcodeIndex = 1; break;
1788 case MVT::v2f32:
1789 case MVT::v2i32: OpcodeIndex = 2; break;
1790 case MVT::v1i64: OpcodeIndex = 3; break;
1791 // Quad-register operations:
1792 case MVT::v16i8: OpcodeIndex = 0; break;
1793 case MVT::v8f16:
1794 case MVT::v8i16: OpcodeIndex = 1; break;
1795 case MVT::v4f32:
1796 case MVT::v4i32: OpcodeIndex = 2; break;
1797 case MVT::v2f64:
1798 case MVT::v2i64: OpcodeIndex = 3; break;
1801 EVT ResTy;
1802 if (NumVecs == 1)
1803 ResTy = VT;
1804 else {
1805 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1806 if (!is64BitVector)
1807 ResTyElts *= 2;
1808 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1810 std::vector<EVT> ResTys;
1811 ResTys.push_back(ResTy);
1812 if (isUpdating)
1813 ResTys.push_back(MVT::i32);
1814 ResTys.push_back(MVT::Other);
1816 SDValue Pred = getAL(CurDAG, dl);
1817 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1818 SDNode *VLd;
1819 SmallVector<SDValue, 7> Ops;
1821 // Double registers and VLD1/VLD2 quad registers are directly supported.
1822 if (is64BitVector || NumVecs <= 2) {
1823 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1824 QOpcodes0[OpcodeIndex]);
1825 Ops.push_back(MemAddr);
1826 Ops.push_back(Align);
1827 if (isUpdating) {
1828 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1829 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1830 if (!IsImmUpdate) {
1831 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1832 // check for the opcode rather than the number of vector elements.
1833 if (isVLDfixed(Opc))
1834 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1835 Ops.push_back(Inc);
1836 // Fixed-increment VLD1/VLD2 do not take the Reg0 operand, so only
1837 // include it for the other opcodes.
1838 } else if (!isVLDfixed(Opc))
1839 Ops.push_back(Reg0);
1841 Ops.push_back(Pred);
1842 Ops.push_back(Reg0);
1843 Ops.push_back(Chain);
1844 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1846 } else {
1847 // Otherwise, quad registers are loaded with two separate instructions,
1848 // where one loads the even registers and the other loads the odd registers.
1849 EVT AddrTy = MemAddr.getValueType();
1851 // Load the even subregs. This is always an updating load, so that it
1852 // provides the address to the second load for the odd subregs.
1853 SDValue ImplDef =
1854 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1855 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1856 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1857 ResTy, AddrTy, MVT::Other, OpsA);
1858 Chain = SDValue(VLdA, 2);
1860 // Load the odd subregs.
1861 Ops.push_back(SDValue(VLdA, 1));
1862 Ops.push_back(Align);
1863 if (isUpdating) {
1864 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1865 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1866 "only constant post-increment update allowed for VLD3/4");
1867 (void)Inc;
1868 Ops.push_back(Reg0);
1870 Ops.push_back(SDValue(VLdA, 0));
1871 Ops.push_back(Pred);
1872 Ops.push_back(Reg0);
1873 Ops.push_back(Chain);
1874 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1877 // Transfer memoperands.
1878 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1879 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
1881 if (NumVecs == 1) {
1882 ReplaceNode(N, VLd);
1883 return;
1886 // Extract out the subregisters.
1887 SDValue SuperReg = SDValue(VLd, 0);
1888 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1889 ARM::qsub_3 == ARM::qsub_0 + 3,
1890 "Unexpected subreg numbering");
1891 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1892 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1893 ReplaceUses(SDValue(N, Vec),
1894 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1895 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1896 if (isUpdating)
1897 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1898 CurDAG->RemoveDeadNode(N);
1899 }
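/// SelectVST - Select a NEON structured store (vst1-vst4). The source vectors
/// are first glued together with the REG_SEQUENCE helpers above; e.g. a vst2
/// of two D registers stores a single v2i64 pair register through one VST2
/// machine node.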
1901 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1902 const uint16_t *DOpcodes,
1903 const uint16_t *QOpcodes0,
1904 const uint16_t *QOpcodes1) {
1905 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1906 SDLoc dl(N);
1908 SDValue MemAddr, Align;
1909 bool IsIntrinsic = !isUpdating; // By coincidence, none of the supported
1910 // updating nodes are intrinsics.
1911 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1912 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1913 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1914 return;
1916 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1918 SDValue Chain = N->getOperand(0);
1919 EVT VT = N->getOperand(Vec0Idx).getValueType();
1920 bool is64BitVector = VT.is64BitVector();
1921 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1923 unsigned OpcodeIndex;
1924 switch (VT.getSimpleVT().SimpleTy) {
1925 default: llvm_unreachable("unhandled vst type");
1926 // Double-register operations:
1927 case MVT::v8i8: OpcodeIndex = 0; break;
1928 case MVT::v4f16:
1929 case MVT::v4i16: OpcodeIndex = 1; break;
1930 case MVT::v2f32:
1931 case MVT::v2i32: OpcodeIndex = 2; break;
1932 case MVT::v1i64: OpcodeIndex = 3; break;
1933 // Quad-register operations:
1934 case MVT::v16i8: OpcodeIndex = 0; break;
1935 case MVT::v8f16:
1936 case MVT::v8i16: OpcodeIndex = 1; break;
1937 case MVT::v4f32:
1938 case MVT::v4i32: OpcodeIndex = 2; break;
1939 case MVT::v2f64:
1940 case MVT::v2i64: OpcodeIndex = 3; break;
1943 std::vector<EVT> ResTys;
1944 if (isUpdating)
1945 ResTys.push_back(MVT::i32);
1946 ResTys.push_back(MVT::Other);
1948 SDValue Pred = getAL(CurDAG, dl);
1949 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1950 SmallVector<SDValue, 7> Ops;
1952 // Double registers and VST1/VST2 quad registers are directly supported.
1953 if (is64BitVector || NumVecs <= 2) {
1954 SDValue SrcReg;
1955 if (NumVecs == 1) {
1956 SrcReg = N->getOperand(Vec0Idx);
1957 } else if (is64BitVector) {
1958 // Form a REG_SEQUENCE to force register allocation.
1959 SDValue V0 = N->getOperand(Vec0Idx + 0);
1960 SDValue V1 = N->getOperand(Vec0Idx + 1);
1961 if (NumVecs == 2)
1962 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1963 else {
1964 SDValue V2 = N->getOperand(Vec0Idx + 2);
1965 // If it's a vst3, form a quad D-register and leave the last part as
1966 // an undef.
1967 SDValue V3 = (NumVecs == 3)
1968 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1969 : N->getOperand(Vec0Idx + 3);
1970 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
1972 } else {
1973 // Form a QQ register.
1974 SDValue Q0 = N->getOperand(Vec0Idx);
1975 SDValue Q1 = N->getOperand(Vec0Idx + 1);
1976 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
1979 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1980 QOpcodes0[OpcodeIndex]);
1981 Ops.push_back(MemAddr);
1982 Ops.push_back(Align);
1983 if (isUpdating) {
1984 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1985 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1986 if (!IsImmUpdate) {
1987 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
1988 // check for the opcode rather than the number of vector elements.
1989 if (isVSTfixed(Opc))
1990 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1991 Ops.push_back(Inc);
1992 }
1993 // Fixed-increment VST1/VST2 do not take the Reg0 operand, so only
1994 // include it for the other opcodes.
1995 else if (!isVSTfixed(Opc))
1996 Ops.push_back(Reg0);
1997 }
1998 Ops.push_back(SrcReg);
1999 Ops.push_back(Pred);
2000 Ops.push_back(Reg0);
2001 Ops.push_back(Chain);
2002 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2004 // Transfer memoperands.
2005 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2007 ReplaceNode(N, VSt);
2008 return;
2011 // Otherwise, quad registers are stored with two separate instructions,
2012 // where one stores the even registers and the other stores the odd registers.
2014 // Form the QQQQ REG_SEQUENCE.
2015 SDValue V0 = N->getOperand(Vec0Idx + 0);
2016 SDValue V1 = N->getOperand(Vec0Idx + 1);
2017 SDValue V2 = N->getOperand(Vec0Idx + 2);
2018 SDValue V3 = (NumVecs == 3)
2019 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2020 : N->getOperand(Vec0Idx + 3);
2021 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2023 // Store the even D registers. This is always an updating store, so that it
2024 // provides the address to the second store for the odd subregs.
2025 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2026 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2027 MemAddr.getValueType(),
2028 MVT::Other, OpsA);
2029 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2030 Chain = SDValue(VStA, 1);
2032 // Store the odd D registers.
2033 Ops.push_back(SDValue(VStA, 0));
2034 Ops.push_back(Align);
2035 if (isUpdating) {
2036 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2037 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2038 "only constant post-increment update allowed for VST3/4");
2039 (void)Inc;
2040 Ops.push_back(Reg0);
2042 Ops.push_back(RegSeq);
2043 Ops.push_back(Pred);
2044 Ops.push_back(Reg0);
2045 Ops.push_back(Chain);
2046 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2047 Ops);
2048 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2049 ReplaceNode(N, VStB);
2050 }
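/// SelectVLDSTLane - Select a NEON per-lane load or store (vld2lane,
/// vst4lane, etc.), e.g. "vld2.16 {d0[2], d1[2]}, [r0]". The vector operands
/// are bundled into one super-register and the lane number is passed as an
/// explicit immediate operand.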
2052 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2053 unsigned NumVecs,
2054 const uint16_t *DOpcodes,
2055 const uint16_t *QOpcodes) {
2056 assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2057 SDLoc dl(N);
2059 SDValue MemAddr, Align;
2060 bool IsIntrinsic = !isUpdating; // By coincidence, none of the supported
2061 // updating nodes are intrinsics.
2062 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2063 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2064 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2065 return;
2067 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2069 SDValue Chain = N->getOperand(0);
2070 unsigned Lane =
2071 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2072 EVT VT = N->getOperand(Vec0Idx).getValueType();
2073 bool is64BitVector = VT.is64BitVector();
2075 unsigned Alignment = 0;
2076 if (NumVecs != 3) {
2077 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2078 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2079 if (Alignment > NumBytes)
2080 Alignment = NumBytes;
2081 if (Alignment < 8 && Alignment < NumBytes)
2082 Alignment = 0;
2083 // Alignment must be a power of two; make sure of that.
2084 Alignment = (Alignment & -Alignment);
2085 if (Alignment == 1)
2086 Alignment = 0;
2088 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2090 unsigned OpcodeIndex;
2091 switch (VT.getSimpleVT().SimpleTy) {
2092 default: llvm_unreachable("unhandled vld/vst lane type");
2093 // Double-register operations:
2094 case MVT::v8i8: OpcodeIndex = 0; break;
2095 case MVT::v4f16:
2096 case MVT::v4i16: OpcodeIndex = 1; break;
2097 case MVT::v2f32:
2098 case MVT::v2i32: OpcodeIndex = 2; break;
2099 // Quad-register operations:
2100 case MVT::v8f16:
2101 case MVT::v8i16: OpcodeIndex = 0; break;
2102 case MVT::v4f32:
2103 case MVT::v4i32: OpcodeIndex = 1; break;
2106 std::vector<EVT> ResTys;
2107 if (IsLoad) {
2108 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2109 if (!is64BitVector)
2110 ResTyElts *= 2;
2111 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2112 MVT::i64, ResTyElts));
2114 if (isUpdating)
2115 ResTys.push_back(MVT::i32);
2116 ResTys.push_back(MVT::Other);
2118 SDValue Pred = getAL(CurDAG, dl);
2119 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2121 SmallVector<SDValue, 8> Ops;
2122 Ops.push_back(MemAddr);
2123 Ops.push_back(Align);
2124 if (isUpdating) {
2125 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2126 bool IsImmUpdate =
2127 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2128 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2131 SDValue SuperReg;
2132 SDValue V0 = N->getOperand(Vec0Idx + 0);
2133 SDValue V1 = N->getOperand(Vec0Idx + 1);
2134 if (NumVecs == 2) {
2135 if (is64BitVector)
2136 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2137 else
2138 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2139 } else {
2140 SDValue V2 = N->getOperand(Vec0Idx + 2);
2141 SDValue V3 = (NumVecs == 3)
2142 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2143 : N->getOperand(Vec0Idx + 3);
2144 if (is64BitVector)
2145 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2146 else
2147 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2149 Ops.push_back(SuperReg);
2150 Ops.push_back(getI32Imm(Lane, dl));
2151 Ops.push_back(Pred);
2152 Ops.push_back(Reg0);
2153 Ops.push_back(Chain);
2155 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2156 QOpcodes[OpcodeIndex]);
2157 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2158 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2159 if (!IsLoad) {
2160 ReplaceNode(N, VLdLn);
2161 return;
2164 // Extract the subregisters.
2165 SuperReg = SDValue(VLdLn, 0);
2166 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2167 ARM::qsub_3 == ARM::qsub_0 + 3,
2168 "Unexpected subreg numbering");
2169 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2170 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2171 ReplaceUses(SDValue(N, Vec),
2172 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2173 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2174 if (isUpdating)
2175 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2176 CurDAG->RemoveDeadNode(N);
2177 }
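/// SelectVLDDup - Select a NEON load-and-duplicate (vld1dup-vld4dup), e.g.
/// "vld1.32 {d0[]}, [r0]": one element is loaded and replicated into every
/// lane of the destination register(s).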
2179 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2180 bool isUpdating, unsigned NumVecs,
2181 const uint16_t *DOpcodes,
2182 const uint16_t *QOpcodes0,
2183 const uint16_t *QOpcodes1) {
2184 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2185 SDLoc dl(N);
2187 SDValue MemAddr, Align;
2188 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2189 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2190 return;
2192 SDValue Chain = N->getOperand(0);
2193 EVT VT = N->getValueType(0);
2194 bool is64BitVector = VT.is64BitVector();
2196 unsigned Alignment = 0;
2197 if (NumVecs != 3) {
2198 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2199 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2200 if (Alignment > NumBytes)
2201 Alignment = NumBytes;
2202 if (Alignment < 8 && Alignment < NumBytes)
2203 Alignment = 0;
2204 // Alignment must be a power of two; make sure of that.
2205 Alignment = (Alignment & -Alignment);
2206 if (Alignment == 1)
2207 Alignment = 0;
2209 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2211 unsigned OpcodeIndex;
2212 switch (VT.getSimpleVT().SimpleTy) {
2213 default: llvm_unreachable("unhandled vld-dup type");
2214 case MVT::v8i8:
2215 case MVT::v16i8: OpcodeIndex = 0; break;
2216 case MVT::v4i16:
2217 case MVT::v8i16:
2218 case MVT::v4f16:
2219 case MVT::v8f16:
2220 OpcodeIndex = 1; break;
2221 case MVT::v2f32:
2222 case MVT::v2i32:
2223 case MVT::v4f32:
2224 case MVT::v4i32: OpcodeIndex = 2; break;
2225 case MVT::v1f64:
2226 case MVT::v1i64: OpcodeIndex = 3; break;
2229 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2230 if (!is64BitVector)
2231 ResTyElts *= 2;
2232 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2234 std::vector<EVT> ResTys;
2235 ResTys.push_back(ResTy);
2236 if (isUpdating)
2237 ResTys.push_back(MVT::i32);
2238 ResTys.push_back(MVT::Other);
2240 SDValue Pred = getAL(CurDAG, dl);
2241 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2243 SDNode *VLdDup;
2244 if (is64BitVector || NumVecs == 1) {
2245 SmallVector<SDValue, 6> Ops;
2246 Ops.push_back(MemAddr);
2247 Ops.push_back(Align);
2248 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2249 QOpcodes0[OpcodeIndex];
2250 if (isUpdating) {
2251 // Fixed-stride update instructions don't have an explicit writeback
2252 // operand; it's implicit in the opcode itself.
2253 SDValue Inc = N->getOperand(2);
2254 bool IsImmUpdate =
2255 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2256 if (NumVecs <= 2 && !IsImmUpdate)
2257 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2258 if (!IsImmUpdate)
2259 Ops.push_back(Inc);
2260 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2261 else if (NumVecs > 2)
2262 Ops.push_back(Reg0);
2264 Ops.push_back(Pred);
2265 Ops.push_back(Reg0);
2266 Ops.push_back(Chain);
2267 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2268 } else if (NumVecs == 2) {
2269 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2270 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2271 dl, ResTys, OpsA);
2273 Chain = SDValue(VLdA, 1);
2274 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2275 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2276 } else {
2277 SDValue ImplDef =
2278 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2279 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2280 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2281 dl, ResTys, OpsA);
2283 SDValue SuperReg = SDValue(VLdA, 0);
2284 Chain = SDValue(VLdA, 1);
2285 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2286 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2289 // Transfer memoperands.
2290 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2291 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2293 // Extract the subregisters.
2294 if (NumVecs == 1) {
2295 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2296 } else {
2297 SDValue SuperReg = SDValue(VLdDup, 0);
2298 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2299 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2300 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2301 ReplaceUses(SDValue(N, Vec),
2302 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2305 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2306 if (isUpdating)
2307 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2308 CurDAG->RemoveDeadNode(N);
2309 }
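/// tryV6T2BitfieldExtractOp - Try to match a UBFX/SBFX bitfield extract,
/// available from ARMv6T2 up. For example, (and (srl x, 8), 0xff) has
/// lsb == 8 and width == 8 and selects to "ubfx rd, rn, #8, #8".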
2311 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2312 if (!Subtarget->hasV6T2Ops())
2313 return false;
2315 unsigned Opc = isSigned
2316 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2317 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2318 SDLoc dl(N);
2320 // For unsigned extracts, check for a shift right and mask
2321 unsigned And_imm = 0;
2322 if (N->getOpcode() == ISD::AND) {
2323 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2325 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2326 if (And_imm & (And_imm + 1))
2327 return false;
2329 unsigned Srl_imm = 0;
2330 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2331 Srl_imm)) {
2332 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2334 // Mask off the unnecessary bits of the AND immediate; normally
2335 // DAGCombine will do this, but that might not happen if
2336 // targetShrinkDemandedConstant chooses a different immediate.
2337 And_imm &= -1U >> Srl_imm;
2339 // Note: The width operand is encoded as width-1.
2340 unsigned Width = countTrailingOnes(And_imm) - 1;
2341 unsigned LSB = Srl_imm;
2343 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2345 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2346 // It's cheaper to use a right shift to extract the top bits.
2347 if (Subtarget->isThumb()) {
2348 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2349 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2350 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2351 getAL(CurDAG, dl), Reg0, Reg0 };
2352 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2353 return true;
2356 // ARM models shift instructions as MOVsi with shifter operand.
2357 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2358 SDValue ShOpc =
2359 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2360 MVT::i32);
2361 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2362 getAL(CurDAG, dl), Reg0, Reg0 };
2363 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2364 return true;
2367 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2368 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2369 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2370 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2371 getAL(CurDAG, dl), Reg0 };
2372 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2373 return true;
2376 return false;
2379 // Otherwise, we're looking for a shift of a shift
2380 unsigned Shl_imm = 0;
2381 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2382 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2383 unsigned Srl_imm = 0;
2384 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2385 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2386 // Note: The width operand is encoded as width-1.
2387 unsigned Width = 32 - Srl_imm - 1;
2388 int LSB = Srl_imm - Shl_imm;
2389 if (LSB < 0)
2390 return false;
2391 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2392 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2393 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2394 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2395 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2396 getAL(CurDAG, dl), Reg0 };
2397 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2398 return true;
2402 // Or we are looking for a shift of an AND with a shifted-mask operand
2403 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2404 isShiftedMask_32(And_imm)) {
2405 unsigned Srl_imm = 0;
2406 unsigned LSB = countTrailingZeros(And_imm);
2407 // The shift amount must equal the LSB of the AND mask
2408 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2409 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2410 unsigned MSB = 31 - countLeadingZeros(And_imm);
2411 // Note: The width operand is encoded as width-1.
2412 unsigned Width = MSB - LSB;
2413 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2414 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2415 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2416 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2417 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2418 getAL(CurDAG, dl), Reg0 };
2419 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2420 return true;
2424 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2425 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2426 unsigned LSB = 0;
2427 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2428 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2429 return false;
2431 if (LSB + Width > 32)
2432 return false;
2434 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2435 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2436 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2437 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2438 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2439 getAL(CurDAG, dl), Reg0 };
2440 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2441 return true;
2444 return false;
2445 }
2447 /// Target-specific DAG combining for ISD::XOR.
2448 /// Target-independent combining lowers SELECT_CC nodes of the form
2449 /// select_cc setg[ge] X, 0, X, -X
2450 /// select_cc setgt X, -1, X, -X
2451 /// select_cc setl[te] X, 0, -X, X
2452 /// select_cc setlt X, 1, -X, X
2453 /// which represent Integer ABS into:
2454 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2455 /// ARM instruction selection detects the latter and matches it to
2456 /// ARM::ABS or ARM::t2ABS machine node.
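/// For a 32-bit X the matched DAG is therefore:
///   %sra = sra %x, 31
///   %add = add %x, %sra
///   %n   = xor %add, %sra
/// and the three nodes collapse into a single ABS/t2ABS pseudo.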
2457 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2458 SDValue XORSrc0 = N->getOperand(0);
2459 SDValue XORSrc1 = N->getOperand(1);
2460 EVT VT = N->getValueType(0);
2462 if (Subtarget->isThumb1Only())
2463 return false;
2465 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2466 return false;
2468 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2469 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2470 SDValue SRASrc0 = XORSrc1.getOperand(0);
2471 SDValue SRASrc1 = XORSrc1.getOperand(1);
2472 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2473 EVT XType = SRASrc0.getValueType();
2474 unsigned Size = XType.getSizeInBits() - 1;
2476 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2477 XType.isInteger() && SRAConstant != nullptr &&
2478 Size == SRAConstant->getZExtValue()) {
2479 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2480 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2481 return true;
2484 return false;
2485 }
2487 /// Atomic compare-and-swap nodes select to dedicated CMP_SWAP pseudo-instructions.
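/// For example, an i32 cmpxchg becomes CMP_SWAP_32 with operands (addr,
/// expected, new, chain); the pseudo is later expanded into an LDREX/STREX
/// loop. Its second i32 result is an internal temporary that is unused here.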
2488 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2489 unsigned Opcode;
2490 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2491 if (MemTy == MVT::i8)
2492 Opcode = ARM::CMP_SWAP_8;
2493 else if (MemTy == MVT::i16)
2494 Opcode = ARM::CMP_SWAP_16;
2495 else if (MemTy == MVT::i32)
2496 Opcode = ARM::CMP_SWAP_32;
2497 else
2498 llvm_unreachable("Unknown AtomicCmpSwap type");
2500 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2501 N->getOperand(0)};
2502 SDNode *CmpSwap = CurDAG->getMachineNode(
2503 Opcode, SDLoc(N),
2504 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2506 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2507 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2509 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2510 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2511 CurDAG->RemoveDeadNode(N);
2512 }
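// Return the (most significant, least significant) bit positions of a single
// contiguous run of set bits, or None otherwise. For example, 0x0ff0 yields
// (11, 4), while 0x0f0f contains two runs and yields None.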
2514 static Optional<std::pair<unsigned, unsigned>>
2515 getContiguousRangeOfSetBits(const APInt &A) {
2516 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2517 unsigned LastOne = A.countTrailingZeros();
2518 if (A.countPopulation() != (FirstOne - LastOne + 1))
2519 return Optional<std::pair<unsigned,unsigned>>();
2520 return std::make_pair(FirstOne, LastOne);
2521 }
2523 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2524 assert(N->getOpcode() == ARMISD::CMPZ);
2525 SwitchEQNEToPLMI = false;
2527 if (!Subtarget->isThumb())
2528 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2529 // LSR don't exist as standalone instructions - they need the barrel shifter.
2530 return;
2532 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
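// For example, with C == 0x00ff0000 (a run from bit 23 down to bit 16) on a
// Thumb target without v6T2, case 4 below emits "lsls rX, #8" followed by
// "lsrs rX, #24", setting the flags for the EQ/NE test without ever
// materializing the constant.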
2533 SDValue And = N->getOperand(0);
2534 if (!And->hasOneUse())
2535 return;
2537 SDValue Zero = N->getOperand(1);
2538 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2539 And->getOpcode() != ISD::AND)
2540 return;
2541 SDValue X = And.getOperand(0);
2542 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2544 if (!C)
2545 return;
2546 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2547 if (!Range)
2548 return;
2550 // There are several ways to lower this:
2551 SDNode *NewN;
2552 SDLoc dl(N);
2554 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2555 if (Subtarget->isThumb2()) {
2556 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2557 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2558 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2559 CurDAG->getRegister(0, MVT::i32) };
2560 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2561 } else {
2562 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2563 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2564 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2565 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2566 }
2567 };
2569 if (Range->second == 0) {
2570 // 1. Mask includes the LSB -> Simply shift the top N bits off
2571 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2572 ReplaceNode(And.getNode(), NewN);
2573 } else if (Range->first == 31) {
2574 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2575 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2576 ReplaceNode(And.getNode(), NewN);
2577 } else if (Range->first == Range->second) {
2578 // 3. Only one bit is set. We can shift this into the sign bit and use a
2579 // PL/MI comparison.
2580 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2581 ReplaceNode(And.getNode(), NewN);
2583 SwitchEQNEToPLMI = true;
2584 } else if (!Subtarget->hasV6T2Ops()) {
2585 // 4. Do a double shift to clear bottom and top bits, but only in
2586 // thumb-1 mode as in thumb-2 we can use UBFX.
2587 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2588 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2589 Range->second + (31 - Range->first));
2590 ReplaceNode(And.getNode(), NewN);
2591 }
2592 }
2595 void ARMDAGToDAGISel::Select(SDNode *N) {
2596 SDLoc dl(N);
2598 if (N->isMachineOpcode()) {
2599 N->setNodeId(-1);
2600 return; // Already selected.
2603 switch (N->getOpcode()) {
2604 default: break;
2605 case ISD::STORE: {
2606 // For Thumb1, match an sp-relative store in C++. This is a little
2607 // unfortunate, but I don't think I can make the chain check work
2608 // otherwise. (The chain of the store has to be the same as the chain
2609 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2610 // a direct reference to "SP".)
2612 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2613 // a different addressing mode from other four-byte stores.
2615 // This pattern usually comes up with call arguments.
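// For example, "str r0, [sp, #8]" selects directly to tSTRspi with a scaled
// offset operand of 2 (the immediate is in words; see Scale=4 below).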
2616 StoreSDNode *ST = cast<StoreSDNode>(N);
2617 SDValue Ptr = ST->getBasePtr();
2618 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2619 int RHSC = 0;
2620 if (Ptr.getOpcode() == ISD::ADD &&
2621 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2622 Ptr = Ptr.getOperand(0);
2624 if (Ptr.getOpcode() == ISD::CopyFromReg &&
2625 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2626 Ptr.getOperand(0) == ST->getChain()) {
2627 SDValue Ops[] = {ST->getValue(),
2628 CurDAG->getRegister(ARM::SP, MVT::i32),
2629 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2630 getAL(CurDAG, dl),
2631 CurDAG->getRegister(0, MVT::i32),
2632 ST->getChain()};
2633 MachineSDNode *ResNode =
2634 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2635 MachineMemOperand *MemOp = ST->getMemOperand();
2636 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2637 ReplaceNode(N, ResNode);
2638 return;
2641 break;
2643 case ISD::WRITE_REGISTER:
2644 if (tryWriteRegister(N))
2645 return;
2646 break;
2647 case ISD::READ_REGISTER:
2648 if (tryReadRegister(N))
2649 return;
2650 break;
2651 case ISD::INLINEASM:
2652 case ISD::INLINEASM_BR:
2653 if (tryInlineAsm(N))
2654 return;
2655 break;
2656 case ISD::XOR:
2657 // Select special operations if XOR node forms integer ABS pattern
2658 if (tryABSOp(N))
2659 return;
2660 // Other cases are autogenerated.
2661 break;
2662 case ISD::Constant: {
2663 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2664 // If we can't materialize the constant we need to use a literal pool
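// For example, on a target without MOVW/MOVT, an arbitrary value such as
// 0x12345678 is cheaper to load from the constant pool (tLDRpci/LDRcp below)
// than to build with a long mov/orr sequence.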
2665 if (ConstantMaterializationCost(Val) > 2) {
2666 SDValue CPIdx = CurDAG->getTargetConstantPool(
2667 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2668 TLI->getPointerTy(CurDAG->getDataLayout()));
2670 SDNode *ResNode;
2671 if (Subtarget->isThumb()) {
2672 SDValue Ops[] = {
2673 CPIdx,
2674 getAL(CurDAG, dl),
2675 CurDAG->getRegister(0, MVT::i32),
2676 CurDAG->getEntryNode()
2678 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2679 Ops);
2680 } else {
2681 SDValue Ops[] = {
2682 CPIdx,
2683 CurDAG->getTargetConstant(0, dl, MVT::i32),
2684 getAL(CurDAG, dl),
2685 CurDAG->getRegister(0, MVT::i32),
2686 CurDAG->getEntryNode()
2688 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2689 Ops);
2691 // Annotate the Node with memory operand information so that MachineInstr
2692 // queries work properly. This e.g. gives the register allocation the
2693 // required information for rematerialization.
2694 MachineFunction& MF = CurDAG->getMachineFunction();
2695 MachineMemOperand *MemOp =
2696 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2697 MachineMemOperand::MOLoad, 4, 4);
2699 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2701 ReplaceNode(N, ResNode);
2702 return;
2705 // Other cases are autogenerated.
2706 break;
2708 case ISD::FrameIndex: {
2709 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2710 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2711 SDValue TFI = CurDAG->getTargetFrameIndex(
2712 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2713 if (Subtarget->isThumb1Only()) {
2714 // Set the alignment of the frame object to 4, to avoid having to generate
2715 // more than one ADD
2716 MachineFrameInfo &MFI = MF->getFrameInfo();
2717 if (MFI.getObjectAlignment(FI) < 4)
2718 MFI.setObjectAlignment(FI, 4);
2719 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2720 CurDAG->getTargetConstant(0, dl, MVT::i32));
2721 return;
2722 } else {
2723 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2724 ARM::t2ADDri : ARM::ADDri);
2725 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2726 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2727 CurDAG->getRegister(0, MVT::i32) };
2728 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2729 return;
2732 case ISD::SRL:
2733 if (tryV6T2BitfieldExtractOp(N, false))
2734 return;
2735 break;
2736 case ISD::SIGN_EXTEND_INREG:
2737 case ISD::SRA:
2738 if (tryV6T2BitfieldExtractOp(N, true))
2739 return;
2740 break;
2741 case ISD::MUL:
2742 if (Subtarget->isThumb1Only())
2743 break;
2744 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2745 unsigned RHSV = C->getZExtValue();
2746 if (!RHSV) break;
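// Multiplies by (2^n + 1) or (2^n - 1) fold into a shifted add or a shifted
// reverse-subtract, e.g. x * 9 -> "add r0, r0, r0, lsl #3" and
// x * 7 -> "rsb r0, r0, r0, lsl #3".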
2747 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2748 unsigned ShImm = Log2_32(RHSV-1);
2749 if (ShImm >= 32)
2750 break;
2751 SDValue V = N->getOperand(0);
2752 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2753 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2754 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2755 if (Subtarget->isThumb()) {
2756 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2757 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2758 return;
2759 } else {
2760 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2761 Reg0 };
2762 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2763 return;
2766 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2767 unsigned ShImm = Log2_32(RHSV+1);
2768 if (ShImm >= 32)
2769 break;
2770 SDValue V = N->getOperand(0);
2771 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2772 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2773 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2774 if (Subtarget->isThumb()) {
2775 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2776 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2777 return;
2778 } else {
2779 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2780 Reg0 };
2781 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2782 return;
2786 break;
2787 case ISD::AND: {
2788 // Check for unsigned bitfield extract
2789 if (tryV6T2BitfieldExtractOp(N, false))
2790 return;
2792 // If an immediate is used in an AND node, it is possible that the immediate
2793 // can be more optimally materialized when negated. If this is the case we
2794 // can negate the immediate and use a BIC instead.
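// For example, 0xffffff00 costs more to materialize than its complement 0xff,
// which fits a single MOVS; so (and x, 0xffffff00) is better selected as a
// BIC against a register holding 0xff.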
2795 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2796 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2797 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2799 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2800 // immediate can be negated and fit in the immediate operand of
2801 // a t2BIC, don't do any manual transform here as this can be
2802 // handled by the generic ISel machinery.
2803 bool PreferImmediateEncoding =
2804 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2805 if (!PreferImmediateEncoding &&
2806 ConstantMaterializationCost(Imm) >
2807 ConstantMaterializationCost(~Imm)) {
2808 // The current immediate costs more to materialize than a negated
2809 // immediate, so negate the immediate and use a BIC.
2810 SDValue NewImm =
2811 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2812 // If the new constant didn't exist before, reposition it in the topological
2813 // ordering so it is just before N. Otherwise, don't touch its location.
2814 if (NewImm->getNodeId() == -1)
2815 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2817 if (!Subtarget->hasThumb2()) {
2818 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2819 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2820 CurDAG->getRegister(0, MVT::i32)};
2821 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2822 return;
2823 } else {
2824 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2825 CurDAG->getRegister(0, MVT::i32),
2826 CurDAG->getRegister(0, MVT::i32)};
2827 ReplaceNode(N,
2828 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2829 return;
2834 // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower
2835 // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the
2836 // top 16 bits of the result are entirely contributed by c2 and the lower
2837 // 16 bits entirely by x. That equals (or (and x, 0xffff), (and c2, 0xffff0000)).
2838 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
2839 EVT VT = N->getValueType(0);
2840 if (VT != MVT::i32)
2841 break;
2842 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2843 ? ARM::t2MOVTi16
2844 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2845 if (!Opc)
2846 break;
2847 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2848 N1C = dyn_cast<ConstantSDNode>(N1);
2849 if (!N1C)
2850 break;
2851 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2852 SDValue N2 = N0.getOperand(1);
2853 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2854 if (!N2C)
2855 break;
2856 unsigned N1CVal = N1C->getZExtValue();
2857 unsigned N2CVal = N2C->getZExtValue();
2858 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2859 (N1CVal & 0xffffU) == 0xffffU &&
2860 (N2CVal & 0xffffU) == 0x0U) {
2861 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2862 dl, MVT::i32);
2863 SDValue Ops[] = { N0.getOperand(0), Imm16,
2864 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2865 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2866 return;
2870 break;
2872 case ARMISD::UMAAL: {
2873 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2874 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2875 N->getOperand(2), N->getOperand(3),
2876 getAL(CurDAG, dl),
2877 CurDAG->getRegister(0, MVT::i32) };
2878 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2879 return;
2881 case ARMISD::UMLAL: {
2882 if (Subtarget->isThumb()) {
2883 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2884 N->getOperand(3), getAL(CurDAG, dl),
2885 CurDAG->getRegister(0, MVT::i32)};
2886 ReplaceNode(
2887 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2888 return;
2889 } else {
2890 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2891 N->getOperand(3), getAL(CurDAG, dl),
2892 CurDAG->getRegister(0, MVT::i32),
2893 CurDAG->getRegister(0, MVT::i32) };
2894 ReplaceNode(N, CurDAG->getMachineNode(
2895 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2896 MVT::i32, MVT::i32, Ops));
2897 return;
2900 case ARMISD::SMLAL: {
2901 if (Subtarget->isThumb()) {
2902 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2903 N->getOperand(3), getAL(CurDAG, dl),
2904 CurDAG->getRegister(0, MVT::i32)};
2905 ReplaceNode(
2906 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2907 return;
2908 } else {
2909 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2910 N->getOperand(3), getAL(CurDAG, dl),
2911 CurDAG->getRegister(0, MVT::i32),
2912 CurDAG->getRegister(0, MVT::i32) };
2913 ReplaceNode(N, CurDAG->getMachineNode(
2914 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2915 MVT::i32, MVT::i32, Ops));
2916 return;
2919 case ARMISD::SUBE: {
2920 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2921 break;
2922 // Look for a pattern to match SMMLS
2923 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi a, b)))
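// i.e. SMMLS computes rd = ((ra << 32) - rn * rm) >> 32; the subc of the
// product's low half models the borrow into the high word.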
2924 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2925 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2926 !SDValue(N, 1).use_empty())
2927 break;
2929 if (Subtarget->isThumb())
2930 assert(Subtarget->hasThumb2() &&
2931 "This pattern should not be generated for Thumb");
2933 SDValue SmulLoHi = N->getOperand(1);
2934 SDValue Subc = N->getOperand(2);
2935 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2937 if (!Zero || Zero->getZExtValue() != 0 ||
2938 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2939 N->getOperand(1) != SmulLoHi.getValue(1) ||
2940 N->getOperand(2) != Subc.getValue(1))
2941 break;
2943 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2944 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2945 N->getOperand(0), getAL(CurDAG, dl),
2946 CurDAG->getRegister(0, MVT::i32) };
2947 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2948 return;
2950 case ISD::LOAD: {
2951 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2952 if (tryT2IndexedLoad(N))
2953 return;
2954 } else if (Subtarget->isThumb()) {
2955 if (tryT1IndexedLoad(N))
2956 return;
2957 } else if (tryARMIndexedLoad(N))
2958 return;
2959 // Other cases are autogenerated.
2960 break;
2962 case ARMISD::BRCOND: {
2963 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2964 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2965 // Pattern complexity = 6 cost = 1 size = 0
2967 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2968 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2969 // Pattern complexity = 6 cost = 1 size = 0
2971 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2972 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2973 // Pattern complexity = 6 cost = 1 size = 0
2975 unsigned Opc = Subtarget->isThumb() ?
2976 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2977 SDValue Chain = N->getOperand(0);
2978 SDValue N1 = N->getOperand(1);
2979 SDValue N2 = N->getOperand(2);
2980 SDValue N3 = N->getOperand(3);
2981 SDValue InFlag = N->getOperand(4);
2982 assert(N1.getOpcode() == ISD::BasicBlock);
2983 assert(N2.getOpcode() == ISD::Constant);
2984 assert(N3.getOpcode() == ISD::Register);
2986 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2988 if (InFlag.getOpcode() == ARMISD::CMPZ) {
2989 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
2990 SDValue Int = InFlag.getOperand(0);
2991 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
2993 // Handle low-overhead loops.
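// The decrement and the branch select to a t2LoopDec/t2LoopEnd pseudo pair;
// the low-overhead-loop pass later rewrites these into the real LE (and DLS)
// instructions, or reverts to a SUBS/Bcc sequence when the loop turns out to
// be unsuitable.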
2994 if (ID == Intrinsic::loop_decrement_reg) {
2995 SDValue Elements = Int.getOperand(2);
2996 SDValue Size = CurDAG->getTargetConstant(
2997 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
2998 MVT::i32);
3000 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3001 SDNode *LoopDec =
3002 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3003 CurDAG->getVTList(MVT::i32, MVT::Other),
3004 Args);
3005 ReplaceUses(Int.getNode(), LoopDec);
3007 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3008 SDNode *LoopEnd =
3009 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3011 ReplaceUses(N, LoopEnd);
3012 CurDAG->RemoveDeadNode(N);
3013 CurDAG->RemoveDeadNode(InFlag.getNode());
3014 CurDAG->RemoveDeadNode(Int.getNode());
3015 return;
3019 bool SwitchEQNEToPLMI;
3020 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3021 InFlag = N->getOperand(4);
3023 if (SwitchEQNEToPLMI) {
3024 switch ((ARMCC::CondCodes)CC) {
3025 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3026 case ARMCC::NE:
3027 CC = (unsigned)ARMCC::MI;
3028 break;
3029 case ARMCC::EQ:
3030 CC = (unsigned)ARMCC::PL;
3031 break;
3036 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3037 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3038 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3039 MVT::Glue, Ops);
3040 Chain = SDValue(ResNode, 0);
3041 if (N->getNumValues() == 2) {
3042 InFlag = SDValue(ResNode, 1);
3043 ReplaceUses(SDValue(N, 1), InFlag);
3044 }
3045 ReplaceUses(SDValue(N, 0),
3046 SDValue(Chain.getNode(), Chain.getResNo()));
3047 CurDAG->RemoveDeadNode(N);
3048 return;
3051 case ARMISD::CMPZ: {
3052 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3053 // This allows us to avoid materializing the expensive negative constant.
3054 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3055 // for its glue output.
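// For example, (CMPZ x, #-42) becomes (CMPZ (ADDS x, #42), #0): "adds" sets
// the same flags the compare needs, and 42 fits an 8-bit immediate where -42
// would be expensive to materialize.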
3056 SDValue X = N->getOperand(0);
3057 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3058 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3059 int64_t Addend = -C->getSExtValue();
3061 SDNode *Add = nullptr;
3062 // ADDS can be better than CMN if the immediate fits in a
3063 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3064 // Outside that range we can just use a CMN which is 32-bit but has a
3065 // 12-bit immediate range.
3066 if (Addend < 1<<8) {
3067 if (Subtarget->isThumb2()) {
3068 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3069 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3070 CurDAG->getRegister(0, MVT::i32) };
3071 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3072 } else {
3073 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3074 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3075 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3076 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3077 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3080 if (Add) {
3081 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3082 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3085 // Other cases are autogenerated.
3086 break;
3089 case ARMISD::CMOV: {
3090 SDValue InFlag = N->getOperand(4);
3092 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3093 bool SwitchEQNEToPLMI;
3094 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3096 if (SwitchEQNEToPLMI) {
3097 SDValue ARMcc = N->getOperand(2);
3098 ARMCC::CondCodes CC =
3099 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3101 switch (CC) {
3102 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3103 case ARMCC::NE:
3104 CC = ARMCC::MI;
3105 break;
3106 case ARMCC::EQ:
3107 CC = ARMCC::PL;
3108 break;
3110 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3111 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3112 N->getOperand(3), N->getOperand(4)};
3113 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3117 // Other cases are autogenerated.
3118 break;
  case ARMISD::VZIP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8: Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8: Opc = ARM::VUZPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VUZPd16; break;
    case MVT::v2f32:
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VUZPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VUZPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VUZPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8: Opc = ARM::VTRNd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VTRNd16; break;
    case MVT::v2f32:
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VTRNq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VTRNq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VTRNq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
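    // These BUILD_VECTORs gather scalar FP values into one vector register.
    // The createDRegPairNode/createSRegPairNode/createQuadSRegsNode helpers
    // used below build REG_SEQUENCE nodes that place each scalar operand in
    // the appropriate sub-register.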
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }

  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                        ARM::VLD2DUPd16wb_fixed,
                                        ARM::VLD2DUPd32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                        ARM::VLD3DUPd16Pseudo_UPD,
                                        ARM::VLD3DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                        ARM::VLD4DUPd16Pseudo_UPD,
                                        ARM::VLD4DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
                                         ARM::VLD2d16wb_fixed,
                                         ARM::VLD2d32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
                                         ARM::VLD2q16PseudoWB_fixed,
                                         ARM::VLD2q32PseudoWB_fixed };
    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
                                          ARM::VLD4q16oddPseudo_UPD,
                                          ARM::VLD4q32oddPseudo_UPD };
    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
                                         ARM::VST2d16wb_fixed,
                                         ARM::VST2d32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
                                         ARM::VST2q16PseudoWB_fixed,
                                         ARM::VST2q32PseudoWB_fixed };
    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
                                         ARM::VST4d16Pseudo_UPD,
                                         ARM::VST4d32Pseudo_UPD,
                                         ARM::VST1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                          ARM::VST4q16Pseudo_UPD,
                                          ARM::VST4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
                                          ARM::VST4q16oddPseudo_UPD,
                                          ARM::VST4q32oddPseudo_UPD };
    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */

      // The ARM-mode mrrc2 instruction does not allow predicates: the top
      // four bits of the encoding are always '1111'. Assembly language does
      // permit writing an AL predicate on mrrc2, but it makes no difference
      // to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // A store-exclusive double returns an i32 value which is the return
      // status of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit), extract the integer operands from
// its fields, and append these operands to the provided vector.
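// For example, given the 32-bit form above, the string "cp15:0:c13:c0:3"
// splits into five fields and produces the operands {15, 0, 13, 0, 3}.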
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
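// For example, banked-register strings such as "r8_usr" or "spsr_fiq" are
// looked up by name and their encodings returned.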
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps an MClass special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
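// For example, M-class system register names such as "msp", "psp" or
// "basepri" are looked up by name here, subject to the subtarget's feature
// bits.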
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand encodes the special register in bit 4 (the R bit):
  // 1 if the register is spsr, 0 for cpsr/apsr. Bits 3-0 contain the fields
  // to be accessed in the special register, set by the flags provided with
  // the register.
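  // Worked example of the logic below: for "spsr_fc", the flags 'f' and 'c'
  // contribute 0x8 and 0x1, and spsr sets the R bit, giving
  // 0x10 | 0x8 | 0x1 = 0x19.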
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
      break;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to construct as operands for the node.
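// For example, a read_register string of the "cp<...>" field form lowers to
// MRC/MRRC below, "apsr" lowers to an MRS variant, and an M-class system
// register name lowers to t2MRS_M.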
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target is M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by H, Q, R modifiers, so we
  // still pack them into a GPRPair.
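  // An illustrative (hypothetical) inline-asm use that exercises this path:
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val) : "r"(Addr));
  // where the i64 output %0 must land in an even/odd GPR pair.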

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    // be an immediate and not a memory constraint.
    LLVM_FALLTHROUGH;
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}