lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines an instruction selector for the ARM target.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "ARM.h"
  14 #include "ARMBaseInstrInfo.h"
  15 #include "ARMTargetMachine.h"
  16 #include "MCTargetDesc/ARMAddressingModes.h"
  17 #include "Utils/ARMBaseInfo.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/CodeGen/TargetLowering.h"
  26 #include "llvm/IR/CallingConv.h"
  27 #include "llvm/IR/Constants.h"
  28 #include "llvm/IR/DerivedTypes.h"
  29 #include "llvm/IR/Function.h"
  30 #include "llvm/IR/Intrinsics.h"
  31 #include "llvm/IR/LLVMContext.h"
  32 #include "llvm/Support/CommandLine.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 class ARMDAGToDAGISel : public SelectionDAGISel {
  53   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  54   /// make the right decision when generating code for different targets.
  55   const ARMSubtarget *Subtarget;
  56
  57 public:
  58   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  59       : SelectionDAGISel(tm, OptLevel) {}
  60
  61   bool runOnMachineFunction(MachineFunction &MF) override {
  62     // Reset the subtarget each time through.
  63     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  64     SelectionDAGISel::runOnMachineFunction(MF);
  65     return true;
  66   }
  67
  68   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  69
  70   void PreprocessISelDAG() override;
  71
  72   /// getI32Imm - Return a target constant of type i32 with the specified
  73   /// value.
  74   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  75     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  76   }
  77
  78   void Select(SDNode *N) override;
  79
  80   bool hasNoVMLxHazardUse(SDNode *N) const;
  81   bool isShifterOpProfitable(const SDValue &Shift,
  82                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  83   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  84                                SDValue &B, SDValue &C,
  85                                bool CheckProfitability = true);
  86   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  87                                SDValue &B, bool CheckProfitability = true);
  88   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  89                                     SDValue &B, SDValue &C) {
  90     // Don't apply the profitability check
  91     return SelectRegShifterOperand(N, A, B, C, false);
  92   }
  93   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B) {
  95     // Don't apply the profitability check
  96     return SelectImmShifterOperand(N, A, B, false);
  97   }
  98
  99   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
 100
 101   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 102   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 103
 104   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 105     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 106     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 107     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 108     return true;
 109   }
 110
 111   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 112                              SDValue &Offset, SDValue &Opc);
 113   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 114                              SDValue &Offset, SDValue &Opc);
 115   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 116                              SDValue &Offset, SDValue &Opc);
 117   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 118   bool SelectAddrMode3(SDValue N, SDValue &Base,
 119                        SDValue &Offset, SDValue &Opc);
 120   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 121                              SDValue &Offset, SDValue &Opc);
 122   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
 123   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
 124   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
 125   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 126   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 127
 128   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 129
 130   // Thumb Addressing Modes:
 131   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 132   bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
 133   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 134                                 SDValue &OffImm);
 135   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 136                                  SDValue &OffImm);
 137   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 138                                  SDValue &OffImm);
 139   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 140                                  SDValue &OffImm);
 141   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 142   template <unsigned Shift>
 143   bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
 144
 145   // Thumb 2 Addressing Modes:
 146   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 147   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 148                             SDValue &OffImm);
 149   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 150                                  SDValue &OffImm);
 151   template <unsigned Shift>
 152   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
 153   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
 154                                   unsigned Shift);
 155   template <unsigned Shift>
 156   bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
 157   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 158                              SDValue &OffReg, SDValue &ShImm);
 159   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 160
 161   inline bool is_so_imm(unsigned Imm) const {
 162     return ARM_AM::getSOImmVal(Imm) != -1;
 163   }
 164
 165   inline bool is_so_imm_not(unsigned Imm) const {
 166     return ARM_AM::getSOImmVal(~Imm) != -1;
 167   }
 168
 169   inline bool is_t2_so_imm(unsigned Imm) const {
 170     return ARM_AM::getT2SOImmVal(Imm) != -1;
 171   }
 172
 173   inline bool is_t2_so_imm_not(unsigned Imm) const {
 174     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 175   }
 176
 177   // Include the pieces autogenerated from the target description.
 178 #include "ARMGenDAGISel.inc"
 179
 180 private:
 181   void transferMemOperands(SDNode *Src, SDNode *Dst);
 182
 183   /// Indexed (pre/post inc/dec) load matching code for ARM.
 184   bool tryARMIndexedLoad(SDNode *N);
 185   bool tryT1IndexedLoad(SDNode *N);
 186   bool tryT2IndexedLoad(SDNode *N);
 187   bool tryMVEIndexedLoad(SDNode *N);
 188
 189   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 190   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 191   /// loads of D registers and even subregs and odd subregs of Q registers.
 192   /// For NumVecs <= 2, QOpcodes1 is not used.
 193   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 194                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 195                  const uint16_t *QOpcodes1);
 196
 197   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 198   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 199   /// stores of D registers and even subregs and odd subregs of Q registers.
 200   /// For NumVecs <= 2, QOpcodes1 is not used.
 201   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 202                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 203                  const uint16_t *QOpcodes1);
 204
 205   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 206   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 207   /// load/store of D registers and Q registers.
 208   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 209                        unsigned NumVecs, const uint16_t *DOpcodes,
 210                        const uint16_t *QOpcodes);
 211
 212   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 213   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 214   /// for loading D registers.
 215   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
 216                     unsigned NumVecs, const uint16_t *DOpcodes,
 217                     const uint16_t *QOpcodes0 = nullptr,
 218                     const uint16_t *QOpcodes1 = nullptr);
 219
 220   /// Try to select SBFX/UBFX instructions for ARM.
 221   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 222
 223   // Select special operations if node forms integer ABS pattern
 224   bool tryABSOp(SDNode *N);
 225
 226   bool tryReadRegister(SDNode *N);
 227   bool tryWriteRegister(SDNode *N);
 228
 229   bool tryInlineAsm(SDNode *N);
 230
 231   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 232
 233   void SelectCMP_SWAP(SDNode *N);
 234
 235   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 236   /// inline asm expressions.
 237   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 238                                     std::vector<SDValue> &OutOps) override;
 239
 240   // Form pairs of consecutive R, S, D, or Q registers.
 241   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 242   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 243   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 244   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 245
 246   // Form sequences of 4 consecutive S, D, or Q registers.
 247   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 248   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 249   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 250
 251   // Get the alignment operand for a NEON VLD or VST instruction.
 252   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 253                         bool is64BitVector);
 254
 255   /// Checks if N is a multiplication by a constant where we can extract out a
 256   /// power of two from the constant so that it can be used in a shift, but only
 257   /// if it simplifies the materialization of the constant. Returns true if it
 258   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 259   /// out and to NewMulConst the new constant to be multiplied by.
 260   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 261                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 262
 263   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 264   /// selected when N would have been selected.
 265   void replaceDAGValue(const SDValue &N, SDValue M);
 266 };
 267 }
 268
 269 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 270 /// operand. If so Imm will receive the 32-bit value.
 271 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 272   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 273     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 274     return true;
 275   }
 276   return false;
 277 }
 278
 279 // isInt32Immediate - This method tests to see if a constant operand.
 280 // If so Imm will receive the 32 bit value.
 281 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 282   return isInt32Immediate(N.getNode(), Imm);
 283 }
 284
 285 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 286 // opcode and that it has a immediate integer right operand.
 287 // If so Imm will receive the 32 bit value.
 288 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 289   return N->getOpcode() == Opc &&
 290          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 291 }
 292
 293 /// Check whether a particular node is a constant value representable as
 294 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 295 ///
 296 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 297 static bool isScaledConstantInRange(SDValue Node, int Scale,
 298                                     int RangeMin, int RangeMax,
 299                                     int &ScaledConstant) {
 300   assert(Scale > 0 && "Invalid scale!");
 301
 302   // Check that this is a constant.
 303   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 304   if (!C)
 305     return false;
 306
 307   ScaledConstant = (int) C->getZExtValue();
 308   if ((ScaledConstant % Scale) != 0)
 309     return false;
 310
 311   ScaledConstant /= Scale;
 312   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 313 }
 314
 315 void ARMDAGToDAGISel::PreprocessISelDAG() {
 316   if (!Subtarget->hasV6T2Ops())
 317     return;
 318
 319   bool isThumb2 = Subtarget->isThumb();
 320   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 321        E = CurDAG->allnodes_end(); I != E; ) {
 322     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 323
 324     if (N->getOpcode() != ISD::ADD)
 325       continue;
 326
 327     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 328     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 329     // trailing zeros, e.g. 1020.
 330     // Transform the expression to
 331     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 332     // of trailing zeros of c2. The left shift would be folded as an shifter
 333     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 334     // node (UBFX).
 335
 336     SDValue N0 = N->getOperand(0);
 337     SDValue N1 = N->getOperand(1);
 338     unsigned And_imm = 0;
 339     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 340       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 341         std::swap(N0, N1);
 342     }
 343     if (!And_imm)
 344       continue;
 345
 346     // Check if the AND mask is an immediate of the form: 000.....1111111100
 347     unsigned TZ = countTrailingZeros(And_imm);
 348     if (TZ != 1 && TZ != 2)
 349       // Be conservative here. Shifter operands aren't always free. e.g. On
 350       // Swift, left shifter operand of 1 / 2 for free but others are not.
 351       // e.g.
 352       //  ubfx   r3, r1, #16, #8
 353       //  ldr.w  r3, [r0, r3, lsl #2]
 354       // vs.
 355       //  mov.w  r9, #1020
 356       //  and.w  r2, r9, r1, lsr #14
 357       //  ldr    r2, [r0, r2]
 358       continue;
 359     And_imm >>= TZ;
 360     if (And_imm & (And_imm + 1))
 361       continue;
 362
 363     // Look for (and (srl X, c1), c2).
 364     SDValue Srl = N1.getOperand(0);
 365     unsigned Srl_imm = 0;
 366     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 367         (Srl_imm <= 2))
 368       continue;
 369
 370     // Make sure first operand is not a shifter operand which would prevent
 371     // folding of the left shift.
 372     SDValue CPTmp0;
 373     SDValue CPTmp1;
 374     SDValue CPTmp2;
 375     if (isThumb2) {
 376       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 377         continue;
 378     } else {
 379       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 380           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 381         continue;
 382     }
 383
 384     // Now make the transformation.
 385     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 386                           Srl.getOperand(0),
 387                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 388                                               MVT::i32));
 389     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 390                          Srl,
 391                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 392     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 393                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 394     CurDAG->UpdateNodeOperands(N, N0, N1);
 395   }
 396 }
 397
 398 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 399 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 400 /// least on current ARM implementations) which should be avoidded.
 401 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 402   if (OptLevel == CodeGenOpt::None)
 403     return true;
 404
 405   if (!Subtarget->hasVMLxHazards())
 406     return true;
 407
 408   if (!N->hasOneUse())
 409     return false;
 410
 411   SDNode *Use = *N->use_begin();
 412   if (Use->getOpcode() == ISD::CopyToReg)
 413     return true;
 414   if (Use->isMachineOpcode()) {
 415     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 416         CurDAG->getSubtarget().getInstrInfo());
 417
 418     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 419     if (MCID.mayStore())
 420       return true;
 421     unsigned Opcode = MCID.getOpcode();
 422     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 423       return true;
 424     // vmlx feeding into another vmlx. We actually want to unfold
 425     // the use later in the MLxExpansion pass. e.g.
 426     // vmla
 427     // vmla (stall 8 cycles)
 428     //
 429     // vmul (5 cycles)
 430     // vadd (5 cycles)
 431     // vmla
 432     // This adds up to about 18 - 19 cycles.
 433     //
 434     // vmla
 435     // vmul (stall 4 cycles)
 436     // vadd adds up to about 14 cycles.
 437     return TII->isFpMLxInstruction(Opcode);
 438   }
 439
 440   return false;
 441 }
 442
 443 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 444                                             ARM_AM::ShiftOpc ShOpcVal,
 445                                             unsigned ShAmt) {
 446   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 447     return true;
 448   if (Shift.hasOneUse())
 449     return true;
 450   // R << 2 is free.
 451   return ShOpcVal == ARM_AM::lsl &&
 452          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 453 }
 454
 455 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 456                                              unsigned MaxShift,
 457                                              unsigned &PowerOfTwo,
 458                                              SDValue &NewMulConst) const {
 459   assert(N.getOpcode() == ISD::MUL);
 460   assert(MaxShift > 0);
 461
 462   // If the multiply is used in more than one place then changing the constant
 463   // will make other uses incorrect, so don't.
 464   if (!N.hasOneUse()) return false;
 465   // Check if the multiply is by a constant
 466   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 467   if (!MulConst) return false;
 468   // If the constant is used in more than one place then modifying it will mean
 469   // we need to materialize two constants instead of one, which is a bad idea.
 470   if (!MulConst->hasOneUse()) return false;
 471   unsigned MulConstVal = MulConst->getZExtValue();
 472   if (MulConstVal == 0) return false;
 473
 474   // Find the largest power of 2 that MulConstVal is a multiple of
 475   PowerOfTwo = MaxShift;
 476   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 477     --PowerOfTwo;
 478     if (PowerOfTwo == 0) return false;
 479   }
 480
 481   // Only optimise if the new cost is better
 482   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 483   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 484   unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
 485   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
 486   return NewCost < OldCost;
 487 }
 488
 489 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 490   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 491   ReplaceUses(N, M);
 492 }
 493
 494 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 495                                               SDValue &BaseReg,
 496                                               SDValue &Opc,
 497                                               bool CheckProfitability) {
 498   if (DisableShifterOp)
 499     return false;
 500
 501   // If N is a multiply-by-constant and it's profitable to extract a shift and
 502   // use it in a shifted operand do so.
 503   if (N.getOpcode() == ISD::MUL) {
 504     unsigned PowerOfTwo = 0;
 505     SDValue NewMulConst;
 506     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 507       HandleSDNode Handle(N);
 508       SDLoc Loc(N);
 509       replaceDAGValue(N.getOperand(1), NewMulConst);
 510       BaseReg = Handle.getValue();
 511       Opc = CurDAG->getTargetConstant(
 512           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 513       return true;
 514     }
 515   }
 516
 517   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 518
 519   // Don't match base register only case. That is matched to a separate
 520   // lower complexity pattern with explicit register operand.
 521   if (ShOpcVal == ARM_AM::no_shift) return false;
 522
 523   BaseReg = N.getOperand(0);
 524   unsigned ShImmVal = 0;
 525   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 526   if (!RHS) return false;
 527   ShImmVal = RHS->getZExtValue() & 31;
 528   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 529                                   SDLoc(N), MVT::i32);
 530   return true;
 531 }
 532
 533 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 534                                               SDValue &BaseReg,
 535                                               SDValue &ShReg,
 536                                               SDValue &Opc,
 537                                               bool CheckProfitability) {
 538   if (DisableShifterOp)
 539     return false;
 540
 541   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 542
 543   // Don't match base register only case. That is matched to a separate
 544   // lower complexity pattern with explicit register operand.
 545   if (ShOpcVal == ARM_AM::no_shift) return false;
 546
 547   BaseReg = N.getOperand(0);
 548   unsigned ShImmVal = 0;
 549   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 550   if (RHS) return false;
 551
 552   ShReg = N.getOperand(1);
 553   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 554     return false;
 555   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 556                                   SDLoc(N), MVT::i32);
 557   return true;
 558 }
 559
 560 // Determine whether an ISD::OR's operands are suitable to turn the operation
 561 // into an addition, which often has more compact encodings.
 562 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
 563   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
 564   Out = N;
 565   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
 566 }
 567
 568
 569 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 570                                           SDValue &Base,
 571                                           SDValue &OffImm) {
 572   // Match simple R + imm12 operands.
 573
 574   // Base only.
 575   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 576       !CurDAG->isBaseWithConstantOffset(N)) {
 577     if (N.getOpcode() == ISD::FrameIndex) {
 578       // Match frame index.
 579       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 580       Base = CurDAG->getTargetFrameIndex(
 581           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 582       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 583       return true;
 584     }
 585
 586     if (N.getOpcode() == ARMISD::Wrapper &&
 587         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 588         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 589         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 590       Base = N.getOperand(0);
 591     } else
 592       Base = N;
 593     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 594     return true;
 595   }
 596
 597   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 598     int RHSC = (int)RHS->getSExtValue();
 599     if (N.getOpcode() == ISD::SUB)
 600       RHSC = -RHSC;
 601
 602     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 603       Base   = N.getOperand(0);
 604       if (Base.getOpcode() == ISD::FrameIndex) {
 605         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 606         Base = CurDAG->getTargetFrameIndex(
 607             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 608       }
 609       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 610       return true;
 611     }
 612   }
 613
 614   // Base only.
 615   Base = N;
 616   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 617   return true;
 618 }
 619
 620
 621
 622 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 623                                       SDValue &Opc) {
 624   if (N.getOpcode() == ISD::MUL &&
 625       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 626     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 627       // X * [3,5,9] -> X + X * [2,4,8] etc.
 628       int RHSC = (int)RHS->getZExtValue();
 629       if (RHSC & 1) {
 630         RHSC = RHSC & ~1;
 631         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 632         if (RHSC < 0) {
 633           AddSub = ARM_AM::sub;
 634           RHSC = - RHSC;
 635         }
 636         if (isPowerOf2_32(RHSC)) {
 637           unsigned ShAmt = Log2_32(RHSC);
 638           Base = Offset = N.getOperand(0);
 639           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 640                                                             ARM_AM::lsl),
 641                                           SDLoc(N), MVT::i32);
 642           return true;
 643         }
 644       }
 645     }
 646   }
 647
 648   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 649       // ISD::OR that is equivalent to an ISD::ADD.
 650       !CurDAG->isBaseWithConstantOffset(N))
 651     return false;
 652
 653   // Leave simple R +/- imm12 operands for LDRi12
 654   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 655     int RHSC;
 656     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 657                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 658       return false;
 659   }
 660
 661   // Otherwise this is R +/- [possibly shifted] R.
 662   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 663   ARM_AM::ShiftOpc ShOpcVal =
 664     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 665   unsigned ShAmt = 0;
 666
 667   Base   = N.getOperand(0);
 668   Offset = N.getOperand(1);
 669
 670   if (ShOpcVal != ARM_AM::no_shift) {
 671     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 672     // it.
 673     if (ConstantSDNode *Sh =
 674            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 675       ShAmt = Sh->getZExtValue();
 676       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 677         Offset = N.getOperand(1).getOperand(0);
 678       else {
 679         ShAmt = 0;
 680         ShOpcVal = ARM_AM::no_shift;
 681       }
 682     } else {
 683       ShOpcVal = ARM_AM::no_shift;
 684     }
 685   }
 686
 687   // Try matching (R shl C) + (R).
 688   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 689       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 690         N.getOperand(0).hasOneUse())) {
 691     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 692     if (ShOpcVal != ARM_AM::no_shift) {
 693       // Check to see if the RHS of the shift is a constant, if not, we can't
 694       // fold it.
 695       if (ConstantSDNode *Sh =
 696           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 697         ShAmt = Sh->getZExtValue();
 698         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 699           Offset = N.getOperand(0).getOperand(0);
 700           Base = N.getOperand(1);
 701         } else {
 702           ShAmt = 0;
 703           ShOpcVal = ARM_AM::no_shift;
 704         }
 705       } else {
 706         ShOpcVal = ARM_AM::no_shift;
 707       }
 708     }
 709   }
 710
 711   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 712   // and use it in a shifted operand do so.
 713   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 714     unsigned PowerOfTwo = 0;
 715     SDValue NewMulConst;
 716     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 717       HandleSDNode Handle(Offset);
 718       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 719       Offset = Handle.getValue();
 720       ShAmt = PowerOfTwo;
 721       ShOpcVal = ARM_AM::lsl;
 722     }
 723   }
 724
 725   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 726                                   SDLoc(N), MVT::i32);
 727   return true;
 728 }
 729
 730 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 731                                             SDValue &Offset, SDValue &Opc) {
 732   unsigned Opcode = Op->getOpcode();
 733   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 734     ? cast<LoadSDNode>(Op)->getAddressingMode()
 735     : cast<StoreSDNode>(Op)->getAddressingMode();
 736   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 737     ? ARM_AM::add : ARM_AM::sub;
 738   int Val;
 739   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 740     return false;
 741
 742   Offset = N;
 743   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 744   unsigned ShAmt = 0;
 745   if (ShOpcVal != ARM_AM::no_shift) {
 746     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 747     // it.
 748     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 749       ShAmt = Sh->getZExtValue();
 750       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 751         Offset = N.getOperand(0);
 752       else {
 753         ShAmt = 0;
 754         ShOpcVal = ARM_AM::no_shift;
 755       }
 756     } else {
 757       ShOpcVal = ARM_AM::no_shift;
 758     }
 759   }
 760
 761   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 762                                   SDLoc(N), MVT::i32);
 763   return true;
 764 }
 765
 766 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 767                                             SDValue &Offset, SDValue &Opc) {
 768   unsigned Opcode = Op->getOpcode();
 769   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 770     ? cast<LoadSDNode>(Op)->getAddressingMode()
 771     : cast<StoreSDNode>(Op)->getAddressingMode();
 772   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 773     ? ARM_AM::add : ARM_AM::sub;
 774   int Val;
 775   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 776     if (AddSub == ARM_AM::sub) Val *= -1;
 777     Offset = CurDAG->getRegister(0, MVT::i32);
 778     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 779     return true;
 780   }
 781
 782   return false;
 783 }
 784
 785
 786 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 787                                             SDValue &Offset, SDValue &Opc) {
 788   unsigned Opcode = Op->getOpcode();
 789   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 790     ? cast<LoadSDNode>(Op)->getAddressingMode()
 791     : cast<StoreSDNode>(Op)->getAddressingMode();
 792   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 793     ? ARM_AM::add : ARM_AM::sub;
 794   int Val;
 795   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 796     Offset = CurDAG->getRegister(0, MVT::i32);
 797     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 798                                                       ARM_AM::no_shift),
 799                                     SDLoc(Op), MVT::i32);
 800     return true;
 801   }
 802
 803   return false;
 804 }
 805
 806 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 807   Base = N;
 808   return true;
 809 }
 810
 811 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 812                                       SDValue &Base, SDValue &Offset,
 813                                       SDValue &Opc) {
 814   if (N.getOpcode() == ISD::SUB) {
 815     // X - C  is canonicalize to X + -C, no need to handle it here.
 816     Base = N.getOperand(0);
 817     Offset = N.getOperand(1);
 818     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 819                                     MVT::i32);
 820     return true;
 821   }
 822
 823   if (!CurDAG->isBaseWithConstantOffset(N)) {
 824     Base = N;
 825     if (N.getOpcode() == ISD::FrameIndex) {
 826       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 827       Base = CurDAG->getTargetFrameIndex(
 828           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 829     }
 830     Offset = CurDAG->getRegister(0, MVT::i32);
 831     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 832                                     MVT::i32);
 833     return true;
 834   }
 835
 836   // If the RHS is +/- imm8, fold into addr mode.
 837   int RHSC;
 838   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 839                               -256 + 1, 256, RHSC)) { // 8 bits.
 840     Base = N.getOperand(0);
 841     if (Base.getOpcode() == ISD::FrameIndex) {
 842       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 843       Base = CurDAG->getTargetFrameIndex(
 844           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 845     }
 846     Offset = CurDAG->getRegister(0, MVT::i32);
 847
 848     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 849     if (RHSC < 0) {
 850       AddSub = ARM_AM::sub;
 851       RHSC = -RHSC;
 852     }
 853     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 854                                     MVT::i32);
 855     return true;
 856   }
 857
 858   Base = N.getOperand(0);
 859   Offset = N.getOperand(1);
 860   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 861                                   MVT::i32);
 862   return true;
 863 }
 864
 865 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 866                                             SDValue &Offset, SDValue &Opc) {
 867   unsigned Opcode = Op->getOpcode();
 868   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 869     ? cast<LoadSDNode>(Op)->getAddressingMode()
 870     : cast<StoreSDNode>(Op)->getAddressingMode();
 871   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 872     ? ARM_AM::add : ARM_AM::sub;
 873   int Val;
 874   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 875     Offset = CurDAG->getRegister(0, MVT::i32);
 876     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 877                                     MVT::i32);
 878     return true;
 879   }
 880
 881   Offset = N;
 882   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 883                                   MVT::i32);
 884   return true;
 885 }
 886
 887 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
 888                                         bool FP16) {
 889   if (!CurDAG->isBaseWithConstantOffset(N)) {
 890     Base = N;
 891     if (N.getOpcode() == ISD::FrameIndex) {
 892       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 893       Base = CurDAG->getTargetFrameIndex(
 894           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 895     } else if (N.getOpcode() == ARMISD::Wrapper &&
 896                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 897                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 898                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 899       Base = N.getOperand(0);
 900     }
 901     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 902                                        SDLoc(N), MVT::i32);
 903     return true;
 904   }
 905
 906   // If the RHS is +/- imm8, fold into addr mode.
 907   int RHSC;
 908   const int Scale = FP16 ? 2 : 4;
 909
 910   if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
 911     Base = N.getOperand(0);
 912     if (Base.getOpcode() == ISD::FrameIndex) {
 913       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 914       Base = CurDAG->getTargetFrameIndex(
 915           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 916     }
 917
 918     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 919     if (RHSC < 0) {
 920       AddSub = ARM_AM::sub;
 921       RHSC = -RHSC;
 922     }
 923
 924     if (FP16)
 925       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
 926                                          SDLoc(N), MVT::i32);
 927     else
 928       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 929                                          SDLoc(N), MVT::i32);
 930
 931     return true;
 932   }
 933
 934   Base = N;
 935
 936   if (FP16)
 937     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
 938                                        SDLoc(N), MVT::i32);
 939   else
 940     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 941                                        SDLoc(N), MVT::i32);
 942
 943   return true;
 944 }
 945
 946 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 947                                       SDValue &Base, SDValue &Offset) {
 948   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
 949 }
 950
 951 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
 952                                           SDValue &Base, SDValue &Offset) {
 953   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
 954 }
 955
 956 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 957                                       SDValue &Align) {
 958   Addr = N;
 959
 960   unsigned Alignment = 0;
 961
 962   MemSDNode *MemN = cast<MemSDNode>(Parent);
 963
 964   if (isa<LSBaseSDNode>(MemN) ||
 965       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
 966         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
 967        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
 968     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 969     // The maximum alignment is equal to the memory size being referenced.
 970     unsigned MMOAlign = MemN->getAlignment();
 971     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
 972     if (MMOAlign >= MemSize && MemSize > 1)
 973       Alignment = MemSize;
 974   } else {
 975     // All other uses of addrmode6 are for intrinsics.  For now just record
 976     // the raw alignment value; it will be refined later based on the legal
 977     // alignment operands for the intrinsic.
 978     Alignment = MemN->getAlignment();
 979   }
 980
 981   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
 982   return true;
 983 }
 984
 985 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
 986                                             SDValue &Offset) {
 987   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
 988   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
 989   if (AM != ISD::POST_INC)
 990     return false;
 991   Offset = N;
 992   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
 993     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
 994       Offset = CurDAG->getRegister(0, MVT::i32);
 995   }
 996   return true;
 997 }
 998
 999 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1000                                        SDValue &Offset, SDValue &Label) {
1001   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1002     Offset = N.getOperand(0);
1003     SDValue N1 = N.getOperand(1);
1004     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1005                                       SDLoc(N), MVT::i32);
1006     return true;
1007   }
1008
1009   return false;
1010 }
1011
1012
1013 //===----------------------------------------------------------------------===//
1014 //                         Thumb Addressing Modes
1015 //===----------------------------------------------------------------------===//
1016
1017 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1018   // Negative numbers are difficult to materialise in thumb1. If we are
1019   // selecting the add of a negative, instead try to select ri with a zero
1020   // offset, so create the add node directly which will become a sub.
1021   if (N.getOpcode() != ISD::ADD)
1022     return false;
1023
1024   // Look for an imm which is not legal for ld/st, but is legal for sub.
1025   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1026     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1027
1028   return false;
1029 }
1030
1031 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1032                                                 SDValue &Offset) {
1033   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1034     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1035     if (!NC || !NC->isNullValue())
1036       return false;
1037
1038     Base = Offset = N;
1039     return true;
1040   }
1041
1042   Base = N.getOperand(0);
1043   Offset = N.getOperand(1);
1044   return true;
1045 }
1046
1047 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1048                                             SDValue &Offset) {
1049   if (shouldUseZeroOffsetLdSt(N))
1050     return false; // Select ri instead
1051   return SelectThumbAddrModeRRSext(N, Base, Offset);
1052 }
1053
1054 bool
1055 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1056                                           SDValue &Base, SDValue &OffImm) {
1057   if (shouldUseZeroOffsetLdSt(N)) {
1058     Base = N;
1059     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1060     return true;
1061   }
1062
1063   if (!CurDAG->isBaseWithConstantOffset(N)) {
1064     if (N.getOpcode() == ISD::ADD) {
1065       return false; // We want to select register offset instead
1066     } else if (N.getOpcode() == ARMISD::Wrapper &&
1067         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1068         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1069         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1070         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1071       Base = N.getOperand(0);
1072     } else {
1073       Base = N;
1074     }
1075
1076     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1077     return true;
1078   }
1079
1080   // If the RHS is + imm5 * scale, fold into addr mode.
1081   int RHSC;
1082   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1083     Base = N.getOperand(0);
1084     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1085     return true;
1086   }
1087
1088   // Offset is too large, so use register offset instead.
1089   return false;
1090 }
1091
1092 bool
1093 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1094                                            SDValue &OffImm) {
1095   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1096 }
1097
1098 bool
1099 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1100                                            SDValue &OffImm) {
1101   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1102 }
1103
1104 bool
1105 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1106                                            SDValue &OffImm) {
1107   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1108 }
1109
1110 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1111                                             SDValue &Base, SDValue &OffImm) {
1112   if (N.getOpcode() == ISD::FrameIndex) {
1113     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1114     // Only multiples of 4 are allowed for the offset, so the frame object
1115     // alignment must be at least 4.
1116     MachineFrameInfo &MFI = MF->getFrameInfo();
1117     if (MFI.getObjectAlignment(FI) < 4)
1118       MFI.setObjectAlignment(FI, 4);
1119     Base = CurDAG->getTargetFrameIndex(
1120         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1121     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1122     return true;
1123   }
1124
1125   if (!CurDAG->isBaseWithConstantOffset(N))
1126     return false;
1127
1128   if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1129     // If the RHS is + imm8 * scale, fold into addr mode.
1130     int RHSC;
1131     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1132       Base = N.getOperand(0);
1133       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1134       // Make sure the offset is inside the object, or we might fail to
1135       // allocate an emergency spill slot. (An out-of-range access is UB, but
1136       // it could show up anyway.)
1137       MachineFrameInfo &MFI = MF->getFrameInfo();
1138       if (RHSC * 4 < MFI.getObjectSize(FI)) {
1139         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1140         // indexed by the LHS must be 4-byte aligned.
1141         if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1142           MFI.setObjectAlignment(FI, 4);
1143         if (MFI.getObjectAlignment(FI) >= 4) {
1144           Base = CurDAG->getTargetFrameIndex(
1145               FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1146           OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1147           return true;
1148         }
1149       }
1150     }
1151   }
1152
1153   return false;
1154 }
1155
1156 template <unsigned Shift>
1157 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1158                                           SDValue &OffImm) {
1159   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1160     int RHSC;
1161     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1162                                 RHSC)) {
1163       Base = N.getOperand(0);
1164       if (N.getOpcode() == ISD::SUB)
1165         RHSC = -RHSC;
1166       OffImm =
1167           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1168       return true;
1169     }
1170   }
1171
1172   // Base only.
1173   Base = N;
1174   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1175   return true;
1176 }
1177
1178
1179 //===----------------------------------------------------------------------===//
1180 //                        Thumb 2 Addressing Modes
1181 //===----------------------------------------------------------------------===//
1182
1183
1184 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1185                                             SDValue &Base, SDValue &OffImm) {
1186   // Match simple R + imm12 operands.
1187
1188   // Base only.
1189   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1190       !CurDAG->isBaseWithConstantOffset(N)) {
1191     if (N.getOpcode() == ISD::FrameIndex) {
1192       // Match frame index.
1193       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1194       Base = CurDAG->getTargetFrameIndex(
1195           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1196       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1197       return true;
1198     }
1199
1200     if (N.getOpcode() == ARMISD::Wrapper &&
1201         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1202         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1203         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1204       Base = N.getOperand(0);
1205       if (Base.getOpcode() == ISD::TargetConstantPool)
1206         return false;  // We want to select t2LDRpci instead.
1207     } else
1208       Base = N;
1209     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1210     return true;
1211   }
1212
1213   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1214     if (SelectT2AddrModeImm8(N, Base, OffImm))
1215       // Let t2LDRi8 handle (R - imm8).
1216       return false;
1217
1218     int RHSC = (int)RHS->getZExtValue();
1219     if (N.getOpcode() == ISD::SUB)
1220       RHSC = -RHSC;
1221
1222     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1223       Base   = N.getOperand(0);
1224       if (Base.getOpcode() == ISD::FrameIndex) {
1225         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1226         Base = CurDAG->getTargetFrameIndex(
1227             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1228       }
1229       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1230       return true;
1231     }
1232   }
1233
1234   // Base only.
1235   Base = N;
1236   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1237   return true;
1238 }
1239
1240 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1241                                            SDValue &Base, SDValue &OffImm) {
1242   // Match simple R - imm8 operands.
1243   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1244       !CurDAG->isBaseWithConstantOffset(N))
1245     return false;
1246
1247   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1248     int RHSC = (int)RHS->getSExtValue();
1249     if (N.getOpcode() == ISD::SUB)
1250       RHSC = -RHSC;
1251
1252     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1253       Base = N.getOperand(0);
1254       if (Base.getOpcode() == ISD::FrameIndex) {
1255         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1256         Base = CurDAG->getTargetFrameIndex(
1257             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1258       }
1259       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1260       return true;
1261     }
1262   }
1263
1264   return false;
1265 }
1266
1267 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1268                                                  SDValue &OffImm){
1269   unsigned Opcode = Op->getOpcode();
1270   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1271     ? cast<LoadSDNode>(Op)->getAddressingMode()
1272     : cast<StoreSDNode>(Op)->getAddressingMode();
1273   int RHSC;
1274   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1275     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1276       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1277       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1278     return true;
1279   }
1280
1281   return false;
1282 }
1283
1284 template <unsigned Shift>
1285 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1286                                            SDValue &OffImm) {
1287   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1288     int RHSC;
1289     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1290                                 RHSC)) {
1291       Base = N.getOperand(0);
1292       if (Base.getOpcode() == ISD::FrameIndex) {
1293         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1294         Base = CurDAG->getTargetFrameIndex(
1295             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1296       }
1297
1298       if (N.getOpcode() == ISD::SUB)
1299         RHSC = -RHSC;
1300       OffImm =
1301           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1302       return true;
1303     }
1304   }
1305
1306   // Base only.
1307   Base = N;
1308   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1309   return true;
1310 }
1311
1312 template <unsigned Shift>
1313 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1314                                                  SDValue &OffImm) {
1315   return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1316 }
1317
1318 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1319                                                  SDValue &OffImm,
1320                                                  unsigned Shift) {
1321   unsigned Opcode = Op->getOpcode();
1322   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1323                                ? cast<LoadSDNode>(Op)->getAddressingMode()
1324                                : cast<StoreSDNode>(Op)->getAddressingMode();
1325   int RHSC;
1326   if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
1327     OffImm =
1328         ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1329             ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1330             : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1331                                         MVT::i32);
1332     return true;
1333   }
1334   return false;
1335 }
1336
1337 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1338                                             SDValue &Base,
1339                                             SDValue &OffReg, SDValue &ShImm) {
1340   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1341   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1342     return false;
1343
1344   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1345   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1346     int RHSC = (int)RHS->getZExtValue();
1347     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1348       return false;
1349     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1350       return false;
1351   }
1352
1353   // Look for (R + R) or (R + (R << [1,2,3])).
1354   unsigned ShAmt = 0;
1355   Base   = N.getOperand(0);
1356   OffReg = N.getOperand(1);
1357
1358   // Swap if it is ((R << c) + R).
1359   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1360   if (ShOpcVal != ARM_AM::lsl) {
1361     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1362     if (ShOpcVal == ARM_AM::lsl)
1363       std::swap(Base, OffReg);
1364   }
1365
1366   if (ShOpcVal == ARM_AM::lsl) {
1367     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1368     // it.
1369     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1370       ShAmt = Sh->getZExtValue();
1371       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1372         OffReg = OffReg.getOperand(0);
1373       else {
1374         ShAmt = 0;
1375       }
1376     }
1377   }
1378
1379   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1380   // and use it in a shifted operand do so.
1381   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1382     unsigned PowerOfTwo = 0;
1383     SDValue NewMulConst;
1384     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1385       HandleSDNode Handle(OffReg);
1386       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1387       OffReg = Handle.getValue();
1388       ShAmt = PowerOfTwo;
1389     }
1390   }
1391
1392   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1393
1394   return true;
1395 }
1396
1397 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1398                                                 SDValue &OffImm) {
1399   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1400   // instructions.
1401   Base = N;
1402   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1403
1404   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1405     return true;
1406
1407   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1408   if (!RHS)
1409     return true;
1410
1411   uint32_t RHSC = (int)RHS->getZExtValue();
1412   if (RHSC > 1020 || RHSC % 4 != 0)
1413     return true;
1414
1415   Base = N.getOperand(0);
1416   if (Base.getOpcode() == ISD::FrameIndex) {
1417     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1418     Base = CurDAG->getTargetFrameIndex(
1419         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1420   }
1421
1422   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1423   return true;
1424 }
1425
1426 //===--------------------------------------------------------------------===//
1427
1428 /// getAL - Returns a ARMCC::AL immediate node.
1429 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1430   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1431 }
1432
1433 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1434   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1435   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1436 }
1437
1438 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1439   LoadSDNode *LD = cast<LoadSDNode>(N);
1440   ISD::MemIndexedMode AM = LD->getAddressingMode();
1441   if (AM == ISD::UNINDEXED)
1442     return false;
1443
1444   EVT LoadedVT = LD->getMemoryVT();
1445   SDValue Offset, AMOpc;
1446   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1447   unsigned Opcode = 0;
1448   bool Match = false;
1449   if (LoadedVT == MVT::i32 && isPre &&
1450       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1451     Opcode = ARM::LDR_PRE_IMM;
1452     Match = true;
1453   } else if (LoadedVT == MVT::i32 && !isPre &&
1454       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1455     Opcode = ARM::LDR_POST_IMM;
1456     Match = true;
1457   } else if (LoadedVT == MVT::i32 &&
1458       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1459     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1460     Match = true;
1461
1462   } else if (LoadedVT == MVT::i16 &&
1463              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1464     Match = true;
1465     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1466       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1467       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1468   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1469     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1470       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1471         Match = true;
1472         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1473       }
1474     } else {
1475       if (isPre &&
1476           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1477         Match = true;
1478         Opcode = ARM::LDRB_PRE_IMM;
1479       } else if (!isPre &&
1480                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1481         Match = true;
1482         Opcode = ARM::LDRB_POST_IMM;
1483       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1484         Match = true;
1485         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1486       }
1487     }
1488   }
1489
1490   if (Match) {
1491     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1492       SDValue Chain = LD->getChain();
1493       SDValue Base = LD->getBasePtr();
1494       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1495                        CurDAG->getRegister(0, MVT::i32), Chain };
1496       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1497                                            MVT::Other, Ops);
1498       transferMemOperands(N, New);
1499       ReplaceNode(N, New);
1500       return true;
1501     } else {
1502       SDValue Chain = LD->getChain();
1503       SDValue Base = LD->getBasePtr();
1504       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1505                        CurDAG->getRegister(0, MVT::i32), Chain };
1506       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1507                                            MVT::Other, Ops);
1508       transferMemOperands(N, New);
1509       ReplaceNode(N, New);
1510       return true;
1511     }
1512   }
1513
1514   return false;
1515 }
1516
1517 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1518   LoadSDNode *LD = cast<LoadSDNode>(N);
1519   EVT LoadedVT = LD->getMemoryVT();
1520   ISD::MemIndexedMode AM = LD->getAddressingMode();
1521   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1522       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1523     return false;
1524
1525   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1526   if (!COffs || COffs->getZExtValue() != 4)
1527     return false;
1528
1529   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1530   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1531   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1532   // ISel.
1533   SDValue Chain = LD->getChain();
1534   SDValue Base = LD->getBasePtr();
1535   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1536                    CurDAG->getRegister(0, MVT::i32), Chain };
1537   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1538                                        MVT::i32, MVT::Other, Ops);
1539   transferMemOperands(N, New);
1540   ReplaceNode(N, New);
1541   return true;
1542 }
1543
1544 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1545   LoadSDNode *LD = cast<LoadSDNode>(N);
1546   ISD::MemIndexedMode AM = LD->getAddressingMode();
1547   if (AM == ISD::UNINDEXED)
1548     return false;
1549
1550   EVT LoadedVT = LD->getMemoryVT();
1551   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1552   SDValue Offset;
1553   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1554   unsigned Opcode = 0;
1555   bool Match = false;
1556   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1557     switch (LoadedVT.getSimpleVT().SimpleTy) {
1558     case MVT::i32:
1559       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1560       break;
1561     case MVT::i16:
1562       if (isSExtLd)
1563         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1564       else
1565         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1566       break;
1567     case MVT::i8:
1568     case MVT::i1:
1569       if (isSExtLd)
1570         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1571       else
1572         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1573       break;
1574     default:
1575       return false;
1576     }
1577     Match = true;
1578   }
1579
1580   if (Match) {
1581     SDValue Chain = LD->getChain();
1582     SDValue Base = LD->getBasePtr();
1583     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1584                      CurDAG->getRegister(0, MVT::i32), Chain };
1585     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1586                                          MVT::Other, Ops);
1587     transferMemOperands(N, New);
1588     ReplaceNode(N, New);
1589     return true;
1590   }
1591
1592   return false;
1593 }
1594
1595 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1596   LoadSDNode *LD = cast<LoadSDNode>(N);
1597   ISD::MemIndexedMode AM = LD->getAddressingMode();
1598   if (AM == ISD::UNINDEXED)
1599     return false;
1600   EVT LoadedVT = LD->getMemoryVT();
1601   if (!LoadedVT.isVector())
1602     return false;
1603   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1604   SDValue Offset;
1605   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1606   unsigned Opcode = 0;
1607   unsigned Align = LD->getAlignment();
1608   bool IsLE = Subtarget->isLittle();
1609
1610   if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1611       SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1612     if (isSExtLd)
1613       Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1614     else
1615       Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1616   } else if (LoadedVT == MVT::v8i8 &&
1617              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1618     if (isSExtLd)
1619       Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1620     else
1621       Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1622   } else if (LoadedVT == MVT::v4i8 &&
1623              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1624     if (isSExtLd)
1625       Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1626     else
1627       Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1628   } else if (Align >= 4 &&
1629              (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1630              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1631     Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1632   else if (Align >= 2 &&
1633            (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1634            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1635     Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1636   else if ((IsLE || LoadedVT == MVT::v16i8) &&
1637            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1638     Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1639   else
1640     return false;
1641
1642   SDValue Chain = LD->getChain();
1643   SDValue Base = LD->getBasePtr();
1644   SDValue Ops[] = {Base, Offset,
1645                    CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1646                    CurDAG->getRegister(0, MVT::i32), Chain};
1647   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
1648                                        MVT::i32, MVT::Other, Ops);
1649   transferMemOperands(N, New);
1650   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1651   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1652   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1653   CurDAG->RemoveDeadNode(N);
1654   return true;
1655 }
1656
1657 /// Form a GPRPair pseudo register from a pair of GPR regs.
1658 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1659   SDLoc dl(V0.getNode());
1660   SDValue RegClass =
1661     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1662   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1663   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1664   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1665   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1666 }
1667
1668 /// Form a D register from a pair of S registers.
1669 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1670   SDLoc dl(V0.getNode());
1671   SDValue RegClass =
1672     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1673   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1674   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1675   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1676   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1677 }
1678
1679 /// Form a quad register from a pair of D registers.
1680 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1681   SDLoc dl(V0.getNode());
1682   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1683                                                MVT::i32);
1684   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1685   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1686   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1687   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1688 }
1689
1690 /// Form 4 consecutive D registers from a pair of Q registers.
1691 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1692   SDLoc dl(V0.getNode());
1693   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1694                                                MVT::i32);
1695   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1696   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1697   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1698   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1699 }
1700
1701 /// Form 4 consecutive S registers.
1702 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1703                                    SDValue V2, SDValue V3) {
1704   SDLoc dl(V0.getNode());
1705   SDValue RegClass =
1706     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1707   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1708   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1709   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1710   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1711   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1712                                     V2, SubReg2, V3, SubReg3 };
1713   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1714 }
1715
1716 /// Form 4 consecutive D registers.
1717 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1718                                    SDValue V2, SDValue V3) {
1719   SDLoc dl(V0.getNode());
1720   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1721                                                MVT::i32);
1722   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1723   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1724   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1725   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1726   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1727                                     V2, SubReg2, V3, SubReg3 };
1728   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1729 }
1730
1731 /// Form 4 consecutive Q registers.
1732 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1733                                    SDValue V2, SDValue V3) {
1734   SDLoc dl(V0.getNode());
1735   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1736                                                MVT::i32);
1737   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1738   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1739   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1740   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1741   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1742                                     V2, SubReg2, V3, SubReg3 };
1743   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1744 }
1745
1746 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1747 /// of a NEON VLD or VST instruction.  The supported values depend on the
1748 /// number of registers being loaded.
1749 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1750                                        unsigned NumVecs, bool is64BitVector) {
1751   unsigned NumRegs = NumVecs;
1752   if (!is64BitVector && NumVecs < 3)
1753     NumRegs *= 2;
1754
1755   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1756   if (Alignment >= 32 && NumRegs == 4)
1757     Alignment = 32;
1758   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1759     Alignment = 16;
1760   else if (Alignment >= 8)
1761     Alignment = 8;
1762   else
1763     Alignment = 0;
1764
1765   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1766 }
1767
1768 static bool isVLDfixed(unsigned Opc)
1769 {
1770   switch (Opc) {
1771   default: return false;
1772   case ARM::VLD1d8wb_fixed : return true;
1773   case ARM::VLD1d16wb_fixed : return true;
1774   case ARM::VLD1d64Qwb_fixed : return true;
1775   case ARM::VLD1d32wb_fixed : return true;
1776   case ARM::VLD1d64wb_fixed : return true;
1777   case ARM::VLD1d64TPseudoWB_fixed : return true;
1778   case ARM::VLD1d64QPseudoWB_fixed : return true;
1779   case ARM::VLD1q8wb_fixed : return true;
1780   case ARM::VLD1q16wb_fixed : return true;
1781   case ARM::VLD1q32wb_fixed : return true;
1782   case ARM::VLD1q64wb_fixed : return true;
1783   case ARM::VLD1DUPd8wb_fixed : return true;
1784   case ARM::VLD1DUPd16wb_fixed : return true;
1785   case ARM::VLD1DUPd32wb_fixed : return true;
1786   case ARM::VLD1DUPq8wb_fixed : return true;
1787   case ARM::VLD1DUPq16wb_fixed : return true;
1788   case ARM::VLD1DUPq32wb_fixed : return true;
1789   case ARM::VLD2d8wb_fixed : return true;
1790   case ARM::VLD2d16wb_fixed : return true;
1791   case ARM::VLD2d32wb_fixed : return true;
1792   case ARM::VLD2q8PseudoWB_fixed : return true;
1793   case ARM::VLD2q16PseudoWB_fixed : return true;
1794   case ARM::VLD2q32PseudoWB_fixed : return true;
1795   case ARM::VLD2DUPd8wb_fixed : return true;
1796   case ARM::VLD2DUPd16wb_fixed : return true;
1797   case ARM::VLD2DUPd32wb_fixed : return true;
1798   }
1799 }
1800
1801 static bool isVSTfixed(unsigned Opc)
1802 {
1803   switch (Opc) {
1804   default: return false;
1805   case ARM::VST1d8wb_fixed : return true;
1806   case ARM::VST1d16wb_fixed : return true;
1807   case ARM::VST1d32wb_fixed : return true;
1808   case ARM::VST1d64wb_fixed : return true;
1809   case ARM::VST1q8wb_fixed : return true;
1810   case ARM::VST1q16wb_fixed : return true;
1811   case ARM::VST1q32wb_fixed : return true;
1812   case ARM::VST1q64wb_fixed : return true;
1813   case ARM::VST1d64TPseudoWB_fixed : return true;
1814   case ARM::VST1d64QPseudoWB_fixed : return true;
1815   case ARM::VST2d8wb_fixed : return true;
1816   case ARM::VST2d16wb_fixed : return true;
1817   case ARM::VST2d32wb_fixed : return true;
1818   case ARM::VST2q8PseudoWB_fixed : return true;
1819   case ARM::VST2q16PseudoWB_fixed : return true;
1820   case ARM::VST2q32PseudoWB_fixed : return true;
1821   }
1822 }
1823
1824 // Get the register stride update opcode of a VLD/VST instruction that
1825 // is otherwise equivalent to the given fixed stride updating instruction.
1826 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1827   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1828     && "Incorrect fixed stride updating instruction.");
1829   switch (Opc) {
1830   default: break;
1831   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1832   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1833   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1834   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1835   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1836   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1837   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1838   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1839   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1840   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1841   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1842   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1843   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1844   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1845   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1846   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1847   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1848   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1849
1850   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1851   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1852   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1853   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1854   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1855   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1856   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1857   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1858   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1859   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1860
1861   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1862   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1863   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1864   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1865   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1866   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1867
1868   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1869   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1870   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1871   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1872   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1873   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1874
1875   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1876   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1877   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1878   }
1879   return Opc; // If not one we handle, return it unchanged.
1880 }
1881
1882 /// Returns true if the given increment is a Constant known to be equal to the
1883 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1884 /// be used.
1885 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1886   auto C = dyn_cast<ConstantSDNode>(Inc);
1887   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1888 }
1889
1890 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1891                                 const uint16_t *DOpcodes,
1892                                 const uint16_t *QOpcodes0,
1893                                 const uint16_t *QOpcodes1) {
1894   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1895   SDLoc dl(N);
1896
1897   SDValue MemAddr, Align;
1898   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
1899                                    // nodes are not intrinsics.
1900   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1901   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1902     return;
1903
1904   SDValue Chain = N->getOperand(0);
1905   EVT VT = N->getValueType(0);
1906   bool is64BitVector = VT.is64BitVector();
1907   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1908
1909   unsigned OpcodeIndex;
1910   switch (VT.getSimpleVT().SimpleTy) {
1911   default: llvm_unreachable("unhandled vld type");
1912     // Double-register operations:
1913   case MVT::v8i8:  OpcodeIndex = 0; break;
1914   case MVT::v4f16:
1915   case MVT::v4i16: OpcodeIndex = 1; break;
1916   case MVT::v2f32:
1917   case MVT::v2i32: OpcodeIndex = 2; break;
1918   case MVT::v1i64: OpcodeIndex = 3; break;
1919     // Quad-register operations:
1920   case MVT::v16i8: OpcodeIndex = 0; break;
1921   case MVT::v8f16:
1922   case MVT::v8i16: OpcodeIndex = 1; break;
1923   case MVT::v4f32:
1924   case MVT::v4i32: OpcodeIndex = 2; break;
1925   case MVT::v2f64:
1926   case MVT::v2i64: OpcodeIndex = 3; break;
1927   }
1928
1929   EVT ResTy;
1930   if (NumVecs == 1)
1931     ResTy = VT;
1932   else {
1933     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1934     if (!is64BitVector)
1935       ResTyElts *= 2;
1936     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1937   }
1938   std::vector<EVT> ResTys;
1939   ResTys.push_back(ResTy);
1940   if (isUpdating)
1941     ResTys.push_back(MVT::i32);
1942   ResTys.push_back(MVT::Other);
1943
1944   SDValue Pred = getAL(CurDAG, dl);
1945   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1946   SDNode *VLd;
1947   SmallVector<SDValue, 7> Ops;
1948
1949   // Double registers and VLD1/VLD2 quad registers are directly supported.
1950   if (is64BitVector || NumVecs <= 2) {
1951     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1952                     QOpcodes0[OpcodeIndex]);
1953     Ops.push_back(MemAddr);
1954     Ops.push_back(Align);
1955     if (isUpdating) {
1956       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1957       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1958       if (!IsImmUpdate) {
1959         // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1960         // check for the opcode rather than the number of vector elements.
1961         if (isVLDfixed(Opc))
1962           Opc = getVLDSTRegisterUpdateOpcode(Opc);
1963         Ops.push_back(Inc);
1964       // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1965       // the operands if not such an opcode.
1966       } else if (!isVLDfixed(Opc))
1967         Ops.push_back(Reg0);
1968     }
1969     Ops.push_back(Pred);
1970     Ops.push_back(Reg0);
1971     Ops.push_back(Chain);
1972     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1973
1974   } else {
1975     // Otherwise, quad registers are loaded with two separate instructions,
1976     // where one loads the even registers and the other loads the odd registers.
1977     EVT AddrTy = MemAddr.getValueType();
1978
1979     // Load the even subregs.  This is always an updating load, so that it
1980     // provides the address to the second load for the odd subregs.
1981     SDValue ImplDef =
1982       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1983     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1984     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1985                                           ResTy, AddrTy, MVT::Other, OpsA);
1986     Chain = SDValue(VLdA, 2);
1987
1988     // Load the odd subregs.
1989     Ops.push_back(SDValue(VLdA, 1));
1990     Ops.push_back(Align);
1991     if (isUpdating) {
1992       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1993       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1994              "only constant post-increment update allowed for VLD3/4");
1995       (void)Inc;
1996       Ops.push_back(Reg0);
1997     }
1998     Ops.push_back(SDValue(VLdA, 0));
1999     Ops.push_back(Pred);
2000     Ops.push_back(Reg0);
2001     Ops.push_back(Chain);
2002     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2003   }
2004
2005   // Transfer memoperands.
2006   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2007   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2008
2009   if (NumVecs == 1) {
2010     ReplaceNode(N, VLd);
2011     return;
2012   }
2013
2014   // Extract out the subregisters.
2015   SDValue SuperReg = SDValue(VLd, 0);
2016   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2017                     ARM::qsub_3 == ARM::qsub_0 + 3,
2018                 "Unexpected subreg numbering");
2019   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2020   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2021     ReplaceUses(SDValue(N, Vec),
2022                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2023   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2024   if (isUpdating)
2025     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2026   CurDAG->RemoveDeadNode(N);
2027 }
2028
2029 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2030                                 const uint16_t *DOpcodes,
2031                                 const uint16_t *QOpcodes0,
2032                                 const uint16_t *QOpcodes1) {
2033   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2034   SDLoc dl(N);
2035
2036   SDValue MemAddr, Align;
2037   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2038                                    // nodes are not intrinsics.
2039   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2040   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2041   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2042     return;
2043
2044   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2045
2046   SDValue Chain = N->getOperand(0);
2047   EVT VT = N->getOperand(Vec0Idx).getValueType();
2048   bool is64BitVector = VT.is64BitVector();
2049   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2050
2051   unsigned OpcodeIndex;
2052   switch (VT.getSimpleVT().SimpleTy) {
2053   default: llvm_unreachable("unhandled vst type");
2054     // Double-register operations:
2055   case MVT::v8i8:  OpcodeIndex = 0; break;
2056   case MVT::v4f16:
2057   case MVT::v4i16: OpcodeIndex = 1; break;
2058   case MVT::v2f32:
2059   case MVT::v2i32: OpcodeIndex = 2; break;
2060   case MVT::v1i64: OpcodeIndex = 3; break;
2061     // Quad-register operations:
2062   case MVT::v16i8: OpcodeIndex = 0; break;
2063   case MVT::v8f16:
2064   case MVT::v8i16: OpcodeIndex = 1; break;
2065   case MVT::v4f32:
2066   case MVT::v4i32: OpcodeIndex = 2; break;
2067   case MVT::v2f64:
2068   case MVT::v2i64: OpcodeIndex = 3; break;
2069   }
2070
2071   std::vector<EVT> ResTys;
2072   if (isUpdating)
2073     ResTys.push_back(MVT::i32);
2074   ResTys.push_back(MVT::Other);
2075
2076   SDValue Pred = getAL(CurDAG, dl);
2077   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2078   SmallVector<SDValue, 7> Ops;
2079
2080   // Double registers and VST1/VST2 quad registers are directly supported.
2081   if (is64BitVector || NumVecs <= 2) {
2082     SDValue SrcReg;
2083     if (NumVecs == 1) {
2084       SrcReg = N->getOperand(Vec0Idx);
2085     } else if (is64BitVector) {
2086       // Form a REG_SEQUENCE to force register allocation.
2087       SDValue V0 = N->getOperand(Vec0Idx + 0);
2088       SDValue V1 = N->getOperand(Vec0Idx + 1);
2089       if (NumVecs == 2)
2090         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2091       else {
2092         SDValue V2 = N->getOperand(Vec0Idx + 2);
2093         // If it's a vst3, form a quad D-register and leave the last part as
2094         // an undef.
2095         SDValue V3 = (NumVecs == 3)
2096           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2097           : N->getOperand(Vec0Idx + 3);
2098         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2099       }
2100     } else {
2101       // Form a QQ register.
2102       SDValue Q0 = N->getOperand(Vec0Idx);
2103       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2104       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2105     }
2106
2107     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2108                     QOpcodes0[OpcodeIndex]);
2109     Ops.push_back(MemAddr);
2110     Ops.push_back(Align);
2111     if (isUpdating) {
2112       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2113       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2114       if (!IsImmUpdate) {
2115         // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2116         // check for the opcode rather than the number of vector elements.
2117         if (isVSTfixed(Opc))
2118           Opc = getVLDSTRegisterUpdateOpcode(Opc);
2119         Ops.push_back(Inc);
2120       }
2121       // VST1/VST2 fixed increment does not need Reg0 so only include it in
2122       // the operands if not such an opcode.
2123       else if (!isVSTfixed(Opc))
2124         Ops.push_back(Reg0);
2125     }
2126     Ops.push_back(SrcReg);
2127     Ops.push_back(Pred);
2128     Ops.push_back(Reg0);
2129     Ops.push_back(Chain);
2130     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2131
2132     // Transfer memoperands.
2133     CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2134
2135     ReplaceNode(N, VSt);
2136     return;
2137   }
2138
2139   // Otherwise, quad registers are stored with two separate instructions,
2140   // where one stores the even registers and the other stores the odd registers.
2141
2142   // Form the QQQQ REG_SEQUENCE.
2143   SDValue V0 = N->getOperand(Vec0Idx + 0);
2144   SDValue V1 = N->getOperand(Vec0Idx + 1);
2145   SDValue V2 = N->getOperand(Vec0Idx + 2);
2146   SDValue V3 = (NumVecs == 3)
2147     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2148     : N->getOperand(Vec0Idx + 3);
2149   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2150
2151   // Store the even D registers.  This is always an updating store, so that it
2152   // provides the address to the second store for the odd subregs.
2153   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2154   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2155                                         MemAddr.getValueType(),
2156                                         MVT::Other, OpsA);
2157   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2158   Chain = SDValue(VStA, 1);
2159
2160   // Store the odd D registers.
2161   Ops.push_back(SDValue(VStA, 0));
2162   Ops.push_back(Align);
2163   if (isUpdating) {
2164     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2165     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2166            "only constant post-increment update allowed for VST3/4");
2167     (void)Inc;
2168     Ops.push_back(Reg0);
2169   }
2170   Ops.push_back(RegSeq);
2171   Ops.push_back(Pred);
2172   Ops.push_back(Reg0);
2173   Ops.push_back(Chain);
2174   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2175                                         Ops);
2176   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2177   ReplaceNode(N, VStB);
2178 }
2179
2180 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2181                                       unsigned NumVecs,
2182                                       const uint16_t *DOpcodes,
2183                                       const uint16_t *QOpcodes) {
2184   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2185   SDLoc dl(N);
2186
2187   SDValue MemAddr, Align;
2188   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2189                                    // nodes are not intrinsics.
2190   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2191   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2192   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2193     return;
2194
2195   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2196
2197   SDValue Chain = N->getOperand(0);
2198   unsigned Lane =
2199     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2200   EVT VT = N->getOperand(Vec0Idx).getValueType();
2201   bool is64BitVector = VT.is64BitVector();
2202
2203   unsigned Alignment = 0;
2204   if (NumVecs != 3) {
2205     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2206     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2207     if (Alignment > NumBytes)
2208       Alignment = NumBytes;
2209     if (Alignment < 8 && Alignment < NumBytes)
2210       Alignment = 0;
2211     // Alignment must be a power of two; make sure of that.
2212     Alignment = (Alignment & -Alignment);
2213     if (Alignment == 1)
2214       Alignment = 0;
2215   }
2216   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2217
2218   unsigned OpcodeIndex;
2219   switch (VT.getSimpleVT().SimpleTy) {
2220   default: llvm_unreachable("unhandled vld/vst lane type");
2221     // Double-register operations:
2222   case MVT::v8i8:  OpcodeIndex = 0; break;
2223   case MVT::v4f16:
2224   case MVT::v4i16: OpcodeIndex = 1; break;
2225   case MVT::v2f32:
2226   case MVT::v2i32: OpcodeIndex = 2; break;
2227     // Quad-register operations:
2228   case MVT::v8f16:
2229   case MVT::v8i16: OpcodeIndex = 0; break;
2230   case MVT::v4f32:
2231   case MVT::v4i32: OpcodeIndex = 1; break;
2232   }
2233
2234   std::vector<EVT> ResTys;
2235   if (IsLoad) {
2236     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2237     if (!is64BitVector)
2238       ResTyElts *= 2;
2239     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2240                                       MVT::i64, ResTyElts));
2241   }
2242   if (isUpdating)
2243     ResTys.push_back(MVT::i32);
2244   ResTys.push_back(MVT::Other);
2245
2246   SDValue Pred = getAL(CurDAG, dl);
2247   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2248
2249   SmallVector<SDValue, 8> Ops;
2250   Ops.push_back(MemAddr);
2251   Ops.push_back(Align);
2252   if (isUpdating) {
2253     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2254     bool IsImmUpdate =
2255         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2256     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2257   }
2258
2259   SDValue SuperReg;
2260   SDValue V0 = N->getOperand(Vec0Idx + 0);
2261   SDValue V1 = N->getOperand(Vec0Idx + 1);
2262   if (NumVecs == 2) {
2263     if (is64BitVector)
2264       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2265     else
2266       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2267   } else {
2268     SDValue V2 = N->getOperand(Vec0Idx + 2);
2269     SDValue V3 = (NumVecs == 3)
2270       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2271       : N->getOperand(Vec0Idx + 3);
2272     if (is64BitVector)
2273       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2274     else
2275       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2276   }
2277   Ops.push_back(SuperReg);
2278   Ops.push_back(getI32Imm(Lane, dl));
2279   Ops.push_back(Pred);
2280   Ops.push_back(Reg0);
2281   Ops.push_back(Chain);
2282
2283   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2284                                   QOpcodes[OpcodeIndex]);
2285   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2286   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2287   if (!IsLoad) {
2288     ReplaceNode(N, VLdLn);
2289     return;
2290   }
2291
2292   // Extract the subregisters.
2293   SuperReg = SDValue(VLdLn, 0);
2294   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2295                     ARM::qsub_3 == ARM::qsub_0 + 3,
2296                 "Unexpected subreg numbering");
2297   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2298   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2299     ReplaceUses(SDValue(N, Vec),
2300                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2301   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2302   if (isUpdating)
2303     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2304   CurDAG->RemoveDeadNode(N);
2305 }
2306
2307 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2308                                    bool isUpdating, unsigned NumVecs,
2309                                    const uint16_t *DOpcodes,
2310                                    const uint16_t *QOpcodes0,
2311                                    const uint16_t *QOpcodes1) {
2312   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2313   SDLoc dl(N);
2314
2315   SDValue MemAddr, Align;
2316   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2317   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2318     return;
2319
2320   SDValue Chain = N->getOperand(0);
2321   EVT VT = N->getValueType(0);
2322   bool is64BitVector = VT.is64BitVector();
2323
2324   unsigned Alignment = 0;
2325   if (NumVecs != 3) {
2326     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2327     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2328     if (Alignment > NumBytes)
2329       Alignment = NumBytes;
2330     if (Alignment < 8 && Alignment < NumBytes)
2331       Alignment = 0;
2332     // Alignment must be a power of two; make sure of that.
2333     Alignment = (Alignment & -Alignment);
2334     if (Alignment == 1)
2335       Alignment = 0;
2336   }
2337   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2338
2339   unsigned OpcodeIndex;
2340   switch (VT.getSimpleVT().SimpleTy) {
2341   default: llvm_unreachable("unhandled vld-dup type");
2342   case MVT::v8i8:
2343   case MVT::v16i8: OpcodeIndex = 0; break;
2344   case MVT::v4i16:
2345   case MVT::v8i16:
2346   case MVT::v4f16:
2347   case MVT::v8f16:
2348                   OpcodeIndex = 1; break;
2349   case MVT::v2f32:
2350   case MVT::v2i32:
2351   case MVT::v4f32:
2352   case MVT::v4i32: OpcodeIndex = 2; break;
2353   case MVT::v1f64:
2354   case MVT::v1i64: OpcodeIndex = 3; break;
2355   }
2356
2357   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2358   if (!is64BitVector)
2359     ResTyElts *= 2;
2360   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2361
2362   std::vector<EVT> ResTys;
2363   ResTys.push_back(ResTy);
2364   if (isUpdating)
2365     ResTys.push_back(MVT::i32);
2366   ResTys.push_back(MVT::Other);
2367
2368   SDValue Pred = getAL(CurDAG, dl);
2369   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2370
2371   SDNode *VLdDup;
2372   if (is64BitVector || NumVecs == 1) {
2373     SmallVector<SDValue, 6> Ops;
2374     Ops.push_back(MemAddr);
2375     Ops.push_back(Align);
2376     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2377                                    QOpcodes0[OpcodeIndex];
2378     if (isUpdating) {
2379       // fixed-stride update instructions don't have an explicit writeback
2380       // operand. It's implicit in the opcode itself.
2381       SDValue Inc = N->getOperand(2);
2382       bool IsImmUpdate =
2383           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2384       if (NumVecs <= 2 && !IsImmUpdate)
2385         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2386       if (!IsImmUpdate)
2387         Ops.push_back(Inc);
2388       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2389       else if (NumVecs > 2)
2390         Ops.push_back(Reg0);
2391     }
2392     Ops.push_back(Pred);
2393     Ops.push_back(Reg0);
2394     Ops.push_back(Chain);
2395     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2396   } else if (NumVecs == 2) {
2397     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2398     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2399                                           dl, ResTys, OpsA);
2400
2401     Chain = SDValue(VLdA, 1);
2402     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2403     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2404   } else {
2405     SDValue ImplDef =
2406       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2407     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2408     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2409                                           dl, ResTys, OpsA);
2410
2411     SDValue SuperReg = SDValue(VLdA, 0);
2412     Chain = SDValue(VLdA, 1);
2413     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2414     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2415   }
2416
2417   // Transfer memoperands.
2418   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2419   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2420
2421   // Extract the subregisters.
2422   if (NumVecs == 1) {
2423     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2424   } else {
2425     SDValue SuperReg = SDValue(VLdDup, 0);
2426     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2427     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2428     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2429       ReplaceUses(SDValue(N, Vec),
2430                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2431     }
2432   }
2433   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2434   if (isUpdating)
2435     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2436   CurDAG->RemoveDeadNode(N);
2437 }
2438
2439 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2440   if (!Subtarget->hasV6T2Ops())
2441     return false;
2442
2443   unsigned Opc = isSigned
2444     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2445     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2446   SDLoc dl(N);
2447
2448   // For unsigned extracts, check for a shift right and mask
2449   unsigned And_imm = 0;
2450   if (N->getOpcode() == ISD::AND) {
2451     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2452
2453       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2454       if (And_imm & (And_imm + 1))
2455         return false;
2456
2457       unsigned Srl_imm = 0;
2458       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2459                                 Srl_imm)) {
2460         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2461
2462         // Mask off the unnecessary bits of the AND immediate; normally
2463         // DAGCombine will do this, but that might not happen if
2464         // targetShrinkDemandedConstant chooses a different immediate.
2465         And_imm &= -1U >> Srl_imm;
2466
2467         // Note: The width operand is encoded as width-1.
2468         unsigned Width = countTrailingOnes(And_imm) - 1;
2469         unsigned LSB = Srl_imm;
2470
2471         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2472
2473         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2474           // It's cheaper to use a right shift to extract the top bits.
2475           if (Subtarget->isThumb()) {
2476             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2477             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2478                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2479                               getAL(CurDAG, dl), Reg0, Reg0 };
2480             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2481             return true;
2482           }
2483
2484           // ARM models shift instructions as MOVsi with shifter operand.
2485           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2486           SDValue ShOpc =
2487             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2488                                       MVT::i32);
2489           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2490                             getAL(CurDAG, dl), Reg0, Reg0 };
2491           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2492           return true;
2493         }
2494
2495         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2496         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2497                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2498                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2499                           getAL(CurDAG, dl), Reg0 };
2500         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2501         return true;
2502       }
2503     }
2504     return false;
2505   }
2506
2507   // Otherwise, we're looking for a shift of a shift
2508   unsigned Shl_imm = 0;
2509   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2510     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2511     unsigned Srl_imm = 0;
2512     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2513       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2514       // Note: The width operand is encoded as width-1.
2515       unsigned Width = 32 - Srl_imm - 1;
2516       int LSB = Srl_imm - Shl_imm;
2517       if (LSB < 0)
2518         return false;
2519       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2520       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2521       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2522                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2523                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2524                         getAL(CurDAG, dl), Reg0 };
2525       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2526       return true;
2527     }
2528   }
2529
2530   // Or we are looking for a shift of an and, with a mask operand
2531   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2532       isShiftedMask_32(And_imm)) {
2533     unsigned Srl_imm = 0;
2534     unsigned LSB = countTrailingZeros(And_imm);
2535     // Shift must be the same as the ands lsb
2536     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2537       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2538       unsigned MSB = 31 - countLeadingZeros(And_imm);
2539       // Note: The width operand is encoded as width-1.
2540       unsigned Width = MSB - LSB;
2541       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2542       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2543       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2544                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2545                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2546                         getAL(CurDAG, dl), Reg0 };
2547       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2548       return true;
2549     }
2550   }
2551
2552   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2553     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2554     unsigned LSB = 0;
2555     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2556         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2557       return false;
2558
2559     if (LSB + Width > 32)
2560       return false;
2561
2562     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2563     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2564     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2565                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2566                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2567                       getAL(CurDAG, dl), Reg0 };
2568     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2569     return true;
2570   }
2571
2572   return false;
2573 }
2574
2575 /// Target-specific DAG combining for ISD::XOR.
2576 /// Target-independent combining lowers SELECT_CC nodes of the form
2577 /// select_cc setg[ge] X,  0,  X, -X
2578 /// select_cc setgt    X, -1,  X, -X
2579 /// select_cc setl[te] X,  0, -X,  X
2580 /// select_cc setlt    X,  1, -X,  X
2581 /// which represent Integer ABS into:
2582 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2583 /// ARM instruction selection detects the latter and matches it to
2584 /// ARM::ABS or ARM::t2ABS machine node.
2585 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2586   SDValue XORSrc0 = N->getOperand(0);
2587   SDValue XORSrc1 = N->getOperand(1);
2588   EVT VT = N->getValueType(0);
2589
2590   if (Subtarget->isThumb1Only())
2591     return false;
2592
2593   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2594     return false;
2595
2596   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2597   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2598   SDValue SRASrc0 = XORSrc1.getOperand(0);
2599   SDValue SRASrc1 = XORSrc1.getOperand(1);
2600   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2601   EVT XType = SRASrc0.getValueType();
2602   unsigned Size = XType.getSizeInBits() - 1;
2603
2604   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2605       XType.isInteger() && SRAConstant != nullptr &&
2606       Size == SRAConstant->getZExtValue()) {
2607     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2608     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2609     return true;
2610   }
2611
2612   return false;
2613 }
2614
2615 /// We've got special pseudo-instructions for these
2616 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2617   unsigned Opcode;
2618   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2619   if (MemTy == MVT::i8)
2620     Opcode = ARM::CMP_SWAP_8;
2621   else if (MemTy == MVT::i16)
2622     Opcode = ARM::CMP_SWAP_16;
2623   else if (MemTy == MVT::i32)
2624     Opcode = ARM::CMP_SWAP_32;
2625   else
2626     llvm_unreachable("Unknown AtomicCmpSwap type");
2627
2628   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2629                    N->getOperand(0)};
2630   SDNode *CmpSwap = CurDAG->getMachineNode(
2631       Opcode, SDLoc(N),
2632       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2633
2634   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2635   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2636
2637   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2638   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2639   CurDAG->RemoveDeadNode(N);
2640 }
2641
2642 static Optional<std::pair<unsigned, unsigned>>
2643 getContiguousRangeOfSetBits(const APInt &A) {
2644   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2645   unsigned LastOne = A.countTrailingZeros();
2646   if (A.countPopulation() != (FirstOne - LastOne + 1))
2647     return Optional<std::pair<unsigned,unsigned>>();
2648   return std::make_pair(FirstOne, LastOne);
2649 }
2650
2651 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2652   assert(N->getOpcode() == ARMISD::CMPZ);
2653   SwitchEQNEToPLMI = false;
2654
2655   if (!Subtarget->isThumb())
2656     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2657     // LSR don't exist as standalone instructions - they need the barrel shifter.
2658     return;
2659
2660   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2661   SDValue And = N->getOperand(0);
2662   if (!And->hasOneUse())
2663     return;
2664
2665   SDValue Zero = N->getOperand(1);
2666   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2667       And->getOpcode() != ISD::AND)
2668     return;
2669   SDValue X = And.getOperand(0);
2670   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2671
2672   if (!C)
2673     return;
2674   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2675   if (!Range)
2676     return;
2677
2678   // There are several ways to lower this:
2679   SDNode *NewN;
2680   SDLoc dl(N);
2681
2682   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2683     if (Subtarget->isThumb2()) {
2684       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2685       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2686                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2687                         CurDAG->getRegister(0, MVT::i32) };
2688       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2689     } else {
2690       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2691                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2692                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2693       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2694     }
2695   };
2696
2697   if (Range->second == 0) {
2698     //  1. Mask includes the LSB -> Simply shift the top N bits off
2699     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2700     ReplaceNode(And.getNode(), NewN);
2701   } else if (Range->first == 31) {
2702     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2703     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2704     ReplaceNode(And.getNode(), NewN);
2705   } else if (Range->first == Range->second) {
2706     //  3. Only one bit is set. We can shift this into the sign bit and use a
2707     //     PL/MI comparison.
2708     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2709     ReplaceNode(And.getNode(), NewN);
2710
2711     SwitchEQNEToPLMI = true;
2712   } else if (!Subtarget->hasV6T2Ops()) {
2713     //  4. Do a double shift to clear bottom and top bits, but only in
2714     //     thumb-1 mode as in thumb-2 we can use UBFX.
2715     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2716     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2717                      Range->second + (31 - Range->first));
2718     ReplaceNode(And.getNode(), NewN);
2719   }
2720
2721 }
2722
2723 void ARMDAGToDAGISel::Select(SDNode *N) {
2724   SDLoc dl(N);
2725
2726   if (N->isMachineOpcode()) {
2727     N->setNodeId(-1);
2728     return;   // Already selected.
2729   }
2730
2731   switch (N->getOpcode()) {
2732   default: break;
2733   case ISD::STORE: {
2734     // For Thumb1, match an sp-relative store in C++. This is a little
2735     // unfortunate, but I don't think I can make the chain check work
2736     // otherwise.  (The chain of the store has to be the same as the chain
2737     // of the CopyFromReg, or else we can't replace the CopyFromReg with
2738     // a direct reference to "SP".)
2739     //
2740     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2741     // a different addressing mode from other four-byte stores.
2742     //
2743     // This pattern usually comes up with call arguments.
2744     StoreSDNode *ST = cast<StoreSDNode>(N);
2745     SDValue Ptr = ST->getBasePtr();
2746     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2747       int RHSC = 0;
2748       if (Ptr.getOpcode() == ISD::ADD &&
2749           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2750         Ptr = Ptr.getOperand(0);
2751
2752       if (Ptr.getOpcode() == ISD::CopyFromReg &&
2753           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2754           Ptr.getOperand(0) == ST->getChain()) {
2755         SDValue Ops[] = {ST->getValue(),
2756                          CurDAG->getRegister(ARM::SP, MVT::i32),
2757                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2758                          getAL(CurDAG, dl),
2759                          CurDAG->getRegister(0, MVT::i32),
2760                          ST->getChain()};
2761         MachineSDNode *ResNode =
2762             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2763         MachineMemOperand *MemOp = ST->getMemOperand();
2764         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2765         ReplaceNode(N, ResNode);
2766         return;
2767       }
2768     }
2769     break;
2770   }
2771   case ISD::WRITE_REGISTER:
2772     if (tryWriteRegister(N))
2773       return;
2774     break;
2775   case ISD::READ_REGISTER:
2776     if (tryReadRegister(N))
2777       return;
2778     break;
2779   case ISD::INLINEASM:
2780   case ISD::INLINEASM_BR:
2781     if (tryInlineAsm(N))
2782       return;
2783     break;
2784   case ISD::XOR:
2785     // Select special operations if XOR node forms integer ABS pattern
2786     if (tryABSOp(N))
2787       return;
2788     // Other cases are autogenerated.
2789     break;
2790   case ISD::Constant: {
2791     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2792     // If we can't materialize the constant we need to use a literal pool
2793     if (ConstantMaterializationCost(Val, Subtarget) > 2) {
2794       SDValue CPIdx = CurDAG->getTargetConstantPool(
2795           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2796           TLI->getPointerTy(CurDAG->getDataLayout()));
2797
2798       SDNode *ResNode;
2799       if (Subtarget->isThumb()) {
2800         SDValue Ops[] = {
2801           CPIdx,
2802           getAL(CurDAG, dl),
2803           CurDAG->getRegister(0, MVT::i32),
2804           CurDAG->getEntryNode()
2805         };
2806         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2807                                          Ops);
2808       } else {
2809         SDValue Ops[] = {
2810           CPIdx,
2811           CurDAG->getTargetConstant(0, dl, MVT::i32),
2812           getAL(CurDAG, dl),
2813           CurDAG->getRegister(0, MVT::i32),
2814           CurDAG->getEntryNode()
2815         };
2816         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2817                                          Ops);
2818       }
2819       // Annotate the Node with memory operand information so that MachineInstr
2820       // queries work properly. This e.g. gives the register allocation the
2821       // required information for rematerialization.
2822       MachineFunction& MF = CurDAG->getMachineFunction();
2823       MachineMemOperand *MemOp =
2824           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2825                                   MachineMemOperand::MOLoad, 4, 4);
2826
2827       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2828
2829       ReplaceNode(N, ResNode);
2830       return;
2831     }
2832
2833     // Other cases are autogenerated.
2834     break;
2835   }
2836   case ISD::FrameIndex: {
2837     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2838     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2839     SDValue TFI = CurDAG->getTargetFrameIndex(
2840         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2841     if (Subtarget->isThumb1Only()) {
2842       // Set the alignment of the frame object to 4, to avoid having to generate
2843       // more than one ADD
2844       MachineFrameInfo &MFI = MF->getFrameInfo();
2845       if (MFI.getObjectAlignment(FI) < 4)
2846         MFI.setObjectAlignment(FI, 4);
2847       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2848                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2849       return;
2850     } else {
2851       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2852                       ARM::t2ADDri : ARM::ADDri);
2853       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2854                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2855                         CurDAG->getRegister(0, MVT::i32) };
2856       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2857       return;
2858     }
2859   }
2860   case ISD::SRL:
2861     if (tryV6T2BitfieldExtractOp(N, false))
2862       return;
2863     break;
2864   case ISD::SIGN_EXTEND_INREG:
2865   case ISD::SRA:
2866     if (tryV6T2BitfieldExtractOp(N, true))
2867       return;
2868     break;
2869   case ISD::MUL:
2870     if (Subtarget->isThumb1Only())
2871       break;
2872     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2873       unsigned RHSV = C->getZExtValue();
2874       if (!RHSV) break;
2875       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2876         unsigned ShImm = Log2_32(RHSV-1);
2877         if (ShImm >= 32)
2878           break;
2879         SDValue V = N->getOperand(0);
2880         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2881         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2882         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2883         if (Subtarget->isThumb()) {
2884           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2885           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2886           return;
2887         } else {
2888           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2889                             Reg0 };
2890           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2891           return;
2892         }
2893       }
2894       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2895         unsigned ShImm = Log2_32(RHSV+1);
2896         if (ShImm >= 32)
2897           break;
2898         SDValue V = N->getOperand(0);
2899         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2900         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2901         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2902         if (Subtarget->isThumb()) {
2903           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2904           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2905           return;
2906         } else {
2907           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2908                             Reg0 };
2909           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2910           return;
2911         }
2912       }
2913     }
2914     break;
2915   case ISD::AND: {
2916     // Check for unsigned bitfield extract
2917     if (tryV6T2BitfieldExtractOp(N, false))
2918       return;
2919
2920     // If an immediate is used in an AND node, it is possible that the immediate
2921     // can be more optimally materialized when negated. If this is the case we
2922     // can negate the immediate and use a BIC instead.
2923     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2924     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2925       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2926
2927       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2928       // immediate can be negated and fit in the immediate operand of
2929       // a t2BIC, don't do any manual transform here as this can be
2930       // handled by the generic ISel machinery.
2931       bool PreferImmediateEncoding =
2932         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2933       if (!PreferImmediateEncoding &&
2934           ConstantMaterializationCost(Imm, Subtarget) >
2935               ConstantMaterializationCost(~Imm, Subtarget)) {
2936         // The current immediate costs more to materialize than a negated
2937         // immediate, so negate the immediate and use a BIC.
2938         SDValue NewImm =
2939           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2940         // If the new constant didn't exist before, reposition it in the topological
2941         // ordering so it is just before N. Otherwise, don't touch its location.
2942         if (NewImm->getNodeId() == -1)
2943           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2944
2945         if (!Subtarget->hasThumb2()) {
2946           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2947                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2948                            CurDAG->getRegister(0, MVT::i32)};
2949           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2950           return;
2951         } else {
2952           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2953                            CurDAG->getRegister(0, MVT::i32),
2954                            CurDAG->getRegister(0, MVT::i32)};
2955           ReplaceNode(N,
2956                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2957           return;
2958         }
2959       }
2960     }
2961
2962     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2963     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2964     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2965     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2966     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2967     EVT VT = N->getValueType(0);
2968     if (VT != MVT::i32)
2969       break;
2970     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2971       ? ARM::t2MOVTi16
2972       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2973     if (!Opc)
2974       break;
2975     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2976     N1C = dyn_cast<ConstantSDNode>(N1);
2977     if (!N1C)
2978       break;
2979     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2980       SDValue N2 = N0.getOperand(1);
2981       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2982       if (!N2C)
2983         break;
2984       unsigned N1CVal = N1C->getZExtValue();
2985       unsigned N2CVal = N2C->getZExtValue();
2986       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2987           (N1CVal & 0xffffU) == 0xffffU &&
2988           (N2CVal & 0xffffU) == 0x0U) {
2989         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2990                                                   dl, MVT::i32);
2991         SDValue Ops[] = { N0.getOperand(0), Imm16,
2992                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2993         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2994         return;
2995       }
2996     }
2997
2998     break;
2999   }
3000   case ARMISD::UMAAL: {
3001     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3002     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3003                       N->getOperand(2), N->getOperand(3),
3004                       getAL(CurDAG, dl),
3005                       CurDAG->getRegister(0, MVT::i32) };
3006     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3007     return;
3008   }
3009   case ARMISD::UMLAL:{
3010     if (Subtarget->isThumb()) {
3011       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3012                         N->getOperand(3), getAL(CurDAG, dl),
3013                         CurDAG->getRegister(0, MVT::i32)};
3014       ReplaceNode(
3015           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3016       return;
3017     }else{
3018       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3019                         N->getOperand(3), getAL(CurDAG, dl),
3020                         CurDAG->getRegister(0, MVT::i32),
3021                         CurDAG->getRegister(0, MVT::i32) };
3022       ReplaceNode(N, CurDAG->getMachineNode(
3023                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3024                          MVT::i32, MVT::i32, Ops));
3025       return;
3026     }
3027   }
3028   case ARMISD::SMLAL:{
3029     if (Subtarget->isThumb()) {
3030       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3031                         N->getOperand(3), getAL(CurDAG, dl),
3032                         CurDAG->getRegister(0, MVT::i32)};
3033       ReplaceNode(
3034           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3035       return;
3036     }else{
3037       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3038                         N->getOperand(3), getAL(CurDAG, dl),
3039                         CurDAG->getRegister(0, MVT::i32),
3040                         CurDAG->getRegister(0, MVT::i32) };
3041       ReplaceNode(N, CurDAG->getMachineNode(
3042                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3043                          MVT::i32, MVT::i32, Ops));
3044       return;
3045     }
3046   }
3047   case ARMISD::SUBE: {
3048     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3049       break;
3050     // Look for a pattern to match SMMLS
3051     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3052     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3053         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3054         !SDValue(N, 1).use_empty())
3055       break;
3056
3057     if (Subtarget->isThumb())
3058       assert(Subtarget->hasThumb2() &&
3059              "This pattern should not be generated for Thumb");
3060
3061     SDValue SmulLoHi = N->getOperand(1);
3062     SDValue Subc = N->getOperand(2);
3063     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3064
3065     if (!Zero || Zero->getZExtValue() != 0 ||
3066         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3067         N->getOperand(1) != SmulLoHi.getValue(1) ||
3068         N->getOperand(2) != Subc.getValue(1))
3069       break;
3070
3071     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3072     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3073                       N->getOperand(0), getAL(CurDAG, dl),
3074                       CurDAG->getRegister(0, MVT::i32) };
3075     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3076     return;
3077   }
3078   case ISD::LOAD: {
3079     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3080       return;
3081     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3082       if (tryT2IndexedLoad(N))
3083         return;
3084     } else if (Subtarget->isThumb()) {
3085       if (tryT1IndexedLoad(N))
3086         return;
3087     } else if (tryARMIndexedLoad(N))
3088       return;
3089     // Other cases are autogenerated.
3090     break;
3091   }
3092   case ARMISD::WLS:
3093   case ARMISD::LE: {
3094     SDValue Ops[] = { N->getOperand(1),
3095                       N->getOperand(2),
3096                       N->getOperand(0) };
3097     unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3098       ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3099     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3100     ReplaceUses(N, New);
3101     CurDAG->RemoveDeadNode(N);
3102     return;
3103   }
3104   case ARMISD::LOOP_DEC: {
3105     SDValue Ops[] = { N->getOperand(1),
3106                       N->getOperand(2),
3107                       N->getOperand(0) };
3108     SDNode *Dec =
3109       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3110                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3111     ReplaceUses(N, Dec);
3112     CurDAG->RemoveDeadNode(N);
3113     return;
3114   }
3115   case ARMISD::BRCOND: {
3116     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3117     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3118     // Pattern complexity = 6  cost = 1  size = 0
3119
3120     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3121     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3122     // Pattern complexity = 6  cost = 1  size = 0
3123
3124     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3125     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3126     // Pattern complexity = 6  cost = 1  size = 0
3127
3128     unsigned Opc = Subtarget->isThumb() ?
3129       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3130     SDValue Chain = N->getOperand(0);
3131     SDValue N1 = N->getOperand(1);
3132     SDValue N2 = N->getOperand(2);
3133     SDValue N3 = N->getOperand(3);
3134     SDValue InFlag = N->getOperand(4);
3135     assert(N1.getOpcode() == ISD::BasicBlock);
3136     assert(N2.getOpcode() == ISD::Constant);
3137     assert(N3.getOpcode() == ISD::Register);
3138
3139     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3140
3141     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3142       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3143         SDValue Int = InFlag.getOperand(0);
3144         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3145
3146         // Handle low-overhead loops.
3147         if (ID == Intrinsic::loop_decrement_reg) {
3148           SDValue Elements = Int.getOperand(2);
3149           SDValue Size = CurDAG->getTargetConstant(
3150             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3151                                  MVT::i32);
3152
3153           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3154           SDNode *LoopDec =
3155             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3156                                    CurDAG->getVTList(MVT::i32, MVT::Other),
3157                                    Args);
3158           ReplaceUses(Int.getNode(), LoopDec);
3159
3160           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3161           SDNode *LoopEnd =
3162             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3163
3164           ReplaceUses(N, LoopEnd);
3165           CurDAG->RemoveDeadNode(N);
3166           CurDAG->RemoveDeadNode(InFlag.getNode());
3167           CurDAG->RemoveDeadNode(Int.getNode());
3168           return;
3169         }
3170       }
3171
3172       bool SwitchEQNEToPLMI;
3173       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3174       InFlag = N->getOperand(4);
3175
3176       if (SwitchEQNEToPLMI) {
3177         switch ((ARMCC::CondCodes)CC) {
3178         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3179         case ARMCC::NE:
3180           CC = (unsigned)ARMCC::MI;
3181           break;
3182         case ARMCC::EQ:
3183           CC = (unsigned)ARMCC::PL;
3184           break;
3185         }
3186       }
3187     }
3188
3189     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3190     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3191     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3192                                              MVT::Glue, Ops);
3193     Chain = SDValue(ResNode, 0);
3194     if (N->getNumValues() == 2) {
3195       InFlag = SDValue(ResNode, 1);
3196       ReplaceUses(SDValue(N, 1), InFlag);
3197     }
3198     ReplaceUses(SDValue(N, 0),
3199                 SDValue(Chain.getNode(), Chain.getResNo()));
3200     CurDAG->RemoveDeadNode(N);
3201     return;
3202   }
3203
3204   case ARMISD::CMPZ: {
3205     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3206     //   This allows us to avoid materializing the expensive negative constant.
3207     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3208     //   for its glue output.
3209     SDValue X = N->getOperand(0);
3210     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3211     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3212       int64_t Addend = -C->getSExtValue();
3213
3214       SDNode *Add = nullptr;
3215       // ADDS can be better than CMN if the immediate fits in a
3216       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3217       // Outside that range we can just use a CMN which is 32-bit but has a
3218       // 12-bit immediate range.
3219       if (Addend < 1<<8) {
3220         if (Subtarget->isThumb2()) {
3221           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3222                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3223                             CurDAG->getRegister(0, MVT::i32) };
3224           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3225         } else {
3226           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3227           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3228                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3229                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3230           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3231         }
3232       }
3233       if (Add) {
3234         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3235         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3236       }
3237     }
3238     // Other cases are autogenerated.
3239     break;
3240   }
3241
3242   case ARMISD::CMOV: {
3243     SDValue InFlag = N->getOperand(4);
3244
3245     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3246       bool SwitchEQNEToPLMI;
3247       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3248
3249       if (SwitchEQNEToPLMI) {
3250         SDValue ARMcc = N->getOperand(2);
3251         ARMCC::CondCodes CC =
3252           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3253
3254         switch (CC) {
3255         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3256         case ARMCC::NE:
3257           CC = ARMCC::MI;
3258           break;
3259         case ARMCC::EQ:
3260           CC = ARMCC::PL;
3261           break;
3262         }
3263         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3264         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3265                          N->getOperand(3), N->getOperand(4)};
3266         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3267       }
3268
3269     }
3270     // Other cases are autogenerated.
3271     break;
3272   }
3273
3274   case ARMISD::VZIP: {
3275     unsigned Opc = 0;
3276     EVT VT = N->getValueType(0);
3277     switch (VT.getSimpleVT().SimpleTy) {
3278     default: return;
3279     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3280     case MVT::v4f16:
3281     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3282     case MVT::v2f32:
3283     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3284     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3285     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3286     case MVT::v8f16:
3287     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3288     case MVT::v4f32:
3289     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3290     }
3291     SDValue Pred = getAL(CurDAG, dl);
3292     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3293     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3294     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3295     return;
3296   }
3297   case ARMISD::VUZP: {
3298     unsigned Opc = 0;
3299     EVT VT = N->getValueType(0);
3300     switch (VT.getSimpleVT().SimpleTy) {
3301     default: return;
3302     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3303     case MVT::v4f16:
3304     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3305     case MVT::v2f32:
3306     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3307     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3308     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3309     case MVT::v8f16:
3310     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3311     case MVT::v4f32:
3312     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3313     }
3314     SDValue Pred = getAL(CurDAG, dl);
3315     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3316     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3317     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3318     return;
3319   }
3320   case ARMISD::VTRN: {
3321     unsigned Opc = 0;
3322     EVT VT = N->getValueType(0);
3323     switch (VT.getSimpleVT().SimpleTy) {
3324     default: return;
3325     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3326     case MVT::v4f16:
3327     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3328     case MVT::v2f32:
3329     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3330     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3331     case MVT::v8f16:
3332     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3333     case MVT::v4f32:
3334     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3335     }
3336     SDValue Pred = getAL(CurDAG, dl);
3337     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3338     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3339     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3340     return;
3341   }
3342   case ARMISD::BUILD_VECTOR: {
3343     EVT VecVT = N->getValueType(0);
3344     EVT EltVT = VecVT.getVectorElementType();
3345     unsigned NumElts = VecVT.getVectorNumElements();
3346     if (EltVT == MVT::f64) {
3347       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3348       ReplaceNode(
3349           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3350       return;
3351     }
3352     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3353     if (NumElts == 2) {
3354       ReplaceNode(
3355           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3356       return;
3357     }
3358     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3359     ReplaceNode(N,
3360                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3361                                     N->getOperand(2), N->getOperand(3)));
3362     return;
3363   }
3364
3365   case ARMISD::VLD1DUP: {
3366     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3367                                          ARM::VLD1DUPd32 };
3368     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3369                                          ARM::VLD1DUPq32 };
3370     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3371     return;
3372   }
3373
3374   case ARMISD::VLD2DUP: {
3375     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3376                                         ARM::VLD2DUPd32 };
3377     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3378     return;
3379   }
3380
3381   case ARMISD::VLD3DUP: {
3382     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3383                                         ARM::VLD3DUPd16Pseudo,
3384                                         ARM::VLD3DUPd32Pseudo };
3385     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3386     return;
3387   }
3388
3389   case ARMISD::VLD4DUP: {
3390     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3391                                         ARM::VLD4DUPd16Pseudo,
3392                                         ARM::VLD4DUPd32Pseudo };
3393     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3394     return;
3395   }
3396
3397   case ARMISD::VLD1DUP_UPD: {
3398     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3399                                          ARM::VLD1DUPd16wb_fixed,
3400                                          ARM::VLD1DUPd32wb_fixed };
3401     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3402                                          ARM::VLD1DUPq16wb_fixed,
3403                                          ARM::VLD1DUPq32wb_fixed };
3404     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3405     return;
3406   }
3407
3408   case ARMISD::VLD2DUP_UPD: {
3409     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3410                                         ARM::VLD2DUPd16wb_fixed,
3411                                         ARM::VLD2DUPd32wb_fixed };
3412     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3413     return;
3414   }
3415
3416   case ARMISD::VLD3DUP_UPD: {
3417     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3418                                         ARM::VLD3DUPd16Pseudo_UPD,
3419                                         ARM::VLD3DUPd32Pseudo_UPD };
3420     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3421     return;
3422   }
3423
3424   case ARMISD::VLD4DUP_UPD: {
3425     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3426                                         ARM::VLD4DUPd16Pseudo_UPD,
3427                                         ARM::VLD4DUPd32Pseudo_UPD };
3428     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3429     return;
3430   }
3431
3432   case ARMISD::VLD1_UPD: {
3433     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3434                                          ARM::VLD1d16wb_fixed,
3435                                          ARM::VLD1d32wb_fixed,
3436                                          ARM::VLD1d64wb_fixed };
3437     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3438                                          ARM::VLD1q16wb_fixed,
3439                                          ARM::VLD1q32wb_fixed,
3440                                          ARM::VLD1q64wb_fixed };
3441     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3442     return;
3443   }
3444
3445   case ARMISD::VLD2_UPD: {
3446     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3447                                          ARM::VLD2d16wb_fixed,
3448                                          ARM::VLD2d32wb_fixed,
3449                                          ARM::VLD1q64wb_fixed};
3450     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3451                                          ARM::VLD2q16PseudoWB_fixed,
3452                                          ARM::VLD2q32PseudoWB_fixed };
3453     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3454     return;
3455   }
3456
3457   case ARMISD::VLD3_UPD: {
3458     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3459                                          ARM::VLD3d16Pseudo_UPD,
3460                                          ARM::VLD3d32Pseudo_UPD,
3461                                          ARM::VLD1d64TPseudoWB_fixed};
3462     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3463                                           ARM::VLD3q16Pseudo_UPD,
3464                                           ARM::VLD3q32Pseudo_UPD };
3465     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3466                                           ARM::VLD3q16oddPseudo_UPD,
3467                                           ARM::VLD3q32oddPseudo_UPD };
3468     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3469     return;
3470   }
3471
3472   case ARMISD::VLD4_UPD: {
3473     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3474                                          ARM::VLD4d16Pseudo_UPD,
3475                                          ARM::VLD4d32Pseudo_UPD,
3476                                          ARM::VLD1d64QPseudoWB_fixed};
3477     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3478                                           ARM::VLD4q16Pseudo_UPD,
3479                                           ARM::VLD4q32Pseudo_UPD };
3480     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3481                                           ARM::VLD4q16oddPseudo_UPD,
3482                                           ARM::VLD4q32oddPseudo_UPD };
3483     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3484     return;
3485   }
3486
3487   case ARMISD::VLD2LN_UPD: {
3488     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3489                                          ARM::VLD2LNd16Pseudo_UPD,
3490                                          ARM::VLD2LNd32Pseudo_UPD };
3491     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3492                                          ARM::VLD2LNq32Pseudo_UPD };
3493     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3494     return;
3495   }
3496
3497   case ARMISD::VLD3LN_UPD: {
3498     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3499                                          ARM::VLD3LNd16Pseudo_UPD,
3500                                          ARM::VLD3LNd32Pseudo_UPD };
3501     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3502                                          ARM::VLD3LNq32Pseudo_UPD };
3503     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3504     return;
3505   }
3506
3507   case ARMISD::VLD4LN_UPD: {
3508     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3509                                          ARM::VLD4LNd16Pseudo_UPD,
3510                                          ARM::VLD4LNd32Pseudo_UPD };
3511     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3512                                          ARM::VLD4LNq32Pseudo_UPD };
3513     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3514     return;
3515   }
3516
3517   case ARMISD::VST1_UPD: {
3518     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3519                                          ARM::VST1d16wb_fixed,
3520                                          ARM::VST1d32wb_fixed,
3521                                          ARM::VST1d64wb_fixed };
3522     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3523                                          ARM::VST1q16wb_fixed,
3524                                          ARM::VST1q32wb_fixed,
3525                                          ARM::VST1q64wb_fixed };
3526     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3527     return;
3528   }
3529
3530   case ARMISD::VST2_UPD: {
3531     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3532                                          ARM::VST2d16wb_fixed,
3533                                          ARM::VST2d32wb_fixed,
3534                                          ARM::VST1q64wb_fixed};
3535     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3536                                          ARM::VST2q16PseudoWB_fixed,
3537                                          ARM::VST2q32PseudoWB_fixed };
3538     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3539     return;
3540   }
3541
3542   case ARMISD::VST3_UPD: {
3543     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3544                                          ARM::VST3d16Pseudo_UPD,
3545                                          ARM::VST3d32Pseudo_UPD,
3546                                          ARM::VST1d64TPseudoWB_fixed};
3547     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3548                                           ARM::VST3q16Pseudo_UPD,
3549                                           ARM::VST3q32Pseudo_UPD };
3550     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3551                                           ARM::VST3q16oddPseudo_UPD,
3552                                           ARM::VST3q32oddPseudo_UPD };
3553     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3554     return;
3555   }
3556
3557   case ARMISD::VST4_UPD: {
3558     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3559                                          ARM::VST4d16Pseudo_UPD,
3560                                          ARM::VST4d32Pseudo_UPD,
3561                                          ARM::VST1d64QPseudoWB_fixed};
3562     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3563                                           ARM::VST4q16Pseudo_UPD,
3564                                           ARM::VST4q32Pseudo_UPD };
3565     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3566                                           ARM::VST4q16oddPseudo_UPD,
3567                                           ARM::VST4q32oddPseudo_UPD };
3568     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3569     return;
3570   }
3571
3572   case ARMISD::VST2LN_UPD: {
3573     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3574                                          ARM::VST2LNd16Pseudo_UPD,
3575                                          ARM::VST2LNd32Pseudo_UPD };
3576     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3577                                          ARM::VST2LNq32Pseudo_UPD };
3578     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3579     return;
3580   }
3581
3582   case ARMISD::VST3LN_UPD: {
3583     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3584                                          ARM::VST3LNd16Pseudo_UPD,
3585                                          ARM::VST3LNd32Pseudo_UPD };
3586     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3587                                          ARM::VST3LNq32Pseudo_UPD };
3588     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3589     return;
3590   }
3591
3592   case ARMISD::VST4LN_UPD: {
3593     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3594                                          ARM::VST4LNd16Pseudo_UPD,
3595                                          ARM::VST4LNd32Pseudo_UPD };
3596     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3597                                          ARM::VST4LNq32Pseudo_UPD };
3598     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3599     return;
3600   }
3601
3602   case ISD::INTRINSIC_VOID:
3603   case ISD::INTRINSIC_W_CHAIN: {
3604     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3605     switch (IntNo) {
3606     default:
3607       break;
3608
3609     case Intrinsic::arm_mrrc:
3610     case Intrinsic::arm_mrrc2: {
3611       SDLoc dl(N);
3612       SDValue Chain = N->getOperand(0);
3613       unsigned Opc;
3614
3615       if (Subtarget->isThumb())
3616         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3617       else
3618         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3619
3620       SmallVector<SDValue, 5> Ops;
3621       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3622       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3623       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3624
3625       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3626       // instruction will always be '1111' but it is possible in assembly language to specify
3627       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3628       if (Opc != ARM::MRRC2) {
3629         Ops.push_back(getAL(CurDAG, dl));
3630         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3631       }
3632
3633       Ops.push_back(Chain);
3634
3635       // Writes to two registers.
3636       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3637
3638       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3639       return;
3640     }
3641     case Intrinsic::arm_ldaexd:
3642     case Intrinsic::arm_ldrexd: {
3643       SDLoc dl(N);
3644       SDValue Chain = N->getOperand(0);
3645       SDValue MemAddr = N->getOperand(2);
3646       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3647
3648       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3649       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3650                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3651
3652       // arm_ldrexd returns a i64 value in {i32, i32}
3653       std::vector<EVT> ResTys;
3654       if (isThumb) {
3655         ResTys.push_back(MVT::i32);
3656         ResTys.push_back(MVT::i32);
3657       } else
3658         ResTys.push_back(MVT::Untyped);
3659       ResTys.push_back(MVT::Other);
3660
3661       // Place arguments in the right order.
3662       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3663                        CurDAG->getRegister(0, MVT::i32), Chain};
3664       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3665       // Transfer memoperands.
3666       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3667       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3668
3669       // Remap uses.
3670       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3671       if (!SDValue(N, 0).use_empty()) {
3672         SDValue Result;
3673         if (isThumb)
3674           Result = SDValue(Ld, 0);
3675         else {
3676           SDValue SubRegIdx =
3677             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3678           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3679               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3680           Result = SDValue(ResNode,0);
3681         }
3682         ReplaceUses(SDValue(N, 0), Result);
3683       }
3684       if (!SDValue(N, 1).use_empty()) {
3685         SDValue Result;
3686         if (isThumb)
3687           Result = SDValue(Ld, 1);
3688         else {
3689           SDValue SubRegIdx =
3690             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3691           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3692               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3693           Result = SDValue(ResNode,0);
3694         }
3695         ReplaceUses(SDValue(N, 1), Result);
3696       }
3697       ReplaceUses(SDValue(N, 2), OutChain);
3698       CurDAG->RemoveDeadNode(N);
3699       return;
3700     }
3701     case Intrinsic::arm_stlexd:
3702     case Intrinsic::arm_strexd: {
3703       SDLoc dl(N);
3704       SDValue Chain = N->getOperand(0);
3705       SDValue Val0 = N->getOperand(2);
3706       SDValue Val1 = N->getOperand(3);
3707       SDValue MemAddr = N->getOperand(4);
3708
3709       // Store exclusive double return a i32 value which is the return status
3710       // of the issued store.
3711       const EVT ResTys[] = {MVT::i32, MVT::Other};
3712
3713       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3714       // Place arguments in the right order.
3715       SmallVector<SDValue, 7> Ops;
3716       if (isThumb) {
3717         Ops.push_back(Val0);
3718         Ops.push_back(Val1);
3719       } else
3720         // arm_strexd uses GPRPair.
3721         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3722       Ops.push_back(MemAddr);
3723       Ops.push_back(getAL(CurDAG, dl));
3724       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3725       Ops.push_back(Chain);
3726
3727       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3728       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3729                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3730
3731       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3732       // Transfer memoperands.
3733       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3734       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3735
3736       ReplaceNode(N, St);
3737       return;
3738     }
3739
3740     case Intrinsic::arm_neon_vld1: {
3741       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3742                                            ARM::VLD1d32, ARM::VLD1d64 };
3743       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3744                                            ARM::VLD1q32, ARM::VLD1q64};
3745       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3746       return;
3747     }
3748
3749     case Intrinsic::arm_neon_vld1x2: {
3750       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3751                                            ARM::VLD1q32, ARM::VLD1q64 };
3752       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3753                                            ARM::VLD1d16QPseudo,
3754                                            ARM::VLD1d32QPseudo,
3755                                            ARM::VLD1d64QPseudo };
3756       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3757       return;
3758     }
3759
3760     case Intrinsic::arm_neon_vld1x3: {
3761       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3762                                            ARM::VLD1d16TPseudo,
3763                                            ARM::VLD1d32TPseudo,
3764                                            ARM::VLD1d64TPseudo };
3765       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3766                                             ARM::VLD1q16LowTPseudo_UPD,
3767                                             ARM::VLD1q32LowTPseudo_UPD,
3768                                             ARM::VLD1q64LowTPseudo_UPD };
3769       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3770                                             ARM::VLD1q16HighTPseudo,
3771                                             ARM::VLD1q32HighTPseudo,
3772                                             ARM::VLD1q64HighTPseudo };
3773       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3774       return;
3775     }
3776
3777     case Intrinsic::arm_neon_vld1x4: {
3778       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3779                                            ARM::VLD1d16QPseudo,
3780                                            ARM::VLD1d32QPseudo,
3781                                            ARM::VLD1d64QPseudo };
3782       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3783                                             ARM::VLD1q16LowQPseudo_UPD,
3784                                             ARM::VLD1q32LowQPseudo_UPD,
3785                                             ARM::VLD1q64LowQPseudo_UPD };
3786       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3787                                             ARM::VLD1q16HighQPseudo,
3788                                             ARM::VLD1q32HighQPseudo,
3789                                             ARM::VLD1q64HighQPseudo };
3790       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3791       return;
3792     }
3793
3794     case Intrinsic::arm_neon_vld2: {
3795       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3796                                            ARM::VLD2d32, ARM::VLD1q64 };
3797       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3798                                            ARM::VLD2q32Pseudo };
3799       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3800       return;
3801     }
3802
3803     case Intrinsic::arm_neon_vld3: {
3804       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3805                                            ARM::VLD3d16Pseudo,
3806                                            ARM::VLD3d32Pseudo,
3807                                            ARM::VLD1d64TPseudo };
3808       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3809                                             ARM::VLD3q16Pseudo_UPD,
3810                                             ARM::VLD3q32Pseudo_UPD };
3811       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3812                                             ARM::VLD3q16oddPseudo,
3813                                             ARM::VLD3q32oddPseudo };
3814       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3815       return;
3816     }
3817
3818     case Intrinsic::arm_neon_vld4: {
3819       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3820                                            ARM::VLD4d16Pseudo,
3821                                            ARM::VLD4d32Pseudo,
3822                                            ARM::VLD1d64QPseudo };
3823       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3824                                             ARM::VLD4q16Pseudo_UPD,
3825                                             ARM::VLD4q32Pseudo_UPD };
3826       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3827                                             ARM::VLD4q16oddPseudo,
3828                                             ARM::VLD4q32oddPseudo };
3829       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3830       return;
3831     }
3832
3833     case Intrinsic::arm_neon_vld2dup: {
3834       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3835                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3836       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3837                                             ARM::VLD2DUPq16EvenPseudo,
3838                                             ARM::VLD2DUPq32EvenPseudo };
3839       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3840                                             ARM::VLD2DUPq16OddPseudo,
3841                                             ARM::VLD2DUPq32OddPseudo };
3842       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3843                    DOpcodes, QOpcodes0, QOpcodes1);
3844       return;
3845     }
3846
3847     case Intrinsic::arm_neon_vld3dup: {
3848       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3849                                            ARM::VLD3DUPd16Pseudo,
3850                                            ARM::VLD3DUPd32Pseudo,
3851                                            ARM::VLD1d64TPseudo };
3852       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3853                                             ARM::VLD3DUPq16EvenPseudo,
3854                                             ARM::VLD3DUPq32EvenPseudo };
3855       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3856                                             ARM::VLD3DUPq16OddPseudo,
3857                                             ARM::VLD3DUPq32OddPseudo };
3858       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3859                    DOpcodes, QOpcodes0, QOpcodes1);
3860       return;
3861     }
3862
3863     case Intrinsic::arm_neon_vld4dup: {
3864       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3865                                            ARM::VLD4DUPd16Pseudo,
3866                                            ARM::VLD4DUPd32Pseudo,
3867                                            ARM::VLD1d64QPseudo };
3868       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3869                                             ARM::VLD4DUPq16EvenPseudo,
3870                                             ARM::VLD4DUPq32EvenPseudo };
3871       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3872                                             ARM::VLD4DUPq16OddPseudo,
3873                                             ARM::VLD4DUPq32OddPseudo };
3874       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3875                    DOpcodes, QOpcodes0, QOpcodes1);
3876       return;
3877     }
3878
3879     case Intrinsic::arm_neon_vld2lane: {
3880       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3881                                            ARM::VLD2LNd16Pseudo,
3882                                            ARM::VLD2LNd32Pseudo };
3883       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3884                                            ARM::VLD2LNq32Pseudo };
3885       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3886       return;
3887     }
3888
3889     case Intrinsic::arm_neon_vld3lane: {
3890       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3891                                            ARM::VLD3LNd16Pseudo,
3892                                            ARM::VLD3LNd32Pseudo };
3893       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3894                                            ARM::VLD3LNq32Pseudo };
3895       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3896       return;
3897     }
3898
3899     case Intrinsic::arm_neon_vld4lane: {
3900       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3901                                            ARM::VLD4LNd16Pseudo,
3902                                            ARM::VLD4LNd32Pseudo };
3903       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3904                                            ARM::VLD4LNq32Pseudo };
3905       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3906       return;
3907     }
3908
3909     case Intrinsic::arm_neon_vst1: {
3910       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3911                                            ARM::VST1d32, ARM::VST1d64 };
3912       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3913                                            ARM::VST1q32, ARM::VST1q64 };
3914       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3915       return;
3916     }
3917
3918     case Intrinsic::arm_neon_vst1x2: {
3919       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3920                                            ARM::VST1q32, ARM::VST1q64 };
3921       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3922                                            ARM::VST1d16QPseudo,
3923                                            ARM::VST1d32QPseudo,
3924                                            ARM::VST1d64QPseudo };
3925       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3926       return;
3927     }
3928
3929     case Intrinsic::arm_neon_vst1x3: {
3930       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3931                                            ARM::VST1d16TPseudo,
3932                                            ARM::VST1d32TPseudo,
3933                                            ARM::VST1d64TPseudo };
3934       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3935                                             ARM::VST1q16LowTPseudo_UPD,
3936                                             ARM::VST1q32LowTPseudo_UPD,
3937                                             ARM::VST1q64LowTPseudo_UPD };
3938       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3939                                             ARM::VST1q16HighTPseudo,
3940                                             ARM::VST1q32HighTPseudo,
3941                                             ARM::VST1q64HighTPseudo };
3942       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3943       return;
3944     }
3945
3946     case Intrinsic::arm_neon_vst1x4: {
3947       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3948                                            ARM::VST1d16QPseudo,
3949                                            ARM::VST1d32QPseudo,
3950                                            ARM::VST1d64QPseudo };
3951       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3952                                             ARM::VST1q16LowQPseudo_UPD,
3953                                             ARM::VST1q32LowQPseudo_UPD,
3954                                             ARM::VST1q64LowQPseudo_UPD };
3955       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3956                                             ARM::VST1q16HighQPseudo,
3957                                             ARM::VST1q32HighQPseudo,
3958                                             ARM::VST1q64HighQPseudo };
3959       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3960       return;
3961     }
3962
3963     case Intrinsic::arm_neon_vst2: {
3964       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3965                                            ARM::VST2d32, ARM::VST1q64 };
3966       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3967                                            ARM::VST2q32Pseudo };
3968       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3969       return;
3970     }
3971
3972     case Intrinsic::arm_neon_vst3: {
3973       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3974                                            ARM::VST3d16Pseudo,
3975                                            ARM::VST3d32Pseudo,
3976                                            ARM::VST1d64TPseudo };
3977       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3978                                             ARM::VST3q16Pseudo_UPD,
3979                                             ARM::VST3q32Pseudo_UPD };
3980       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3981                                             ARM::VST3q16oddPseudo,
3982                                             ARM::VST3q32oddPseudo };
3983       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3984       return;
3985     }
3986
3987     case Intrinsic::arm_neon_vst4: {
3988       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3989                                            ARM::VST4d16Pseudo,
3990                                            ARM::VST4d32Pseudo,
3991                                            ARM::VST1d64QPseudo };
3992       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3993                                             ARM::VST4q16Pseudo_UPD,
3994                                             ARM::VST4q32Pseudo_UPD };
3995       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3996                                             ARM::VST4q16oddPseudo,
3997                                             ARM::VST4q32oddPseudo };
3998       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3999       return;
4000     }
4001
4002     case Intrinsic::arm_neon_vst2lane: {
4003       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4004                                            ARM::VST2LNd16Pseudo,
4005                                            ARM::VST2LNd32Pseudo };
4006       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4007                                            ARM::VST2LNq32Pseudo };
4008       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4009       return;
4010     }
4011
4012     case Intrinsic::arm_neon_vst3lane: {
4013       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4014                                            ARM::VST3LNd16Pseudo,
4015                                            ARM::VST3LNd32Pseudo };
4016       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4017                                            ARM::VST3LNq32Pseudo };
4018       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4019       return;
4020     }
4021
4022     case Intrinsic::arm_neon_vst4lane: {
4023       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4024                                            ARM::VST4LNd16Pseudo,
4025                                            ARM::VST4LNd32Pseudo };
4026       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4027                                            ARM::VST4LNq32Pseudo };
4028       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4029       return;
4030     }
4031     }
4032     break;
4033   }
4034
4035   case ISD::ATOMIC_CMP_SWAP:
4036     SelectCMP_SWAP(N);
4037     return;
4038   }
4039
4040   SelectCode(N);
4041 }
4042
4043 // Inspect a register string of the form
4044 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4045 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4046 // and obtain the integer operands from them, adding these operands to the
4047 // provided vector.
4048 static void getIntOperandsFromRegisterString(StringRef RegString,
4049                                              SelectionDAG *CurDAG,
4050                                              const SDLoc &DL,
4051                                              std::vector<SDValue> &Ops) {
4052   SmallVector<StringRef, 5> Fields;
4053   RegString.split(Fields, ':');
4054
4055   if (Fields.size() > 1) {
4056     bool AllIntFields = true;
4057
4058     for (StringRef Field : Fields) {
4059       // Need to trim out leading 'cp' characters and get the integer field.
4060       unsigned IntField;
4061       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4062       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4063     }
4064
4065     assert(AllIntFields &&
4066             "Unexpected non-integer value in special register string.");
4067   }
4068 }
4069
4070 // Maps a Banked Register string to its mask value. The mask value returned is
4071 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4072 // mask operand, which expresses which register is to be used, e.g. r8, and in
4073 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4074 // was invalid.
4075 static inline int getBankedRegisterMask(StringRef RegString) {
4076   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4077   if (!TheReg)
4078      return -1;
4079   return TheReg->Encoding;
4080 }
4081
4082 // The flags here are common to those allowed for apsr in the A class cores and
4083 // those allowed for the special registers in the M class cores. Returns a
4084 // value representing which flags were present, -1 if invalid.
4085 static inline int getMClassFlagsMask(StringRef Flags) {
4086   return StringSwitch<int>(Flags)
4087           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4088                          // correct when flags are not permitted
4089           .Case("g", 0x1)
4090           .Case("nzcvq", 0x2)
4091           .Case("nzcvqg", 0x3)
4092           .Default(-1);
4093 }
4094
4095 // Maps MClass special registers string to its value for use in the
4096 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4097 // Returns -1 to signify that the string was invalid.
4098 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4099   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4100   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4101   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4102     return -1;
4103   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4104 }
4105
4106 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4107   // The mask operand contains the special register (R Bit) in bit 4, whether
4108   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4109   // bits 3-0 contains the fields to be accessed in the special register, set by
4110   // the flags provided with the register.
4111   int Mask = 0;
4112   if (Reg == "apsr") {
4113     // The flags permitted for apsr are the same flags that are allowed in
4114     // M class registers. We get the flag value and then shift the flags into
4115     // the correct place to combine with the mask.
4116     Mask = getMClassFlagsMask(Flags);
4117     if (Mask == -1)
4118       return -1;
4119     return Mask << 2;
4120   }
4121
4122   if (Reg != "cpsr" && Reg != "spsr") {
4123     return -1;
4124   }
4125
4126   // This is the same as if the flags were "fc"
4127   if (Flags.empty() || Flags == "all")
4128     return Mask | 0x9;
4129
4130   // Inspect the supplied flags string and set the bits in the mask for
4131   // the relevant and valid flags allowed for cpsr and spsr.
4132   for (char Flag : Flags) {
4133     int FlagVal;
4134     switch (Flag) {
4135       case 'c':
4136         FlagVal = 0x1;
4137         break;
4138       case 'x':
4139         FlagVal = 0x2;
4140         break;
4141       case 's':
4142         FlagVal = 0x4;
4143         break;
4144       case 'f':
4145         FlagVal = 0x8;
4146         break;
4147       default:
4148         FlagVal = 0;
4149     }
4150
4151     // This avoids allowing strings where the same flag bit appears twice.
4152     if (!FlagVal || (Mask & FlagVal))
4153       return -1;
4154     Mask |= FlagVal;
4155   }
4156
4157   // If the register is spsr then we need to set the R bit.
4158   if (Reg == "spsr")
4159     Mask |= 0x10;
4160
4161   return Mask;
4162 }
4163
4164 // Lower the read_register intrinsic to ARM specific DAG nodes
4165 // using the supplied metadata string to select the instruction node to use
4166 // and the registers/masks to construct as operands for the node.
4167 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4168   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4169   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4170   bool IsThumb2 = Subtarget->isThumb2();
4171   SDLoc DL(N);
4172
4173   std::vector<SDValue> Ops;
4174   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4175
4176   if (!Ops.empty()) {
4177     // If the special register string was constructed of fields (as defined
4178     // in the ACLE) then need to lower to MRC node (32 bit) or
4179     // MRRC node(64 bit), we can make the distinction based on the number of
4180     // operands we have.
4181     unsigned Opcode;
4182     SmallVector<EVT, 3> ResTypes;
4183     if (Ops.size() == 5){
4184       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4185       ResTypes.append({ MVT::i32, MVT::Other });
4186     } else {
4187       assert(Ops.size() == 3 &&
4188               "Invalid number of fields in special register string.");
4189       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4190       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4191     }
4192
4193     Ops.push_back(getAL(CurDAG, DL));
4194     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4195     Ops.push_back(N->getOperand(0));
4196     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4197     return true;
4198   }
4199
4200   std::string SpecialReg = RegString->getString().lower();
4201
4202   int BankedReg = getBankedRegisterMask(SpecialReg);
4203   if (BankedReg != -1) {
4204     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4205             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4206             N->getOperand(0) };
4207     ReplaceNode(
4208         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4209                                   DL, MVT::i32, MVT::Other, Ops));
4210     return true;
4211   }
4212
4213   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4214   // corresponding to the register that is being read from. So we switch on the
4215   // string to find which opcode we need to use.
4216   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4217                     .Case("fpscr", ARM::VMRS)
4218                     .Case("fpexc", ARM::VMRS_FPEXC)
4219                     .Case("fpsid", ARM::VMRS_FPSID)
4220                     .Case("mvfr0", ARM::VMRS_MVFR0)
4221                     .Case("mvfr1", ARM::VMRS_MVFR1)
4222                     .Case("mvfr2", ARM::VMRS_MVFR2)
4223                     .Case("fpinst", ARM::VMRS_FPINST)
4224                     .Case("fpinst2", ARM::VMRS_FPINST2)
4225                     .Default(0);
4226
4227   // If an opcode was found then we can lower the read to a VFP instruction.
4228   if (Opcode) {
4229     if (!Subtarget->hasVFP2Base())
4230       return false;
4231     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4232       return false;
4233
4234     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4235             N->getOperand(0) };
4236     ReplaceNode(N,
4237                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4238     return true;
4239   }
4240
4241   // If the target is M Class then need to validate that the register string
4242   // is an acceptable value, so check that a mask can be constructed from the
4243   // string.
4244   if (Subtarget->isMClass()) {
4245     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4246     if (SYSmValue == -1)
4247       return false;
4248
4249     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4250                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4251                       N->getOperand(0) };
4252     ReplaceNode(
4253         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4254     return true;
4255   }
4256
4257   // Here we know the target is not M Class so we need to check if it is one
4258   // of the remaining possible values which are apsr, cpsr or spsr.
4259   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4260     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4261             N->getOperand(0) };
4262     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4263                                           DL, MVT::i32, MVT::Other, Ops));
4264     return true;
4265   }
4266
4267   if (SpecialReg == "spsr") {
4268     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4269             N->getOperand(0) };
4270     ReplaceNode(
4271         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4272                                   MVT::i32, MVT::Other, Ops));
4273     return true;
4274   }
4275
4276   return false;
4277 }
4278
4279 // Lower the write_register intrinsic to ARM specific DAG nodes
4280 // using the supplied metadata string to select the instruction node to use
4281 // and the registers/masks to use in the nodes
4282 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4283   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4284   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4285   bool IsThumb2 = Subtarget->isThumb2();
4286   SDLoc DL(N);
4287
4288   std::vector<SDValue> Ops;
4289   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4290
4291   if (!Ops.empty()) {
4292     // If the special register string was constructed of fields (as defined
4293     // in the ACLE) then need to lower to MCR node (32 bit) or
4294     // MCRR node(64 bit), we can make the distinction based on the number of
4295     // operands we have.
4296     unsigned Opcode;
4297     if (Ops.size() == 5) {
4298       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4299       Ops.insert(Ops.begin()+2, N->getOperand(2));
4300     } else {
4301       assert(Ops.size() == 3 &&
4302               "Invalid number of fields in special register string.");
4303       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4304       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4305       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4306     }
4307
4308     Ops.push_back(getAL(CurDAG, DL));
4309     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4310     Ops.push_back(N->getOperand(0));
4311
4312     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4313     return true;
4314   }
4315
4316   std::string SpecialReg = RegString->getString().lower();
4317   int BankedReg = getBankedRegisterMask(SpecialReg);
4318   if (BankedReg != -1) {
4319     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4320             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4321             N->getOperand(0) };
4322     ReplaceNode(
4323         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4324                                   DL, MVT::Other, Ops));
4325     return true;
4326   }
4327
4328   // The VFP registers are written to by creating SelectionDAG nodes with
4329   // opcodes corresponding to the register that is being written. So we switch
4330   // on the string to find which opcode we need to use.
4331   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4332                     .Case("fpscr", ARM::VMSR)
4333                     .Case("fpexc", ARM::VMSR_FPEXC)
4334                     .Case("fpsid", ARM::VMSR_FPSID)
4335                     .Case("fpinst", ARM::VMSR_FPINST)
4336                     .Case("fpinst2", ARM::VMSR_FPINST2)
4337                     .Default(0);
4338
4339   if (Opcode) {
4340     if (!Subtarget->hasVFP2Base())
4341       return false;
4342     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4343             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4344     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4345     return true;
4346   }
4347
4348   std::pair<StringRef, StringRef> Fields;
4349   Fields = StringRef(SpecialReg).rsplit('_');
4350   std::string Reg = Fields.first.str();
4351   StringRef Flags = Fields.second;
4352
4353   // If the target was M Class then need to validate the special register value
4354   // and retrieve the mask for use in the instruction node.
4355   if (Subtarget->isMClass()) {
4356     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4357     if (SYSmValue == -1)
4358       return false;
4359
4360     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4361                       N->getOperand(2), getAL(CurDAG, DL),
4362                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4363     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4364     return true;
4365   }
4366
4367   // We then check to see if a valid mask can be constructed for one of the
4368   // register string values permitted for the A and R class cores. These values
4369   // are apsr, spsr and cpsr; these are also valid on older cores.
4370   int Mask = getARClassRegisterMask(Reg, Flags);
4371   if (Mask != -1) {
4372     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4373             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4374             N->getOperand(0) };
4375     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4376                                           DL, MVT::Other, Ops));
4377     return true;
4378   }
4379
4380   return false;
4381 }
4382
4383 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4384   std::vector<SDValue> AsmNodeOperands;
4385   unsigned Flag, Kind;
4386   bool Changed = false;
4387   unsigned NumOps = N->getNumOperands();
4388
4389   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4390   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4391   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4392   // respectively. Since there is no constraint to explicitly specify a
4393   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4394   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4395   // them into a GPRPair.
4396
4397   SDLoc dl(N);
4398   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4399                                    : SDValue(nullptr,0);
4400
4401   SmallVector<bool, 8> OpChanged;
4402   // Glue node will be appended late.
4403   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4404     SDValue op = N->getOperand(i);
4405     AsmNodeOperands.push_back(op);
4406
4407     if (i < InlineAsm::Op_FirstOperand)
4408       continue;
4409
4410     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4411       Flag = C->getZExtValue();
4412       Kind = InlineAsm::getKind(Flag);
4413     }
4414     else
4415       continue;
4416
4417     // Immediate operands to inline asm in the SelectionDAG are modeled with
4418     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4419     // the second is a constant with the value of the immediate. If we get here
4420     // and we have a Kind_Imm, skip the next operand, and continue.
4421     if (Kind == InlineAsm::Kind_Imm) {
4422       SDValue op = N->getOperand(++i);
4423       AsmNodeOperands.push_back(op);
4424       continue;
4425     }
4426
4427     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4428     if (NumRegs)
4429       OpChanged.push_back(false);
4430
4431     unsigned DefIdx = 0;
4432     bool IsTiedToChangedOp = false;
4433     // If it's a use that is tied with a previous def, it has no
4434     // reg class constraint.
4435     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4436       IsTiedToChangedOp = OpChanged[DefIdx];
4437
4438     // Memory operands to inline asm in the SelectionDAG are modeled with two
4439     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4440     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4441     // it doesn't get misinterpreted), and continue. We do this here because
4442     // it's important to update the OpChanged array correctly before moving on.
4443     if (Kind == InlineAsm::Kind_Mem) {
4444       SDValue op = N->getOperand(++i);
4445       AsmNodeOperands.push_back(op);
4446       continue;
4447     }
4448
4449     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4450         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4451       continue;
4452
4453     unsigned RC;
4454     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4455     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4456         || NumRegs != 2)
4457       continue;
4458
4459     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4460     SDValue V0 = N->getOperand(i+1);
4461     SDValue V1 = N->getOperand(i+2);
4462     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4463     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4464     SDValue PairedReg;
4465     MachineRegisterInfo &MRI = MF->getRegInfo();
4466
4467     if (Kind == InlineAsm::Kind_RegDef ||
4468         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4469       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4470       // the original GPRs.
4471
4472       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4473       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4474       SDValue Chain = SDValue(N,0);
4475
4476       SDNode *GU = N->getGluedUser();
4477       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4478                                                Chain.getValue(1));
4479
4480       // Extract values from a GPRPair reg and copy to the original GPR reg.
4481       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4482                                                     RegCopy);
4483       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4484                                                     RegCopy);
4485       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4486                                         RegCopy.getValue(1));
4487       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4488
4489       // Update the original glue user.
4490       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4491       Ops.push_back(T1.getValue(1));
4492       CurDAG->UpdateNodeOperands(GU, Ops);
4493     }
4494     else {
4495       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4496       // GPRPair and then pass the GPRPair to the inline asm.
4497       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4498
4499       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4500       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4501                                           Chain.getValue(1));
4502       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4503                                           T0.getValue(1));
4504       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4505
4506       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4507       // i32 VRs of inline asm with it.
4508       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4509       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4510       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4511
4512       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4513       Glue = Chain.getValue(1);
4514     }
4515
4516     Changed = true;
4517
4518     if(PairedReg.getNode()) {
4519       OpChanged[OpChanged.size() -1 ] = true;
4520       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4521       if (IsTiedToChangedOp)
4522         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4523       else
4524         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4525       // Replace the current flag.
4526       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4527           Flag, dl, MVT::i32);
4528       // Add the new register node and skip the original two GPRs.
4529       AsmNodeOperands.push_back(PairedReg);
4530       // Skip the next two GPRs.
4531       i += 2;
4532     }
4533   }
4534
4535   if (Glue.getNode())
4536     AsmNodeOperands.push_back(Glue);
4537   if (!Changed)
4538     return false;
4539
4540   SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4541       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4542   New->setNodeId(-1);
4543   ReplaceNode(N, New.getNode());
4544   return true;
4545 }
4546
4547
4548 bool ARMDAGToDAGISel::
4549 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4550                              std::vector<SDValue> &OutOps) {
4551   switch(ConstraintID) {
4552   default:
4553     llvm_unreachable("Unexpected asm memory constraint");
4554   case InlineAsm::Constraint_i:
4555     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4556     //        be an immediate and not a memory constraint.
4557     LLVM_FALLTHROUGH;
4558   case InlineAsm::Constraint_m:
4559   case InlineAsm::Constraint_o:
4560   case InlineAsm::Constraint_Q:
4561   case InlineAsm::Constraint_Um:
4562   case InlineAsm::Constraint_Un:
4563   case InlineAsm::Constraint_Uq:
4564   case InlineAsm::Constraint_Us:
4565   case InlineAsm::Constraint_Ut:
4566   case InlineAsm::Constraint_Uv:
4567   case InlineAsm::Constraint_Uy:
4568     // Require the address to be in a register.  That is safe for all ARM
4569     // variants and it is hard to do anything much smarter without knowing
4570     // how the operand is used.
4571     OutOps.push_back(Op);
4572     return false;
4573   }
4574   return true;
4575 }
4576
4577 /// createARMISelDag - This pass converts a legalized DAG into a
4578 /// ARM-specific DAG, ready for instruction scheduling.
4579 ///
4580 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4581                                      CodeGenOpt::Level OptLevel) {
4582   return new ARMDAGToDAGISel(TM, OptLevel);
4583 }