lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines an instruction selector for the ARM target.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "ARM.h"
  14 #include "ARMBaseInstrInfo.h"
  15 #include "ARMTargetMachine.h"
  16 #include "MCTargetDesc/ARMAddressingModes.h"
  17 #include "Utils/ARMBaseInfo.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/CodeGen/TargetLowering.h"
  26 #include "llvm/IR/CallingConv.h"
  27 #include "llvm/IR/Constants.h"
  28 #include "llvm/IR/DerivedTypes.h"
  29 #include "llvm/IR/Function.h"
  30 #include "llvm/IR/Intrinsics.h"
  31 #include "llvm/IR/LLVMContext.h"
  32 #include "llvm/Support/CommandLine.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 class ARMDAGToDAGISel : public SelectionDAGISel {
  53   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  54   /// make the right decision when generating code for different targets.
  55   const ARMSubtarget *Subtarget;
  56
  57 public:
  58   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  59       : SelectionDAGISel(tm, OptLevel) {}
  60
  61   bool runOnMachineFunction(MachineFunction &MF) override {
  62     // Reset the subtarget each time through.
  63     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  64     SelectionDAGISel::runOnMachineFunction(MF);
  65     return true;
  66   }
  67
  68   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  69
  70   void PreprocessISelDAG() override;
  71
  72   /// getI32Imm - Return a target constant of type i32 with the specified
  73   /// value.
  74   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  75     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  76   }
  77
  78   void Select(SDNode *N) override;
  79
  80   bool hasNoVMLxHazardUse(SDNode *N) const;
  81   bool isShifterOpProfitable(const SDValue &Shift,
  82                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  83   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  84                                SDValue &B, SDValue &C,
  85                                bool CheckProfitability = true);
  86   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  87                                SDValue &B, bool CheckProfitability = true);
  88   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  89                                     SDValue &B, SDValue &C) {
  90     // Don't apply the profitability check
  91     return SelectRegShifterOperand(N, A, B, C, false);
  92   }
  93   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B) {
  95     // Don't apply the profitability check
  96     return SelectImmShifterOperand(N, A, B, false);
  97   }
  98
  99   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
 100
 101   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 102   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 103
 104   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 105     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 106     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 107     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 108     return true;
 109   }
 110
 111   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 112                              SDValue &Offset, SDValue &Opc);
 113   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 114                              SDValue &Offset, SDValue &Opc);
 115   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 116                              SDValue &Offset, SDValue &Opc);
 117   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 118   bool SelectAddrMode3(SDValue N, SDValue &Base,
 119                        SDValue &Offset, SDValue &Opc);
 120   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 121                              SDValue &Offset, SDValue &Opc);
 122   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
 123   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
 124   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
 125   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 126   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 127
 128   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 129
 130   // Thumb Addressing Modes:
 131   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 132   bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
 133   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 134                                 SDValue &OffImm);
 135   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 136                                  SDValue &OffImm);
 137   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 138                                  SDValue &OffImm);
 139   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 140                                  SDValue &OffImm);
 141   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 142
 143   // Thumb 2 Addressing Modes:
 144   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 145   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 146                             SDValue &OffImm);
 147   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 148                                  SDValue &OffImm);
 149   template <unsigned Shift>
 150   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
 151   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
 152                                   unsigned Shift);
 153   template <unsigned Shift>
 154   bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
 155   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 156                              SDValue &OffReg, SDValue &ShImm);
 157   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 158
 159   inline bool is_so_imm(unsigned Imm) const {
 160     return ARM_AM::getSOImmVal(Imm) != -1;
 161   }
 162
 163   inline bool is_so_imm_not(unsigned Imm) const {
 164     return ARM_AM::getSOImmVal(~Imm) != -1;
 165   }
 166
 167   inline bool is_t2_so_imm(unsigned Imm) const {
 168     return ARM_AM::getT2SOImmVal(Imm) != -1;
 169   }
 170
 171   inline bool is_t2_so_imm_not(unsigned Imm) const {
 172     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 173   }
 174
 175   // Include the pieces autogenerated from the target description.
 176 #include "ARMGenDAGISel.inc"
 177
 178 private:
 179   void transferMemOperands(SDNode *Src, SDNode *Dst);
 180
 181   /// Indexed (pre/post inc/dec) load matching code for ARM.
 182   bool tryARMIndexedLoad(SDNode *N);
 183   bool tryT1IndexedLoad(SDNode *N);
 184   bool tryT2IndexedLoad(SDNode *N);
 185   bool tryMVEIndexedLoad(SDNode *N);
 186
 187   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 188   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 189   /// loads of D registers and even subregs and odd subregs of Q registers.
 190   /// For NumVecs <= 2, QOpcodes1 is not used.
 191   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 192                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 193                  const uint16_t *QOpcodes1);
 194
 195   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 196   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 197   /// stores of D registers and even subregs and odd subregs of Q registers.
 198   /// For NumVecs <= 2, QOpcodes1 is not used.
 199   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 200                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 201                  const uint16_t *QOpcodes1);
 202
 203   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 204   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 205   /// load/store of D registers and Q registers.
 206   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 207                        unsigned NumVecs, const uint16_t *DOpcodes,
 208                        const uint16_t *QOpcodes);
 209
 210   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 211   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 212   /// for loading D registers.
 213   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
 214                     unsigned NumVecs, const uint16_t *DOpcodes,
 215                     const uint16_t *QOpcodes0 = nullptr,
 216                     const uint16_t *QOpcodes1 = nullptr);
 217
 218   /// Try to select SBFX/UBFX instructions for ARM.
 219   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 220
 221   // Select special operations if node forms integer ABS pattern
 222   bool tryABSOp(SDNode *N);
 223
 224   bool tryReadRegister(SDNode *N);
 225   bool tryWriteRegister(SDNode *N);
 226
 227   bool tryInlineAsm(SDNode *N);
 228
 229   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 230
 231   void SelectCMP_SWAP(SDNode *N);
 232
 233   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 234   /// inline asm expressions.
 235   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 236                                     std::vector<SDValue> &OutOps) override;
 237
 238   // Form pairs of consecutive R, S, D, or Q registers.
 239   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 240   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 241   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 242   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 243
 244   // Form sequences of 4 consecutive S, D, or Q registers.
 245   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 246   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 247   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 248
 249   // Get the alignment operand for a NEON VLD or VST instruction.
 250   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 251                         bool is64BitVector);
 252
 253   /// Returns the number of instructions required to materialize the given
 254   /// constant in a register, or 3 if a literal pool load is needed.
 255   unsigned ConstantMaterializationCost(unsigned Val) const;
 256
 257   /// Checks if N is a multiplication by a constant where we can extract out a
 258   /// power of two from the constant so that it can be used in a shift, but only
 259   /// if it simplifies the materialization of the constant. Returns true if it
 260   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 261   /// out and to NewMulConst the new constant to be multiplied by.
 262   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 263                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 264
 265   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 266   /// selected when N would have been selected.
 267   void replaceDAGValue(const SDValue &N, SDValue M);
 268 };
 269 }
 270
 271 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 272 /// operand. If so Imm will receive the 32-bit value.
 273 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 274   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 275     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 276     return true;
 277   }
 278   return false;
 279 }
 280
 281 // isInt32Immediate - This method tests to see if a constant operand.
 282 // If so Imm will receive the 32 bit value.
 283 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 284   return isInt32Immediate(N.getNode(), Imm);
 285 }
 286
 287 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 288 // opcode and that it has a immediate integer right operand.
 289 // If so Imm will receive the 32 bit value.
 290 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 291   return N->getOpcode() == Opc &&
 292          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 293 }
 294
 295 /// Check whether a particular node is a constant value representable as
 296 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 297 ///
 298 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 299 static bool isScaledConstantInRange(SDValue Node, int Scale,
 300                                     int RangeMin, int RangeMax,
 301                                     int &ScaledConstant) {
 302   assert(Scale > 0 && "Invalid scale!");
 303
 304   // Check that this is a constant.
 305   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 306   if (!C)
 307     return false;
 308
 309   ScaledConstant = (int) C->getZExtValue();
 310   if ((ScaledConstant % Scale) != 0)
 311     return false;
 312
 313   ScaledConstant /= Scale;
 314   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 315 }
 316
 317 void ARMDAGToDAGISel::PreprocessISelDAG() {
 318   if (!Subtarget->hasV6T2Ops())
 319     return;
 320
 321   bool isThumb2 = Subtarget->isThumb();
 322   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 323        E = CurDAG->allnodes_end(); I != E; ) {
 324     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 325
 326     if (N->getOpcode() != ISD::ADD)
 327       continue;
 328
 329     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 330     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 331     // trailing zeros, e.g. 1020.
 332     // Transform the expression to
 333     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 334     // of trailing zeros of c2. The left shift would be folded as an shifter
 335     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 336     // node (UBFX).
 337
 338     SDValue N0 = N->getOperand(0);
 339     SDValue N1 = N->getOperand(1);
 340     unsigned And_imm = 0;
 341     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 342       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 343         std::swap(N0, N1);
 344     }
 345     if (!And_imm)
 346       continue;
 347
 348     // Check if the AND mask is an immediate of the form: 000.....1111111100
 349     unsigned TZ = countTrailingZeros(And_imm);
 350     if (TZ != 1 && TZ != 2)
 351       // Be conservative here. Shifter operands aren't always free. e.g. On
 352       // Swift, left shifter operand of 1 / 2 for free but others are not.
 353       // e.g.
 354       //  ubfx   r3, r1, #16, #8
 355       //  ldr.w  r3, [r0, r3, lsl #2]
 356       // vs.
 357       //  mov.w  r9, #1020
 358       //  and.w  r2, r9, r1, lsr #14
 359       //  ldr    r2, [r0, r2]
 360       continue;
 361     And_imm >>= TZ;
 362     if (And_imm & (And_imm + 1))
 363       continue;
 364
 365     // Look for (and (srl X, c1), c2).
 366     SDValue Srl = N1.getOperand(0);
 367     unsigned Srl_imm = 0;
 368     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 369         (Srl_imm <= 2))
 370       continue;
 371
 372     // Make sure first operand is not a shifter operand which would prevent
 373     // folding of the left shift.
 374     SDValue CPTmp0;
 375     SDValue CPTmp1;
 376     SDValue CPTmp2;
 377     if (isThumb2) {
 378       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 379         continue;
 380     } else {
 381       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 382           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 383         continue;
 384     }
 385
 386     // Now make the transformation.
 387     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 388                           Srl.getOperand(0),
 389                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 390                                               MVT::i32));
 391     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 392                          Srl,
 393                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 394     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 395                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 396     CurDAG->UpdateNodeOperands(N, N0, N1);
 397   }
 398 }
 399
 400 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 401 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 402 /// least on current ARM implementations) which should be avoidded.
 403 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 404   if (OptLevel == CodeGenOpt::None)
 405     return true;
 406
 407   if (!Subtarget->hasVMLxHazards())
 408     return true;
 409
 410   if (!N->hasOneUse())
 411     return false;
 412
 413   SDNode *Use = *N->use_begin();
 414   if (Use->getOpcode() == ISD::CopyToReg)
 415     return true;
 416   if (Use->isMachineOpcode()) {
 417     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 418         CurDAG->getSubtarget().getInstrInfo());
 419
 420     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 421     if (MCID.mayStore())
 422       return true;
 423     unsigned Opcode = MCID.getOpcode();
 424     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 425       return true;
 426     // vmlx feeding into another vmlx. We actually want to unfold
 427     // the use later in the MLxExpansion pass. e.g.
 428     // vmla
 429     // vmla (stall 8 cycles)
 430     //
 431     // vmul (5 cycles)
 432     // vadd (5 cycles)
 433     // vmla
 434     // This adds up to about 18 - 19 cycles.
 435     //
 436     // vmla
 437     // vmul (stall 4 cycles)
 438     // vadd adds up to about 14 cycles.
 439     return TII->isFpMLxInstruction(Opcode);
 440   }
 441
 442   return false;
 443 }
 444
 445 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 446                                             ARM_AM::ShiftOpc ShOpcVal,
 447                                             unsigned ShAmt) {
 448   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 449     return true;
 450   if (Shift.hasOneUse())
 451     return true;
 452   // R << 2 is free.
 453   return ShOpcVal == ARM_AM::lsl &&
 454          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 455 }
 456
 457 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 458   if (Subtarget->isThumb()) {
 459     if (Val <= 255) return 1;                               // MOV
 460     if (Subtarget->hasV6T2Ops() &&
 461         (Val <= 0xffff ||                                   // MOV
 462          ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
 463          ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
 464       return 1;
 465     if (Val <= 510) return 2;                               // MOV + ADDi8
 466     if (~Val <= 255) return 2;                              // MOV + MVN
 467     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 468   } else {
 469     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 470     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 471     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 472     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 473   }
 474   if (Subtarget->useMovt()) return 2; // MOVW + MOVT
 475   return 3; // Literal pool load
 476 }
 477
 478 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 479                                              unsigned MaxShift,
 480                                              unsigned &PowerOfTwo,
 481                                              SDValue &NewMulConst) const {
 482   assert(N.getOpcode() == ISD::MUL);
 483   assert(MaxShift > 0);
 484
 485   // If the multiply is used in more than one place then changing the constant
 486   // will make other uses incorrect, so don't.
 487   if (!N.hasOneUse()) return false;
 488   // Check if the multiply is by a constant
 489   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 490   if (!MulConst) return false;
 491   // If the constant is used in more than one place then modifying it will mean
 492   // we need to materialize two constants instead of one, which is a bad idea.
 493   if (!MulConst->hasOneUse()) return false;
 494   unsigned MulConstVal = MulConst->getZExtValue();
 495   if (MulConstVal == 0) return false;
 496
 497   // Find the largest power of 2 that MulConstVal is a multiple of
 498   PowerOfTwo = MaxShift;
 499   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 500     --PowerOfTwo;
 501     if (PowerOfTwo == 0) return false;
 502   }
 503
 504   // Only optimise if the new cost is better
 505   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 506   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 507   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 508   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 509   return NewCost < OldCost;
 510 }
 511
 512 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 513   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 514   ReplaceUses(N, M);
 515 }
 516
 517 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 518                                               SDValue &BaseReg,
 519                                               SDValue &Opc,
 520                                               bool CheckProfitability) {
 521   if (DisableShifterOp)
 522     return false;
 523
 524   // If N is a multiply-by-constant and it's profitable to extract a shift and
 525   // use it in a shifted operand do so.
 526   if (N.getOpcode() == ISD::MUL) {
 527     unsigned PowerOfTwo = 0;
 528     SDValue NewMulConst;
 529     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 530       HandleSDNode Handle(N);
 531       SDLoc Loc(N);
 532       replaceDAGValue(N.getOperand(1), NewMulConst);
 533       BaseReg = Handle.getValue();
 534       Opc = CurDAG->getTargetConstant(
 535           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 536       return true;
 537     }
 538   }
 539
 540   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 541
 542   // Don't match base register only case. That is matched to a separate
 543   // lower complexity pattern with explicit register operand.
 544   if (ShOpcVal == ARM_AM::no_shift) return false;
 545
 546   BaseReg = N.getOperand(0);
 547   unsigned ShImmVal = 0;
 548   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 549   if (!RHS) return false;
 550   ShImmVal = RHS->getZExtValue() & 31;
 551   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 552                                   SDLoc(N), MVT::i32);
 553   return true;
 554 }
 555
 556 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 557                                               SDValue &BaseReg,
 558                                               SDValue &ShReg,
 559                                               SDValue &Opc,
 560                                               bool CheckProfitability) {
 561   if (DisableShifterOp)
 562     return false;
 563
 564   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 565
 566   // Don't match base register only case. That is matched to a separate
 567   // lower complexity pattern with explicit register operand.
 568   if (ShOpcVal == ARM_AM::no_shift) return false;
 569
 570   BaseReg = N.getOperand(0);
 571   unsigned ShImmVal = 0;
 572   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 573   if (RHS) return false;
 574
 575   ShReg = N.getOperand(1);
 576   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 577     return false;
 578   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 579                                   SDLoc(N), MVT::i32);
 580   return true;
 581 }
 582
 583 // Determine whether an ISD::OR's operands are suitable to turn the operation
 584 // into an addition, which often has more compact encodings.
 585 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
 586   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
 587   Out = N;
 588   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
 589 }
 590
 591
 592 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 593                                           SDValue &Base,
 594                                           SDValue &OffImm) {
 595   // Match simple R + imm12 operands.
 596
 597   // Base only.
 598   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 599       !CurDAG->isBaseWithConstantOffset(N)) {
 600     if (N.getOpcode() == ISD::FrameIndex) {
 601       // Match frame index.
 602       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 603       Base = CurDAG->getTargetFrameIndex(
 604           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 605       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 606       return true;
 607     }
 608
 609     if (N.getOpcode() == ARMISD::Wrapper &&
 610         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 611         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 612         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 613       Base = N.getOperand(0);
 614     } else
 615       Base = N;
 616     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 617     return true;
 618   }
 619
 620   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 621     int RHSC = (int)RHS->getSExtValue();
 622     if (N.getOpcode() == ISD::SUB)
 623       RHSC = -RHSC;
 624
 625     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 626       Base   = N.getOperand(0);
 627       if (Base.getOpcode() == ISD::FrameIndex) {
 628         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 629         Base = CurDAG->getTargetFrameIndex(
 630             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 631       }
 632       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 633       return true;
 634     }
 635   }
 636
 637   // Base only.
 638   Base = N;
 639   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 640   return true;
 641 }
 642
 643
 644
 645 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 646                                       SDValue &Opc) {
 647   if (N.getOpcode() == ISD::MUL &&
 648       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 649     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 650       // X * [3,5,9] -> X + X * [2,4,8] etc.
 651       int RHSC = (int)RHS->getZExtValue();
 652       if (RHSC & 1) {
 653         RHSC = RHSC & ~1;
 654         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 655         if (RHSC < 0) {
 656           AddSub = ARM_AM::sub;
 657           RHSC = - RHSC;
 658         }
 659         if (isPowerOf2_32(RHSC)) {
 660           unsigned ShAmt = Log2_32(RHSC);
 661           Base = Offset = N.getOperand(0);
 662           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 663                                                             ARM_AM::lsl),
 664                                           SDLoc(N), MVT::i32);
 665           return true;
 666         }
 667       }
 668     }
 669   }
 670
 671   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 672       // ISD::OR that is equivalent to an ISD::ADD.
 673       !CurDAG->isBaseWithConstantOffset(N))
 674     return false;
 675
 676   // Leave simple R +/- imm12 operands for LDRi12
 677   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 678     int RHSC;
 679     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 680                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 681       return false;
 682   }
 683
 684   // Otherwise this is R +/- [possibly shifted] R.
 685   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 686   ARM_AM::ShiftOpc ShOpcVal =
 687     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 688   unsigned ShAmt = 0;
 689
 690   Base   = N.getOperand(0);
 691   Offset = N.getOperand(1);
 692
 693   if (ShOpcVal != ARM_AM::no_shift) {
 694     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 695     // it.
 696     if (ConstantSDNode *Sh =
 697            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 698       ShAmt = Sh->getZExtValue();
 699       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 700         Offset = N.getOperand(1).getOperand(0);
 701       else {
 702         ShAmt = 0;
 703         ShOpcVal = ARM_AM::no_shift;
 704       }
 705     } else {
 706       ShOpcVal = ARM_AM::no_shift;
 707     }
 708   }
 709
 710   // Try matching (R shl C) + (R).
 711   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 712       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 713         N.getOperand(0).hasOneUse())) {
 714     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 715     if (ShOpcVal != ARM_AM::no_shift) {
 716       // Check to see if the RHS of the shift is a constant, if not, we can't
 717       // fold it.
 718       if (ConstantSDNode *Sh =
 719           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 720         ShAmt = Sh->getZExtValue();
 721         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 722           Offset = N.getOperand(0).getOperand(0);
 723           Base = N.getOperand(1);
 724         } else {
 725           ShAmt = 0;
 726           ShOpcVal = ARM_AM::no_shift;
 727         }
 728       } else {
 729         ShOpcVal = ARM_AM::no_shift;
 730       }
 731     }
 732   }
 733
 734   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 735   // and use it in a shifted operand do so.
 736   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 737     unsigned PowerOfTwo = 0;
 738     SDValue NewMulConst;
 739     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 740       HandleSDNode Handle(Offset);
 741       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 742       Offset = Handle.getValue();
 743       ShAmt = PowerOfTwo;
 744       ShOpcVal = ARM_AM::lsl;
 745     }
 746   }
 747
 748   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 749                                   SDLoc(N), MVT::i32);
 750   return true;
 751 }
 752
 753 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 754                                             SDValue &Offset, SDValue &Opc) {
 755   unsigned Opcode = Op->getOpcode();
 756   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 757     ? cast<LoadSDNode>(Op)->getAddressingMode()
 758     : cast<StoreSDNode>(Op)->getAddressingMode();
 759   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 760     ? ARM_AM::add : ARM_AM::sub;
 761   int Val;
 762   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 763     return false;
 764
 765   Offset = N;
 766   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 767   unsigned ShAmt = 0;
 768   if (ShOpcVal != ARM_AM::no_shift) {
 769     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 770     // it.
 771     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 772       ShAmt = Sh->getZExtValue();
 773       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 774         Offset = N.getOperand(0);
 775       else {
 776         ShAmt = 0;
 777         ShOpcVal = ARM_AM::no_shift;
 778       }
 779     } else {
 780       ShOpcVal = ARM_AM::no_shift;
 781     }
 782   }
 783
 784   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 785                                   SDLoc(N), MVT::i32);
 786   return true;
 787 }
 788
 789 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 790                                             SDValue &Offset, SDValue &Opc) {
 791   unsigned Opcode = Op->getOpcode();
 792   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 793     ? cast<LoadSDNode>(Op)->getAddressingMode()
 794     : cast<StoreSDNode>(Op)->getAddressingMode();
 795   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 796     ? ARM_AM::add : ARM_AM::sub;
 797   int Val;
 798   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 799     if (AddSub == ARM_AM::sub) Val *= -1;
 800     Offset = CurDAG->getRegister(0, MVT::i32);
 801     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 802     return true;
 803   }
 804
 805   return false;
 806 }
 807
 808
 809 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 810                                             SDValue &Offset, SDValue &Opc) {
 811   unsigned Opcode = Op->getOpcode();
 812   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 813     ? cast<LoadSDNode>(Op)->getAddressingMode()
 814     : cast<StoreSDNode>(Op)->getAddressingMode();
 815   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 816     ? ARM_AM::add : ARM_AM::sub;
 817   int Val;
 818   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 819     Offset = CurDAG->getRegister(0, MVT::i32);
 820     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 821                                                       ARM_AM::no_shift),
 822                                     SDLoc(Op), MVT::i32);
 823     return true;
 824   }
 825
 826   return false;
 827 }
 828
 829 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 830   Base = N;
 831   return true;
 832 }
 833
 834 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 835                                       SDValue &Base, SDValue &Offset,
 836                                       SDValue &Opc) {
 837   if (N.getOpcode() == ISD::SUB) {
 838     // X - C  is canonicalize to X + -C, no need to handle it here.
 839     Base = N.getOperand(0);
 840     Offset = N.getOperand(1);
 841     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 842                                     MVT::i32);
 843     return true;
 844   }
 845
 846   if (!CurDAG->isBaseWithConstantOffset(N)) {
 847     Base = N;
 848     if (N.getOpcode() == ISD::FrameIndex) {
 849       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 850       Base = CurDAG->getTargetFrameIndex(
 851           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 852     }
 853     Offset = CurDAG->getRegister(0, MVT::i32);
 854     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 855                                     MVT::i32);
 856     return true;
 857   }
 858
 859   // If the RHS is +/- imm8, fold into addr mode.
 860   int RHSC;
 861   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 862                               -256 + 1, 256, RHSC)) { // 8 bits.
 863     Base = N.getOperand(0);
 864     if (Base.getOpcode() == ISD::FrameIndex) {
 865       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 866       Base = CurDAG->getTargetFrameIndex(
 867           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 868     }
 869     Offset = CurDAG->getRegister(0, MVT::i32);
 870
 871     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 872     if (RHSC < 0) {
 873       AddSub = ARM_AM::sub;
 874       RHSC = -RHSC;
 875     }
 876     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 877                                     MVT::i32);
 878     return true;
 879   }
 880
 881   Base = N.getOperand(0);
 882   Offset = N.getOperand(1);
 883   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 884                                   MVT::i32);
 885   return true;
 886 }
 887
 888 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 889                                             SDValue &Offset, SDValue &Opc) {
 890   unsigned Opcode = Op->getOpcode();
 891   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 892     ? cast<LoadSDNode>(Op)->getAddressingMode()
 893     : cast<StoreSDNode>(Op)->getAddressingMode();
 894   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 895     ? ARM_AM::add : ARM_AM::sub;
 896   int Val;
 897   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 898     Offset = CurDAG->getRegister(0, MVT::i32);
 899     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 900                                     MVT::i32);
 901     return true;
 902   }
 903
 904   Offset = N;
 905   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 906                                   MVT::i32);
 907   return true;
 908 }
 909
 910 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
 911                                         bool FP16) {
 912   if (!CurDAG->isBaseWithConstantOffset(N)) {
 913     Base = N;
 914     if (N.getOpcode() == ISD::FrameIndex) {
 915       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 916       Base = CurDAG->getTargetFrameIndex(
 917           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 918     } else if (N.getOpcode() == ARMISD::Wrapper &&
 919                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 920                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 921                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 922       Base = N.getOperand(0);
 923     }
 924     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 925                                        SDLoc(N), MVT::i32);
 926     return true;
 927   }
 928
 929   // If the RHS is +/- imm8, fold into addr mode.
 930   int RHSC;
 931   const int Scale = FP16 ? 2 : 4;
 932
 933   if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
 934     Base = N.getOperand(0);
 935     if (Base.getOpcode() == ISD::FrameIndex) {
 936       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 937       Base = CurDAG->getTargetFrameIndex(
 938           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 939     }
 940
 941     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 942     if (RHSC < 0) {
 943       AddSub = ARM_AM::sub;
 944       RHSC = -RHSC;
 945     }
 946
 947     if (FP16)
 948       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
 949                                          SDLoc(N), MVT::i32);
 950     else
 951       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 952                                          SDLoc(N), MVT::i32);
 953
 954     return true;
 955   }
 956
 957   Base = N;
 958
 959   if (FP16)
 960     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
 961                                        SDLoc(N), MVT::i32);
 962   else
 963     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 964                                        SDLoc(N), MVT::i32);
 965
 966   return true;
 967 }
 968
 969 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 970                                       SDValue &Base, SDValue &Offset) {
 971   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
 972 }
 973
 974 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
 975                                           SDValue &Base, SDValue &Offset) {
 976   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
 977 }
 978
 979 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 980                                       SDValue &Align) {
 981   Addr = N;
 982
 983   unsigned Alignment = 0;
 984
 985   MemSDNode *MemN = cast<MemSDNode>(Parent);
 986
 987   if (isa<LSBaseSDNode>(MemN) ||
 988       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
 989         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
 990        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
 991     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 992     // The maximum alignment is equal to the memory size being referenced.
 993     unsigned MMOAlign = MemN->getAlignment();
 994     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
 995     if (MMOAlign >= MemSize && MemSize > 1)
 996       Alignment = MemSize;
 997   } else {
 998     // All other uses of addrmode6 are for intrinsics.  For now just record
 999     // the raw alignment value; it will be refined later based on the legal
1000     // alignment operands for the intrinsic.
1001     Alignment = MemN->getAlignment();
1002   }
1003
1004   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1005   return true;
1006 }
1007
1008 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1009                                             SDValue &Offset) {
1010   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1011   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1012   if (AM != ISD::POST_INC)
1013     return false;
1014   Offset = N;
1015   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1016     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1017       Offset = CurDAG->getRegister(0, MVT::i32);
1018   }
1019   return true;
1020 }
1021
1022 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1023                                        SDValue &Offset, SDValue &Label) {
1024   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1025     Offset = N.getOperand(0);
1026     SDValue N1 = N.getOperand(1);
1027     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1028                                       SDLoc(N), MVT::i32);
1029     return true;
1030   }
1031
1032   return false;
1033 }
1034
1035
1036 //===----------------------------------------------------------------------===//
1037 //                         Thumb Addressing Modes
1038 //===----------------------------------------------------------------------===//
1039
1040 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1041   // Negative numbers are difficult to materialise in thumb1. If we are
1042   // selecting the add of a negative, instead try to select ri with a zero
1043   // offset, so create the add node directly which will become a sub.
1044   if (N.getOpcode() != ISD::ADD)
1045     return false;
1046
1047   // Look for an imm which is not legal for ld/st, but is legal for sub.
1048   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1049     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1050
1051   return false;
1052 }
1053
1054 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1055                                                 SDValue &Offset) {
1056   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1057     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1058     if (!NC || !NC->isNullValue())
1059       return false;
1060
1061     Base = Offset = N;
1062     return true;
1063   }
1064
1065   Base = N.getOperand(0);
1066   Offset = N.getOperand(1);
1067   return true;
1068 }
1069
1070 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1071                                             SDValue &Offset) {
1072   if (shouldUseZeroOffsetLdSt(N))
1073     return false; // Select ri instead
1074   return SelectThumbAddrModeRRSext(N, Base, Offset);
1075 }
1076
1077 bool
1078 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1079                                           SDValue &Base, SDValue &OffImm) {
1080   if (shouldUseZeroOffsetLdSt(N)) {
1081     Base = N;
1082     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1083     return true;
1084   }
1085
1086   if (!CurDAG->isBaseWithConstantOffset(N)) {
1087     if (N.getOpcode() == ISD::ADD) {
1088       return false; // We want to select register offset instead
1089     } else if (N.getOpcode() == ARMISD::Wrapper &&
1090         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1091         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1092         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1093         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1094       Base = N.getOperand(0);
1095     } else {
1096       Base = N;
1097     }
1098
1099     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1100     return true;
1101   }
1102
1103   // If the RHS is + imm5 * scale, fold into addr mode.
1104   int RHSC;
1105   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1106     Base = N.getOperand(0);
1107     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1108     return true;
1109   }
1110
1111   // Offset is too large, so use register offset instead.
1112   return false;
1113 }
1114
1115 bool
1116 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1117                                            SDValue &OffImm) {
1118   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1119 }
1120
1121 bool
1122 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1123                                            SDValue &OffImm) {
1124   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1125 }
1126
1127 bool
1128 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1129                                            SDValue &OffImm) {
1130   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1131 }
1132
1133 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1134                                             SDValue &Base, SDValue &OffImm) {
1135   if (N.getOpcode() == ISD::FrameIndex) {
1136     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1137     // Only multiples of 4 are allowed for the offset, so the frame object
1138     // alignment must be at least 4.
1139     MachineFrameInfo &MFI = MF->getFrameInfo();
1140     if (MFI.getObjectAlignment(FI) < 4)
1141       MFI.setObjectAlignment(FI, 4);
1142     Base = CurDAG->getTargetFrameIndex(
1143         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1144     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1145     return true;
1146   }
1147
1148   if (!CurDAG->isBaseWithConstantOffset(N))
1149     return false;
1150
1151   if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1152     // If the RHS is + imm8 * scale, fold into addr mode.
1153     int RHSC;
1154     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1155       Base = N.getOperand(0);
1156       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1157       // Make sure the offset is inside the object, or we might fail to
1158       // allocate an emergency spill slot. (An out-of-range access is UB, but
1159       // it could show up anyway.)
1160       MachineFrameInfo &MFI = MF->getFrameInfo();
1161       if (RHSC * 4 < MFI.getObjectSize(FI)) {
1162         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1163         // indexed by the LHS must be 4-byte aligned.
1164         if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1165           MFI.setObjectAlignment(FI, 4);
1166         if (MFI.getObjectAlignment(FI) >= 4) {
1167           Base = CurDAG->getTargetFrameIndex(
1168               FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1169           OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1170           return true;
1171         }
1172       }
1173     }
1174   }
1175
1176   return false;
1177 }
1178
1179
1180 //===----------------------------------------------------------------------===//
1181 //                        Thumb 2 Addressing Modes
1182 //===----------------------------------------------------------------------===//
1183
1184
1185 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1186                                             SDValue &Base, SDValue &OffImm) {
1187   // Match simple R + imm12 operands.
1188
1189   // Base only.
1190   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1191       !CurDAG->isBaseWithConstantOffset(N)) {
1192     if (N.getOpcode() == ISD::FrameIndex) {
1193       // Match frame index.
1194       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1195       Base = CurDAG->getTargetFrameIndex(
1196           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1197       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1198       return true;
1199     }
1200
1201     if (N.getOpcode() == ARMISD::Wrapper &&
1202         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1203         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1204         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1205       Base = N.getOperand(0);
1206       if (Base.getOpcode() == ISD::TargetConstantPool)
1207         return false;  // We want to select t2LDRpci instead.
1208     } else
1209       Base = N;
1210     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1211     return true;
1212   }
1213
1214   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1215     if (SelectT2AddrModeImm8(N, Base, OffImm))
1216       // Let t2LDRi8 handle (R - imm8).
1217       return false;
1218
1219     int RHSC = (int)RHS->getZExtValue();
1220     if (N.getOpcode() == ISD::SUB)
1221       RHSC = -RHSC;
1222
1223     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1224       Base   = N.getOperand(0);
1225       if (Base.getOpcode() == ISD::FrameIndex) {
1226         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1227         Base = CurDAG->getTargetFrameIndex(
1228             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1229       }
1230       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1231       return true;
1232     }
1233   }
1234
1235   // Base only.
1236   Base = N;
1237   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1238   return true;
1239 }
1240
1241 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1242                                            SDValue &Base, SDValue &OffImm) {
1243   // Match simple R - imm8 operands.
1244   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1245       !CurDAG->isBaseWithConstantOffset(N))
1246     return false;
1247
1248   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1249     int RHSC = (int)RHS->getSExtValue();
1250     if (N.getOpcode() == ISD::SUB)
1251       RHSC = -RHSC;
1252
1253     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1254       Base = N.getOperand(0);
1255       if (Base.getOpcode() == ISD::FrameIndex) {
1256         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1257         Base = CurDAG->getTargetFrameIndex(
1258             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1259       }
1260       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1261       return true;
1262     }
1263   }
1264
1265   return false;
1266 }
1267
1268 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1269                                                  SDValue &OffImm){
1270   unsigned Opcode = Op->getOpcode();
1271   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1272     ? cast<LoadSDNode>(Op)->getAddressingMode()
1273     : cast<StoreSDNode>(Op)->getAddressingMode();
1274   int RHSC;
1275   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1276     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1277       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1278       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1279     return true;
1280   }
1281
1282   return false;
1283 }
1284
1285 template<unsigned Shift>
1286 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
1287                                            SDValue &Base, SDValue &OffImm) {
1288   if (N.getOpcode() == ISD::SUB ||
1289       CurDAG->isBaseWithConstantOffset(N)) {
1290     if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1291       int RHSC = (int)RHS->getZExtValue();
1292       if (N.getOpcode() == ISD::SUB)
1293         RHSC = -RHSC;
1294
1295       if (isShiftedInt<7, Shift>(RHSC)) {
1296         Base = N.getOperand(0);
1297         if (Base.getOpcode() == ISD::FrameIndex) {
1298           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1299           Base = CurDAG->getTargetFrameIndex(
1300             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1301         }
1302         OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1303         return true;
1304       }
1305     }
1306   }
1307
1308   // Base only.
1309   Base = N;
1310   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1311   return true;
1312 }
1313
1314 template <unsigned Shift>
1315 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1316                                                  SDValue &OffImm) {
1317   return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1318 }
1319
1320 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1321                                                  SDValue &OffImm,
1322                                                  unsigned Shift) {
1323   unsigned Opcode = Op->getOpcode();
1324   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1325                                ? cast<LoadSDNode>(Op)->getAddressingMode()
1326                                : cast<StoreSDNode>(Op)->getAddressingMode();
1327   int RHSC;
1328   if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
1329     OffImm =
1330         ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1331             ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1332             : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1333                                         MVT::i32);
1334     return true;
1335   }
1336   return false;
1337 }
1338
1339 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1340                                             SDValue &Base,
1341                                             SDValue &OffReg, SDValue &ShImm) {
1342   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1343   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1344     return false;
1345
1346   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1347   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1348     int RHSC = (int)RHS->getZExtValue();
1349     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1350       return false;
1351     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1352       return false;
1353   }
1354
1355   // Look for (R + R) or (R + (R << [1,2,3])).
1356   unsigned ShAmt = 0;
1357   Base   = N.getOperand(0);
1358   OffReg = N.getOperand(1);
1359
1360   // Swap if it is ((R << c) + R).
1361   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1362   if (ShOpcVal != ARM_AM::lsl) {
1363     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1364     if (ShOpcVal == ARM_AM::lsl)
1365       std::swap(Base, OffReg);
1366   }
1367
1368   if (ShOpcVal == ARM_AM::lsl) {
1369     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1370     // it.
1371     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1372       ShAmt = Sh->getZExtValue();
1373       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1374         OffReg = OffReg.getOperand(0);
1375       else {
1376         ShAmt = 0;
1377       }
1378     }
1379   }
1380
1381   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1382   // and use it in a shifted operand do so.
1383   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1384     unsigned PowerOfTwo = 0;
1385     SDValue NewMulConst;
1386     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1387       HandleSDNode Handle(OffReg);
1388       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1389       OffReg = Handle.getValue();
1390       ShAmt = PowerOfTwo;
1391     }
1392   }
1393
1394   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1395
1396   return true;
1397 }
1398
1399 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1400                                                 SDValue &OffImm) {
1401   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1402   // instructions.
1403   Base = N;
1404   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1405
1406   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1407     return true;
1408
1409   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1410   if (!RHS)
1411     return true;
1412
1413   uint32_t RHSC = (int)RHS->getZExtValue();
1414   if (RHSC > 1020 || RHSC % 4 != 0)
1415     return true;
1416
1417   Base = N.getOperand(0);
1418   if (Base.getOpcode() == ISD::FrameIndex) {
1419     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1420     Base = CurDAG->getTargetFrameIndex(
1421         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1422   }
1423
1424   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1425   return true;
1426 }
1427
1428 //===--------------------------------------------------------------------===//
1429
1430 /// getAL - Returns a ARMCC::AL immediate node.
1431 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1432   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1433 }
1434
1435 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1436   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1437   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1438 }
1439
1440 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1441   LoadSDNode *LD = cast<LoadSDNode>(N);
1442   ISD::MemIndexedMode AM = LD->getAddressingMode();
1443   if (AM == ISD::UNINDEXED)
1444     return false;
1445
1446   EVT LoadedVT = LD->getMemoryVT();
1447   SDValue Offset, AMOpc;
1448   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1449   unsigned Opcode = 0;
1450   bool Match = false;
1451   if (LoadedVT == MVT::i32 && isPre &&
1452       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1453     Opcode = ARM::LDR_PRE_IMM;
1454     Match = true;
1455   } else if (LoadedVT == MVT::i32 && !isPre &&
1456       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1457     Opcode = ARM::LDR_POST_IMM;
1458     Match = true;
1459   } else if (LoadedVT == MVT::i32 &&
1460       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1461     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1462     Match = true;
1463
1464   } else if (LoadedVT == MVT::i16 &&
1465              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1466     Match = true;
1467     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1468       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1469       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1470   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1471     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1472       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1473         Match = true;
1474         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1475       }
1476     } else {
1477       if (isPre &&
1478           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1479         Match = true;
1480         Opcode = ARM::LDRB_PRE_IMM;
1481       } else if (!isPre &&
1482                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1483         Match = true;
1484         Opcode = ARM::LDRB_POST_IMM;
1485       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1486         Match = true;
1487         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1488       }
1489     }
1490   }
1491
1492   if (Match) {
1493     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1494       SDValue Chain = LD->getChain();
1495       SDValue Base = LD->getBasePtr();
1496       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1497                        CurDAG->getRegister(0, MVT::i32), Chain };
1498       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1499                                            MVT::Other, Ops);
1500       transferMemOperands(N, New);
1501       ReplaceNode(N, New);
1502       return true;
1503     } else {
1504       SDValue Chain = LD->getChain();
1505       SDValue Base = LD->getBasePtr();
1506       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1507                        CurDAG->getRegister(0, MVT::i32), Chain };
1508       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1509                                            MVT::Other, Ops);
1510       transferMemOperands(N, New);
1511       ReplaceNode(N, New);
1512       return true;
1513     }
1514   }
1515
1516   return false;
1517 }
1518
1519 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1520   LoadSDNode *LD = cast<LoadSDNode>(N);
1521   EVT LoadedVT = LD->getMemoryVT();
1522   ISD::MemIndexedMode AM = LD->getAddressingMode();
1523   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1524       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1525     return false;
1526
1527   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1528   if (!COffs || COffs->getZExtValue() != 4)
1529     return false;
1530
1531   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1532   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1533   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1534   // ISel.
1535   SDValue Chain = LD->getChain();
1536   SDValue Base = LD->getBasePtr();
1537   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1538                    CurDAG->getRegister(0, MVT::i32), Chain };
1539   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1540                                        MVT::i32, MVT::Other, Ops);
1541   transferMemOperands(N, New);
1542   ReplaceNode(N, New);
1543   return true;
1544 }
1545
1546 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1547   LoadSDNode *LD = cast<LoadSDNode>(N);
1548   ISD::MemIndexedMode AM = LD->getAddressingMode();
1549   if (AM == ISD::UNINDEXED)
1550     return false;
1551
1552   EVT LoadedVT = LD->getMemoryVT();
1553   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1554   SDValue Offset;
1555   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1556   unsigned Opcode = 0;
1557   bool Match = false;
1558   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1559     switch (LoadedVT.getSimpleVT().SimpleTy) {
1560     case MVT::i32:
1561       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1562       break;
1563     case MVT::i16:
1564       if (isSExtLd)
1565         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1566       else
1567         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1568       break;
1569     case MVT::i8:
1570     case MVT::i1:
1571       if (isSExtLd)
1572         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1573       else
1574         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1575       break;
1576     default:
1577       return false;
1578     }
1579     Match = true;
1580   }
1581
1582   if (Match) {
1583     SDValue Chain = LD->getChain();
1584     SDValue Base = LD->getBasePtr();
1585     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1586                      CurDAG->getRegister(0, MVT::i32), Chain };
1587     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1588                                          MVT::Other, Ops);
1589     transferMemOperands(N, New);
1590     ReplaceNode(N, New);
1591     return true;
1592   }
1593
1594   return false;
1595 }
1596
1597 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1598   LoadSDNode *LD = cast<LoadSDNode>(N);
1599   ISD::MemIndexedMode AM = LD->getAddressingMode();
1600   if (AM == ISD::UNINDEXED)
1601     return false;
1602   EVT LoadedVT = LD->getMemoryVT();
1603   if (!LoadedVT.isVector())
1604     return false;
1605   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1606   SDValue Offset;
1607   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1608   unsigned Opcode = 0;
1609   unsigned Align = LD->getAlignment();
1610   bool IsLE = Subtarget->isLittle();
1611
1612   if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1613       SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1614     if (isSExtLd)
1615       Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1616     else
1617       Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1618   } else if (LoadedVT == MVT::v8i8 &&
1619              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1620     if (isSExtLd)
1621       Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1622     else
1623       Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1624   } else if (LoadedVT == MVT::v4i8 &&
1625              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1626     if (isSExtLd)
1627       Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1628     else
1629       Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1630   } else if (Align >= 4 &&
1631              (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1632              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1633     Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1634   else if (Align >= 2 &&
1635            (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1636            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1637     Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1638   else if ((IsLE || LoadedVT == MVT::v16i8) &&
1639            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1640     Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1641   else
1642     return false;
1643
1644   SDValue Chain = LD->getChain();
1645   SDValue Base = LD->getBasePtr();
1646   SDValue Ops[] = {Base, Offset,
1647                    CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1648                    CurDAG->getRegister(0, MVT::i32), Chain};
1649   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
1650                                        MVT::i32, MVT::Other, Ops);
1651   transferMemOperands(N, New);
1652   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1653   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1654   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1655   CurDAG->RemoveDeadNode(N);
1656   return true;
1657 }
1658
1659 /// Form a GPRPair pseudo register from a pair of GPR regs.
1660 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1661   SDLoc dl(V0.getNode());
1662   SDValue RegClass =
1663     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1664   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1665   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1666   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1667   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1668 }
1669
1670 /// Form a D register from a pair of S registers.
1671 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1672   SDLoc dl(V0.getNode());
1673   SDValue RegClass =
1674     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1675   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1676   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1677   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1678   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1679 }
1680
1681 /// Form a quad register from a pair of D registers.
1682 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1683   SDLoc dl(V0.getNode());
1684   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1685                                                MVT::i32);
1686   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1687   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1688   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1689   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1690 }
1691
1692 /// Form 4 consecutive D registers from a pair of Q registers.
1693 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1694   SDLoc dl(V0.getNode());
1695   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1696                                                MVT::i32);
1697   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1698   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1699   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1700   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1701 }
1702
1703 /// Form 4 consecutive S registers.
1704 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1705                                    SDValue V2, SDValue V3) {
1706   SDLoc dl(V0.getNode());
1707   SDValue RegClass =
1708     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1709   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1710   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1711   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1712   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1713   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1714                                     V2, SubReg2, V3, SubReg3 };
1715   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1716 }
1717
1718 /// Form 4 consecutive D registers.
1719 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1720                                    SDValue V2, SDValue V3) {
1721   SDLoc dl(V0.getNode());
1722   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1723                                                MVT::i32);
1724   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1725   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1726   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1727   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1728   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1729                                     V2, SubReg2, V3, SubReg3 };
1730   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1731 }
1732
1733 /// Form 4 consecutive Q registers.
1734 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1735                                    SDValue V2, SDValue V3) {
1736   SDLoc dl(V0.getNode());
1737   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1738                                                MVT::i32);
1739   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1740   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1741   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1742   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1743   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1744                                     V2, SubReg2, V3, SubReg3 };
1745   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1746 }
1747
1748 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1749 /// of a NEON VLD or VST instruction.  The supported values depend on the
1750 /// number of registers being loaded.
1751 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1752                                        unsigned NumVecs, bool is64BitVector) {
1753   unsigned NumRegs = NumVecs;
1754   if (!is64BitVector && NumVecs < 3)
1755     NumRegs *= 2;
1756
1757   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1758   if (Alignment >= 32 && NumRegs == 4)
1759     Alignment = 32;
1760   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1761     Alignment = 16;
1762   else if (Alignment >= 8)
1763     Alignment = 8;
1764   else
1765     Alignment = 0;
1766
1767   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1768 }
1769
1770 static bool isVLDfixed(unsigned Opc)
1771 {
1772   switch (Opc) {
1773   default: return false;
1774   case ARM::VLD1d8wb_fixed : return true;
1775   case ARM::VLD1d16wb_fixed : return true;
1776   case ARM::VLD1d64Qwb_fixed : return true;
1777   case ARM::VLD1d32wb_fixed : return true;
1778   case ARM::VLD1d64wb_fixed : return true;
1779   case ARM::VLD1d64TPseudoWB_fixed : return true;
1780   case ARM::VLD1d64QPseudoWB_fixed : return true;
1781   case ARM::VLD1q8wb_fixed : return true;
1782   case ARM::VLD1q16wb_fixed : return true;
1783   case ARM::VLD1q32wb_fixed : return true;
1784   case ARM::VLD1q64wb_fixed : return true;
1785   case ARM::VLD1DUPd8wb_fixed : return true;
1786   case ARM::VLD1DUPd16wb_fixed : return true;
1787   case ARM::VLD1DUPd32wb_fixed : return true;
1788   case ARM::VLD1DUPq8wb_fixed : return true;
1789   case ARM::VLD1DUPq16wb_fixed : return true;
1790   case ARM::VLD1DUPq32wb_fixed : return true;
1791   case ARM::VLD2d8wb_fixed : return true;
1792   case ARM::VLD2d16wb_fixed : return true;
1793   case ARM::VLD2d32wb_fixed : return true;
1794   case ARM::VLD2q8PseudoWB_fixed : return true;
1795   case ARM::VLD2q16PseudoWB_fixed : return true;
1796   case ARM::VLD2q32PseudoWB_fixed : return true;
1797   case ARM::VLD2DUPd8wb_fixed : return true;
1798   case ARM::VLD2DUPd16wb_fixed : return true;
1799   case ARM::VLD2DUPd32wb_fixed : return true;
1800   }
1801 }
1802
1803 static bool isVSTfixed(unsigned Opc)
1804 {
1805   switch (Opc) {
1806   default: return false;
1807   case ARM::VST1d8wb_fixed : return true;
1808   case ARM::VST1d16wb_fixed : return true;
1809   case ARM::VST1d32wb_fixed : return true;
1810   case ARM::VST1d64wb_fixed : return true;
1811   case ARM::VST1q8wb_fixed : return true;
1812   case ARM::VST1q16wb_fixed : return true;
1813   case ARM::VST1q32wb_fixed : return true;
1814   case ARM::VST1q64wb_fixed : return true;
1815   case ARM::VST1d64TPseudoWB_fixed : return true;
1816   case ARM::VST1d64QPseudoWB_fixed : return true;
1817   case ARM::VST2d8wb_fixed : return true;
1818   case ARM::VST2d16wb_fixed : return true;
1819   case ARM::VST2d32wb_fixed : return true;
1820   case ARM::VST2q8PseudoWB_fixed : return true;
1821   case ARM::VST2q16PseudoWB_fixed : return true;
1822   case ARM::VST2q32PseudoWB_fixed : return true;
1823   }
1824 }
1825
1826 // Get the register stride update opcode of a VLD/VST instruction that
1827 // is otherwise equivalent to the given fixed stride updating instruction.
1828 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1829   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1830     && "Incorrect fixed stride updating instruction.");
1831   switch (Opc) {
1832   default: break;
1833   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1834   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1835   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1836   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1837   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1838   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1839   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1840   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1841   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1842   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1843   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1844   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1845   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1846   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1847   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1848   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1849   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1850   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1851
1852   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1853   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1854   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1855   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1856   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1857   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1858   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1859   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1860   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1861   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1862
1863   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1864   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1865   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1866   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1867   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1868   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1869
1870   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1871   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1872   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1873   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1874   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1875   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1876
1877   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1878   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1879   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1880   }
1881   return Opc; // If not one we handle, return it unchanged.
1882 }
1883
1884 /// Returns true if the given increment is a Constant known to be equal to the
1885 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1886 /// be used.
1887 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1888   auto C = dyn_cast<ConstantSDNode>(Inc);
1889   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1890 }
1891
1892 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1893                                 const uint16_t *DOpcodes,
1894                                 const uint16_t *QOpcodes0,
1895                                 const uint16_t *QOpcodes1) {
1896   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1897   SDLoc dl(N);
1898
1899   SDValue MemAddr, Align;
1900   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
1901                                    // nodes are not intrinsics.
1902   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1903   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1904     return;
1905
1906   SDValue Chain = N->getOperand(0);
1907   EVT VT = N->getValueType(0);
1908   bool is64BitVector = VT.is64BitVector();
1909   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1910
1911   unsigned OpcodeIndex;
1912   switch (VT.getSimpleVT().SimpleTy) {
1913   default: llvm_unreachable("unhandled vld type");
1914     // Double-register operations:
1915   case MVT::v8i8:  OpcodeIndex = 0; break;
1916   case MVT::v4f16:
1917   case MVT::v4i16: OpcodeIndex = 1; break;
1918   case MVT::v2f32:
1919   case MVT::v2i32: OpcodeIndex = 2; break;
1920   case MVT::v1i64: OpcodeIndex = 3; break;
1921     // Quad-register operations:
1922   case MVT::v16i8: OpcodeIndex = 0; break;
1923   case MVT::v8f16:
1924   case MVT::v8i16: OpcodeIndex = 1; break;
1925   case MVT::v4f32:
1926   case MVT::v4i32: OpcodeIndex = 2; break;
1927   case MVT::v2f64:
1928   case MVT::v2i64: OpcodeIndex = 3; break;
1929   }
1930
1931   EVT ResTy;
1932   if (NumVecs == 1)
1933     ResTy = VT;
1934   else {
1935     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1936     if (!is64BitVector)
1937       ResTyElts *= 2;
1938     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1939   }
1940   std::vector<EVT> ResTys;
1941   ResTys.push_back(ResTy);
1942   if (isUpdating)
1943     ResTys.push_back(MVT::i32);
1944   ResTys.push_back(MVT::Other);
1945
1946   SDValue Pred = getAL(CurDAG, dl);
1947   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1948   SDNode *VLd;
1949   SmallVector<SDValue, 7> Ops;
1950
1951   // Double registers and VLD1/VLD2 quad registers are directly supported.
1952   if (is64BitVector || NumVecs <= 2) {
1953     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1954                     QOpcodes0[OpcodeIndex]);
1955     Ops.push_back(MemAddr);
1956     Ops.push_back(Align);
1957     if (isUpdating) {
1958       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1959       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1960       if (!IsImmUpdate) {
1961         // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1962         // check for the opcode rather than the number of vector elements.
1963         if (isVLDfixed(Opc))
1964           Opc = getVLDSTRegisterUpdateOpcode(Opc);
1965         Ops.push_back(Inc);
1966       // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1967       // the operands if not such an opcode.
1968       } else if (!isVLDfixed(Opc))
1969         Ops.push_back(Reg0);
1970     }
1971     Ops.push_back(Pred);
1972     Ops.push_back(Reg0);
1973     Ops.push_back(Chain);
1974     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1975
1976   } else {
1977     // Otherwise, quad registers are loaded with two separate instructions,
1978     // where one loads the even registers and the other loads the odd registers.
1979     EVT AddrTy = MemAddr.getValueType();
1980
1981     // Load the even subregs.  This is always an updating load, so that it
1982     // provides the address to the second load for the odd subregs.
1983     SDValue ImplDef =
1984       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1985     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1986     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1987                                           ResTy, AddrTy, MVT::Other, OpsA);
1988     Chain = SDValue(VLdA, 2);
1989
1990     // Load the odd subregs.
1991     Ops.push_back(SDValue(VLdA, 1));
1992     Ops.push_back(Align);
1993     if (isUpdating) {
1994       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1995       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1996              "only constant post-increment update allowed for VLD3/4");
1997       (void)Inc;
1998       Ops.push_back(Reg0);
1999     }
2000     Ops.push_back(SDValue(VLdA, 0));
2001     Ops.push_back(Pred);
2002     Ops.push_back(Reg0);
2003     Ops.push_back(Chain);
2004     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2005   }
2006
2007   // Transfer memoperands.
2008   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2009   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2010
2011   if (NumVecs == 1) {
2012     ReplaceNode(N, VLd);
2013     return;
2014   }
2015
2016   // Extract out the subregisters.
2017   SDValue SuperReg = SDValue(VLd, 0);
2018   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2019                     ARM::qsub_3 == ARM::qsub_0 + 3,
2020                 "Unexpected subreg numbering");
2021   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2022   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2023     ReplaceUses(SDValue(N, Vec),
2024                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2025   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2026   if (isUpdating)
2027     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2028   CurDAG->RemoveDeadNode(N);
2029 }
2030
2031 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2032                                 const uint16_t *DOpcodes,
2033                                 const uint16_t *QOpcodes0,
2034                                 const uint16_t *QOpcodes1) {
2035   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2036   SDLoc dl(N);
2037
2038   SDValue MemAddr, Align;
2039   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2040                                    // nodes are not intrinsics.
2041   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2042   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2043   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2044     return;
2045
2046   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2047
2048   SDValue Chain = N->getOperand(0);
2049   EVT VT = N->getOperand(Vec0Idx).getValueType();
2050   bool is64BitVector = VT.is64BitVector();
2051   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2052
2053   unsigned OpcodeIndex;
2054   switch (VT.getSimpleVT().SimpleTy) {
2055   default: llvm_unreachable("unhandled vst type");
2056     // Double-register operations:
2057   case MVT::v8i8:  OpcodeIndex = 0; break;
2058   case MVT::v4f16:
2059   case MVT::v4i16: OpcodeIndex = 1; break;
2060   case MVT::v2f32:
2061   case MVT::v2i32: OpcodeIndex = 2; break;
2062   case MVT::v1i64: OpcodeIndex = 3; break;
2063     // Quad-register operations:
2064   case MVT::v16i8: OpcodeIndex = 0; break;
2065   case MVT::v8f16:
2066   case MVT::v8i16: OpcodeIndex = 1; break;
2067   case MVT::v4f32:
2068   case MVT::v4i32: OpcodeIndex = 2; break;
2069   case MVT::v2f64:
2070   case MVT::v2i64: OpcodeIndex = 3; break;
2071   }
2072
2073   std::vector<EVT> ResTys;
2074   if (isUpdating)
2075     ResTys.push_back(MVT::i32);
2076   ResTys.push_back(MVT::Other);
2077
2078   SDValue Pred = getAL(CurDAG, dl);
2079   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2080   SmallVector<SDValue, 7> Ops;
2081
2082   // Double registers and VST1/VST2 quad registers are directly supported.
2083   if (is64BitVector || NumVecs <= 2) {
2084     SDValue SrcReg;
2085     if (NumVecs == 1) {
2086       SrcReg = N->getOperand(Vec0Idx);
2087     } else if (is64BitVector) {
2088       // Form a REG_SEQUENCE to force register allocation.
2089       SDValue V0 = N->getOperand(Vec0Idx + 0);
2090       SDValue V1 = N->getOperand(Vec0Idx + 1);
2091       if (NumVecs == 2)
2092         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2093       else {
2094         SDValue V2 = N->getOperand(Vec0Idx + 2);
2095         // If it's a vst3, form a quad D-register and leave the last part as
2096         // an undef.
2097         SDValue V3 = (NumVecs == 3)
2098           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2099           : N->getOperand(Vec0Idx + 3);
2100         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2101       }
2102     } else {
2103       // Form a QQ register.
2104       SDValue Q0 = N->getOperand(Vec0Idx);
2105       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2106       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2107     }
2108
2109     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2110                     QOpcodes0[OpcodeIndex]);
2111     Ops.push_back(MemAddr);
2112     Ops.push_back(Align);
2113     if (isUpdating) {
2114       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2115       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2116       if (!IsImmUpdate) {
2117         // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2118         // check for the opcode rather than the number of vector elements.
2119         if (isVSTfixed(Opc))
2120           Opc = getVLDSTRegisterUpdateOpcode(Opc);
2121         Ops.push_back(Inc);
2122       }
2123       // VST1/VST2 fixed increment does not need Reg0 so only include it in
2124       // the operands if not such an opcode.
2125       else if (!isVSTfixed(Opc))
2126         Ops.push_back(Reg0);
2127     }
2128     Ops.push_back(SrcReg);
2129     Ops.push_back(Pred);
2130     Ops.push_back(Reg0);
2131     Ops.push_back(Chain);
2132     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2133
2134     // Transfer memoperands.
2135     CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2136
2137     ReplaceNode(N, VSt);
2138     return;
2139   }
2140
2141   // Otherwise, quad registers are stored with two separate instructions,
2142   // where one stores the even registers and the other stores the odd registers.
2143
2144   // Form the QQQQ REG_SEQUENCE.
2145   SDValue V0 = N->getOperand(Vec0Idx + 0);
2146   SDValue V1 = N->getOperand(Vec0Idx + 1);
2147   SDValue V2 = N->getOperand(Vec0Idx + 2);
2148   SDValue V3 = (NumVecs == 3)
2149     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2150     : N->getOperand(Vec0Idx + 3);
2151   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2152
2153   // Store the even D registers.  This is always an updating store, so that it
2154   // provides the address to the second store for the odd subregs.
2155   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2156   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2157                                         MemAddr.getValueType(),
2158                                         MVT::Other, OpsA);
2159   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2160   Chain = SDValue(VStA, 1);
2161
2162   // Store the odd D registers.
2163   Ops.push_back(SDValue(VStA, 0));
2164   Ops.push_back(Align);
2165   if (isUpdating) {
2166     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2167     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2168            "only constant post-increment update allowed for VST3/4");
2169     (void)Inc;
2170     Ops.push_back(Reg0);
2171   }
2172   Ops.push_back(RegSeq);
2173   Ops.push_back(Pred);
2174   Ops.push_back(Reg0);
2175   Ops.push_back(Chain);
2176   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2177                                         Ops);
2178   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2179   ReplaceNode(N, VStB);
2180 }
2181
2182 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2183                                       unsigned NumVecs,
2184                                       const uint16_t *DOpcodes,
2185                                       const uint16_t *QOpcodes) {
2186   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2187   SDLoc dl(N);
2188
2189   SDValue MemAddr, Align;
2190   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2191                                    // nodes are not intrinsics.
2192   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2193   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2194   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2195     return;
2196
2197   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2198
2199   SDValue Chain = N->getOperand(0);
2200   unsigned Lane =
2201     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2202   EVT VT = N->getOperand(Vec0Idx).getValueType();
2203   bool is64BitVector = VT.is64BitVector();
2204
2205   unsigned Alignment = 0;
2206   if (NumVecs != 3) {
2207     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2208     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2209     if (Alignment > NumBytes)
2210       Alignment = NumBytes;
2211     if (Alignment < 8 && Alignment < NumBytes)
2212       Alignment = 0;
2213     // Alignment must be a power of two; make sure of that.
2214     Alignment = (Alignment & -Alignment);
2215     if (Alignment == 1)
2216       Alignment = 0;
2217   }
2218   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2219
2220   unsigned OpcodeIndex;
2221   switch (VT.getSimpleVT().SimpleTy) {
2222   default: llvm_unreachable("unhandled vld/vst lane type");
2223     // Double-register operations:
2224   case MVT::v8i8:  OpcodeIndex = 0; break;
2225   case MVT::v4f16:
2226   case MVT::v4i16: OpcodeIndex = 1; break;
2227   case MVT::v2f32:
2228   case MVT::v2i32: OpcodeIndex = 2; break;
2229     // Quad-register operations:
2230   case MVT::v8f16:
2231   case MVT::v8i16: OpcodeIndex = 0; break;
2232   case MVT::v4f32:
2233   case MVT::v4i32: OpcodeIndex = 1; break;
2234   }
2235
2236   std::vector<EVT> ResTys;
2237   if (IsLoad) {
2238     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2239     if (!is64BitVector)
2240       ResTyElts *= 2;
2241     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2242                                       MVT::i64, ResTyElts));
2243   }
2244   if (isUpdating)
2245     ResTys.push_back(MVT::i32);
2246   ResTys.push_back(MVT::Other);
2247
2248   SDValue Pred = getAL(CurDAG, dl);
2249   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2250
2251   SmallVector<SDValue, 8> Ops;
2252   Ops.push_back(MemAddr);
2253   Ops.push_back(Align);
2254   if (isUpdating) {
2255     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2256     bool IsImmUpdate =
2257         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2258     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2259   }
2260
2261   SDValue SuperReg;
2262   SDValue V0 = N->getOperand(Vec0Idx + 0);
2263   SDValue V1 = N->getOperand(Vec0Idx + 1);
2264   if (NumVecs == 2) {
2265     if (is64BitVector)
2266       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2267     else
2268       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2269   } else {
2270     SDValue V2 = N->getOperand(Vec0Idx + 2);
2271     SDValue V3 = (NumVecs == 3)
2272       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2273       : N->getOperand(Vec0Idx + 3);
2274     if (is64BitVector)
2275       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2276     else
2277       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2278   }
2279   Ops.push_back(SuperReg);
2280   Ops.push_back(getI32Imm(Lane, dl));
2281   Ops.push_back(Pred);
2282   Ops.push_back(Reg0);
2283   Ops.push_back(Chain);
2284
2285   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2286                                   QOpcodes[OpcodeIndex]);
2287   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2288   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2289   if (!IsLoad) {
2290     ReplaceNode(N, VLdLn);
2291     return;
2292   }
2293
2294   // Extract the subregisters.
2295   SuperReg = SDValue(VLdLn, 0);
2296   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2297                     ARM::qsub_3 == ARM::qsub_0 + 3,
2298                 "Unexpected subreg numbering");
2299   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2300   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2301     ReplaceUses(SDValue(N, Vec),
2302                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2303   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2304   if (isUpdating)
2305     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2306   CurDAG->RemoveDeadNode(N);
2307 }
2308
2309 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2310                                    bool isUpdating, unsigned NumVecs,
2311                                    const uint16_t *DOpcodes,
2312                                    const uint16_t *QOpcodes0,
2313                                    const uint16_t *QOpcodes1) {
2314   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2315   SDLoc dl(N);
2316
2317   SDValue MemAddr, Align;
2318   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2319   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2320     return;
2321
2322   SDValue Chain = N->getOperand(0);
2323   EVT VT = N->getValueType(0);
2324   bool is64BitVector = VT.is64BitVector();
2325
2326   unsigned Alignment = 0;
2327   if (NumVecs != 3) {
2328     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2329     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2330     if (Alignment > NumBytes)
2331       Alignment = NumBytes;
2332     if (Alignment < 8 && Alignment < NumBytes)
2333       Alignment = 0;
2334     // Alignment must be a power of two; make sure of that.
2335     Alignment = (Alignment & -Alignment);
2336     if (Alignment == 1)
2337       Alignment = 0;
2338   }
2339   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2340
2341   unsigned OpcodeIndex;
2342   switch (VT.getSimpleVT().SimpleTy) {
2343   default: llvm_unreachable("unhandled vld-dup type");
2344   case MVT::v8i8:
2345   case MVT::v16i8: OpcodeIndex = 0; break;
2346   case MVT::v4i16:
2347   case MVT::v8i16:
2348   case MVT::v4f16:
2349   case MVT::v8f16:
2350                   OpcodeIndex = 1; break;
2351   case MVT::v2f32:
2352   case MVT::v2i32:
2353   case MVT::v4f32:
2354   case MVT::v4i32: OpcodeIndex = 2; break;
2355   case MVT::v1f64:
2356   case MVT::v1i64: OpcodeIndex = 3; break;
2357   }
2358
2359   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2360   if (!is64BitVector)
2361     ResTyElts *= 2;
2362   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2363
2364   std::vector<EVT> ResTys;
2365   ResTys.push_back(ResTy);
2366   if (isUpdating)
2367     ResTys.push_back(MVT::i32);
2368   ResTys.push_back(MVT::Other);
2369
2370   SDValue Pred = getAL(CurDAG, dl);
2371   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2372
2373   SDNode *VLdDup;
2374   if (is64BitVector || NumVecs == 1) {
2375     SmallVector<SDValue, 6> Ops;
2376     Ops.push_back(MemAddr);
2377     Ops.push_back(Align);
2378     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2379                                    QOpcodes0[OpcodeIndex];
2380     if (isUpdating) {
2381       // fixed-stride update instructions don't have an explicit writeback
2382       // operand. It's implicit in the opcode itself.
2383       SDValue Inc = N->getOperand(2);
2384       bool IsImmUpdate =
2385           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2386       if (NumVecs <= 2 && !IsImmUpdate)
2387         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2388       if (!IsImmUpdate)
2389         Ops.push_back(Inc);
2390       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2391       else if (NumVecs > 2)
2392         Ops.push_back(Reg0);
2393     }
2394     Ops.push_back(Pred);
2395     Ops.push_back(Reg0);
2396     Ops.push_back(Chain);
2397     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2398   } else if (NumVecs == 2) {
2399     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2400     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2401                                           dl, ResTys, OpsA);
2402
2403     Chain = SDValue(VLdA, 1);
2404     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2405     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2406   } else {
2407     SDValue ImplDef =
2408       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2409     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2410     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2411                                           dl, ResTys, OpsA);
2412
2413     SDValue SuperReg = SDValue(VLdA, 0);
2414     Chain = SDValue(VLdA, 1);
2415     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2416     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2417   }
2418
2419   // Transfer memoperands.
2420   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2421   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2422
2423   // Extract the subregisters.
2424   if (NumVecs == 1) {
2425     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2426   } else {
2427     SDValue SuperReg = SDValue(VLdDup, 0);
2428     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2429     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2430     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2431       ReplaceUses(SDValue(N, Vec),
2432                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2433     }
2434   }
2435   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2436   if (isUpdating)
2437     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2438   CurDAG->RemoveDeadNode(N);
2439 }
2440
2441 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2442   if (!Subtarget->hasV6T2Ops())
2443     return false;
2444
2445   unsigned Opc = isSigned
2446     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2447     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2448   SDLoc dl(N);
2449
2450   // For unsigned extracts, check for a shift right and mask
2451   unsigned And_imm = 0;
2452   if (N->getOpcode() == ISD::AND) {
2453     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2454
2455       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2456       if (And_imm & (And_imm + 1))
2457         return false;
2458
2459       unsigned Srl_imm = 0;
2460       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2461                                 Srl_imm)) {
2462         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2463
2464         // Mask off the unnecessary bits of the AND immediate; normally
2465         // DAGCombine will do this, but that might not happen if
2466         // targetShrinkDemandedConstant chooses a different immediate.
2467         And_imm &= -1U >> Srl_imm;
2468
2469         // Note: The width operand is encoded as width-1.
2470         unsigned Width = countTrailingOnes(And_imm) - 1;
2471         unsigned LSB = Srl_imm;
2472
2473         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2474
2475         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2476           // It's cheaper to use a right shift to extract the top bits.
2477           if (Subtarget->isThumb()) {
2478             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2479             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2480                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2481                               getAL(CurDAG, dl), Reg0, Reg0 };
2482             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2483             return true;
2484           }
2485
2486           // ARM models shift instructions as MOVsi with shifter operand.
2487           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2488           SDValue ShOpc =
2489             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2490                                       MVT::i32);
2491           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2492                             getAL(CurDAG, dl), Reg0, Reg0 };
2493           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2494           return true;
2495         }
2496
2497         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2498         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2499                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2500                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2501                           getAL(CurDAG, dl), Reg0 };
2502         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2503         return true;
2504       }
2505     }
2506     return false;
2507   }
2508
2509   // Otherwise, we're looking for a shift of a shift
2510   unsigned Shl_imm = 0;
2511   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2512     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2513     unsigned Srl_imm = 0;
2514     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2515       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2516       // Note: The width operand is encoded as width-1.
2517       unsigned Width = 32 - Srl_imm - 1;
2518       int LSB = Srl_imm - Shl_imm;
2519       if (LSB < 0)
2520         return false;
2521       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2522       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2523       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2524                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2525                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2526                         getAL(CurDAG, dl), Reg0 };
2527       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2528       return true;
2529     }
2530   }
2531
2532   // Or we are looking for a shift of an and, with a mask operand
2533   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2534       isShiftedMask_32(And_imm)) {
2535     unsigned Srl_imm = 0;
2536     unsigned LSB = countTrailingZeros(And_imm);
2537     // Shift must be the same as the ands lsb
2538     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2539       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2540       unsigned MSB = 31 - countLeadingZeros(And_imm);
2541       // Note: The width operand is encoded as width-1.
2542       unsigned Width = MSB - LSB;
2543       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2544       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2545       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2546                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2547                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2548                         getAL(CurDAG, dl), Reg0 };
2549       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2550       return true;
2551     }
2552   }
2553
2554   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2555     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2556     unsigned LSB = 0;
2557     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2558         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2559       return false;
2560
2561     if (LSB + Width > 32)
2562       return false;
2563
2564     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2565     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2566     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2567                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2568                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2569                       getAL(CurDAG, dl), Reg0 };
2570     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2571     return true;
2572   }
2573
2574   return false;
2575 }
2576
2577 /// Target-specific DAG combining for ISD::XOR.
2578 /// Target-independent combining lowers SELECT_CC nodes of the form
2579 /// select_cc setg[ge] X,  0,  X, -X
2580 /// select_cc setgt    X, -1,  X, -X
2581 /// select_cc setl[te] X,  0, -X,  X
2582 /// select_cc setlt    X,  1, -X,  X
2583 /// which represent Integer ABS into:
2584 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2585 /// ARM instruction selection detects the latter and matches it to
2586 /// ARM::ABS or ARM::t2ABS machine node.
2587 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2588   SDValue XORSrc0 = N->getOperand(0);
2589   SDValue XORSrc1 = N->getOperand(1);
2590   EVT VT = N->getValueType(0);
2591
2592   if (Subtarget->isThumb1Only())
2593     return false;
2594
2595   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2596     return false;
2597
2598   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2599   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2600   SDValue SRASrc0 = XORSrc1.getOperand(0);
2601   SDValue SRASrc1 = XORSrc1.getOperand(1);
2602   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2603   EVT XType = SRASrc0.getValueType();
2604   unsigned Size = XType.getSizeInBits() - 1;
2605
2606   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2607       XType.isInteger() && SRAConstant != nullptr &&
2608       Size == SRAConstant->getZExtValue()) {
2609     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2610     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2611     return true;
2612   }
2613
2614   return false;
2615 }
2616
2617 /// We've got special pseudo-instructions for these
2618 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2619   unsigned Opcode;
2620   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2621   if (MemTy == MVT::i8)
2622     Opcode = ARM::CMP_SWAP_8;
2623   else if (MemTy == MVT::i16)
2624     Opcode = ARM::CMP_SWAP_16;
2625   else if (MemTy == MVT::i32)
2626     Opcode = ARM::CMP_SWAP_32;
2627   else
2628     llvm_unreachable("Unknown AtomicCmpSwap type");
2629
2630   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2631                    N->getOperand(0)};
2632   SDNode *CmpSwap = CurDAG->getMachineNode(
2633       Opcode, SDLoc(N),
2634       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2635
2636   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2637   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2638
2639   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2640   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2641   CurDAG->RemoveDeadNode(N);
2642 }
2643
2644 static Optional<std::pair<unsigned, unsigned>>
2645 getContiguousRangeOfSetBits(const APInt &A) {
2646   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2647   unsigned LastOne = A.countTrailingZeros();
2648   if (A.countPopulation() != (FirstOne - LastOne + 1))
2649     return Optional<std::pair<unsigned,unsigned>>();
2650   return std::make_pair(FirstOne, LastOne);
2651 }
2652
2653 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2654   assert(N->getOpcode() == ARMISD::CMPZ);
2655   SwitchEQNEToPLMI = false;
2656
2657   if (!Subtarget->isThumb())
2658     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2659     // LSR don't exist as standalone instructions - they need the barrel shifter.
2660     return;
2661
2662   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2663   SDValue And = N->getOperand(0);
2664   if (!And->hasOneUse())
2665     return;
2666
2667   SDValue Zero = N->getOperand(1);
2668   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2669       And->getOpcode() != ISD::AND)
2670     return;
2671   SDValue X = And.getOperand(0);
2672   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2673
2674   if (!C)
2675     return;
2676   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2677   if (!Range)
2678     return;
2679
2680   // There are several ways to lower this:
2681   SDNode *NewN;
2682   SDLoc dl(N);
2683
2684   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2685     if (Subtarget->isThumb2()) {
2686       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2687       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2688                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2689                         CurDAG->getRegister(0, MVT::i32) };
2690       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2691     } else {
2692       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2693                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2694                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2695       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2696     }
2697   };
2698
2699   if (Range->second == 0) {
2700     //  1. Mask includes the LSB -> Simply shift the top N bits off
2701     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2702     ReplaceNode(And.getNode(), NewN);
2703   } else if (Range->first == 31) {
2704     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2705     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2706     ReplaceNode(And.getNode(), NewN);
2707   } else if (Range->first == Range->second) {
2708     //  3. Only one bit is set. We can shift this into the sign bit and use a
2709     //     PL/MI comparison.
2710     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2711     ReplaceNode(And.getNode(), NewN);
2712
2713     SwitchEQNEToPLMI = true;
2714   } else if (!Subtarget->hasV6T2Ops()) {
2715     //  4. Do a double shift to clear bottom and top bits, but only in
2716     //     thumb-1 mode as in thumb-2 we can use UBFX.
2717     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2718     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2719                      Range->second + (31 - Range->first));
2720     ReplaceNode(And.getNode(), NewN);
2721   }
2722
2723 }
2724
2725 void ARMDAGToDAGISel::Select(SDNode *N) {
2726   SDLoc dl(N);
2727
2728   if (N->isMachineOpcode()) {
2729     N->setNodeId(-1);
2730     return;   // Already selected.
2731   }
2732
2733   switch (N->getOpcode()) {
2734   default: break;
2735   case ISD::STORE: {
2736     // For Thumb1, match an sp-relative store in C++. This is a little
2737     // unfortunate, but I don't think I can make the chain check work
2738     // otherwise.  (The chain of the store has to be the same as the chain
2739     // of the CopyFromReg, or else we can't replace the CopyFromReg with
2740     // a direct reference to "SP".)
2741     //
2742     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2743     // a different addressing mode from other four-byte stores.
2744     //
2745     // This pattern usually comes up with call arguments.
2746     StoreSDNode *ST = cast<StoreSDNode>(N);
2747     SDValue Ptr = ST->getBasePtr();
2748     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2749       int RHSC = 0;
2750       if (Ptr.getOpcode() == ISD::ADD &&
2751           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2752         Ptr = Ptr.getOperand(0);
2753
2754       if (Ptr.getOpcode() == ISD::CopyFromReg &&
2755           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2756           Ptr.getOperand(0) == ST->getChain()) {
2757         SDValue Ops[] = {ST->getValue(),
2758                          CurDAG->getRegister(ARM::SP, MVT::i32),
2759                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2760                          getAL(CurDAG, dl),
2761                          CurDAG->getRegister(0, MVT::i32),
2762                          ST->getChain()};
2763         MachineSDNode *ResNode =
2764             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2765         MachineMemOperand *MemOp = ST->getMemOperand();
2766         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2767         ReplaceNode(N, ResNode);
2768         return;
2769       }
2770     }
2771     break;
2772   }
2773   case ISD::WRITE_REGISTER:
2774     if (tryWriteRegister(N))
2775       return;
2776     break;
2777   case ISD::READ_REGISTER:
2778     if (tryReadRegister(N))
2779       return;
2780     break;
2781   case ISD::INLINEASM:
2782   case ISD::INLINEASM_BR:
2783     if (tryInlineAsm(N))
2784       return;
2785     break;
2786   case ISD::XOR:
2787     // Select special operations if XOR node forms integer ABS pattern
2788     if (tryABSOp(N))
2789       return;
2790     // Other cases are autogenerated.
2791     break;
2792   case ISD::Constant: {
2793     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2794     // If we can't materialize the constant we need to use a literal pool
2795     if (ConstantMaterializationCost(Val) > 2) {
2796       SDValue CPIdx = CurDAG->getTargetConstantPool(
2797           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2798           TLI->getPointerTy(CurDAG->getDataLayout()));
2799
2800       SDNode *ResNode;
2801       if (Subtarget->isThumb()) {
2802         SDValue Ops[] = {
2803           CPIdx,
2804           getAL(CurDAG, dl),
2805           CurDAG->getRegister(0, MVT::i32),
2806           CurDAG->getEntryNode()
2807         };
2808         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2809                                          Ops);
2810       } else {
2811         SDValue Ops[] = {
2812           CPIdx,
2813           CurDAG->getTargetConstant(0, dl, MVT::i32),
2814           getAL(CurDAG, dl),
2815           CurDAG->getRegister(0, MVT::i32),
2816           CurDAG->getEntryNode()
2817         };
2818         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2819                                          Ops);
2820       }
2821       // Annotate the Node with memory operand information so that MachineInstr
2822       // queries work properly. This e.g. gives the register allocation the
2823       // required information for rematerialization.
2824       MachineFunction& MF = CurDAG->getMachineFunction();
2825       MachineMemOperand *MemOp =
2826           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2827                                   MachineMemOperand::MOLoad, 4, 4);
2828
2829       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2830
2831       ReplaceNode(N, ResNode);
2832       return;
2833     }
2834
2835     // Other cases are autogenerated.
2836     break;
2837   }
2838   case ISD::FrameIndex: {
2839     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2840     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2841     SDValue TFI = CurDAG->getTargetFrameIndex(
2842         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2843     if (Subtarget->isThumb1Only()) {
2844       // Set the alignment of the frame object to 4, to avoid having to generate
2845       // more than one ADD
2846       MachineFrameInfo &MFI = MF->getFrameInfo();
2847       if (MFI.getObjectAlignment(FI) < 4)
2848         MFI.setObjectAlignment(FI, 4);
2849       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2850                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2851       return;
2852     } else {
2853       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2854                       ARM::t2ADDri : ARM::ADDri);
2855       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2856                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2857                         CurDAG->getRegister(0, MVT::i32) };
2858       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2859       return;
2860     }
2861   }
2862   case ISD::SRL:
2863     if (tryV6T2BitfieldExtractOp(N, false))
2864       return;
2865     break;
2866   case ISD::SIGN_EXTEND_INREG:
2867   case ISD::SRA:
2868     if (tryV6T2BitfieldExtractOp(N, true))
2869       return;
2870     break;
2871   case ISD::MUL:
2872     if (Subtarget->isThumb1Only())
2873       break;
2874     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2875       unsigned RHSV = C->getZExtValue();
2876       if (!RHSV) break;
2877       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2878         unsigned ShImm = Log2_32(RHSV-1);
2879         if (ShImm >= 32)
2880           break;
2881         SDValue V = N->getOperand(0);
2882         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2883         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2884         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2885         if (Subtarget->isThumb()) {
2886           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2887           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2888           return;
2889         } else {
2890           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2891                             Reg0 };
2892           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2893           return;
2894         }
2895       }
2896       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2897         unsigned ShImm = Log2_32(RHSV+1);
2898         if (ShImm >= 32)
2899           break;
2900         SDValue V = N->getOperand(0);
2901         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2902         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2903         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2904         if (Subtarget->isThumb()) {
2905           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2906           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2907           return;
2908         } else {
2909           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2910                             Reg0 };
2911           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2912           return;
2913         }
2914       }
2915     }
2916     break;
2917   case ISD::AND: {
2918     // Check for unsigned bitfield extract
2919     if (tryV6T2BitfieldExtractOp(N, false))
2920       return;
2921
2922     // If an immediate is used in an AND node, it is possible that the immediate
2923     // can be more optimally materialized when negated. If this is the case we
2924     // can negate the immediate and use a BIC instead.
2925     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2926     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2927       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2928
2929       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2930       // immediate can be negated and fit in the immediate operand of
2931       // a t2BIC, don't do any manual transform here as this can be
2932       // handled by the generic ISel machinery.
2933       bool PreferImmediateEncoding =
2934         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2935       if (!PreferImmediateEncoding &&
2936           ConstantMaterializationCost(Imm) >
2937               ConstantMaterializationCost(~Imm)) {
2938         // The current immediate costs more to materialize than a negated
2939         // immediate, so negate the immediate and use a BIC.
2940         SDValue NewImm =
2941           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2942         // If the new constant didn't exist before, reposition it in the topological
2943         // ordering so it is just before N. Otherwise, don't touch its location.
2944         if (NewImm->getNodeId() == -1)
2945           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2946
2947         if (!Subtarget->hasThumb2()) {
2948           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2949                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2950                            CurDAG->getRegister(0, MVT::i32)};
2951           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2952           return;
2953         } else {
2954           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2955                            CurDAG->getRegister(0, MVT::i32),
2956                            CurDAG->getRegister(0, MVT::i32)};
2957           ReplaceNode(N,
2958                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2959           return;
2960         }
2961       }
2962     }
2963
2964     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2965     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2966     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2967     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2968     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2969     EVT VT = N->getValueType(0);
2970     if (VT != MVT::i32)
2971       break;
2972     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2973       ? ARM::t2MOVTi16
2974       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2975     if (!Opc)
2976       break;
2977     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2978     N1C = dyn_cast<ConstantSDNode>(N1);
2979     if (!N1C)
2980       break;
2981     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2982       SDValue N2 = N0.getOperand(1);
2983       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2984       if (!N2C)
2985         break;
2986       unsigned N1CVal = N1C->getZExtValue();
2987       unsigned N2CVal = N2C->getZExtValue();
2988       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2989           (N1CVal & 0xffffU) == 0xffffU &&
2990           (N2CVal & 0xffffU) == 0x0U) {
2991         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2992                                                   dl, MVT::i32);
2993         SDValue Ops[] = { N0.getOperand(0), Imm16,
2994                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2995         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2996         return;
2997       }
2998     }
2999
3000     break;
3001   }
3002   case ARMISD::UMAAL: {
3003     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3004     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3005                       N->getOperand(2), N->getOperand(3),
3006                       getAL(CurDAG, dl),
3007                       CurDAG->getRegister(0, MVT::i32) };
3008     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3009     return;
3010   }
3011   case ARMISD::UMLAL:{
3012     if (Subtarget->isThumb()) {
3013       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3014                         N->getOperand(3), getAL(CurDAG, dl),
3015                         CurDAG->getRegister(0, MVT::i32)};
3016       ReplaceNode(
3017           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3018       return;
3019     }else{
3020       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3021                         N->getOperand(3), getAL(CurDAG, dl),
3022                         CurDAG->getRegister(0, MVT::i32),
3023                         CurDAG->getRegister(0, MVT::i32) };
3024       ReplaceNode(N, CurDAG->getMachineNode(
3025                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3026                          MVT::i32, MVT::i32, Ops));
3027       return;
3028     }
3029   }
3030   case ARMISD::SMLAL:{
3031     if (Subtarget->isThumb()) {
3032       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3033                         N->getOperand(3), getAL(CurDAG, dl),
3034                         CurDAG->getRegister(0, MVT::i32)};
3035       ReplaceNode(
3036           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3037       return;
3038     }else{
3039       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3040                         N->getOperand(3), getAL(CurDAG, dl),
3041                         CurDAG->getRegister(0, MVT::i32),
3042                         CurDAG->getRegister(0, MVT::i32) };
3043       ReplaceNode(N, CurDAG->getMachineNode(
3044                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3045                          MVT::i32, MVT::i32, Ops));
3046       return;
3047     }
3048   }
3049   case ARMISD::SUBE: {
3050     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3051       break;
3052     // Look for a pattern to match SMMLS
3053     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3054     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3055         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3056         !SDValue(N, 1).use_empty())
3057       break;
3058
3059     if (Subtarget->isThumb())
3060       assert(Subtarget->hasThumb2() &&
3061              "This pattern should not be generated for Thumb");
3062
3063     SDValue SmulLoHi = N->getOperand(1);
3064     SDValue Subc = N->getOperand(2);
3065     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3066
3067     if (!Zero || Zero->getZExtValue() != 0 ||
3068         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3069         N->getOperand(1) != SmulLoHi.getValue(1) ||
3070         N->getOperand(2) != Subc.getValue(1))
3071       break;
3072
3073     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3074     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3075                       N->getOperand(0), getAL(CurDAG, dl),
3076                       CurDAG->getRegister(0, MVT::i32) };
3077     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3078     return;
3079   }
3080   case ISD::LOAD: {
3081     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3082       return;
3083     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3084       if (tryT2IndexedLoad(N))
3085         return;
3086     } else if (Subtarget->isThumb()) {
3087       if (tryT1IndexedLoad(N))
3088         return;
3089     } else if (tryARMIndexedLoad(N))
3090       return;
3091     // Other cases are autogenerated.
3092     break;
3093   }
3094   case ARMISD::WLS:
3095   case ARMISD::LE: {
3096     SDValue Ops[] = { N->getOperand(1),
3097                       N->getOperand(2),
3098                       N->getOperand(0) };
3099     unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3100       ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3101     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3102     ReplaceUses(N, New);
3103     CurDAG->RemoveDeadNode(N);
3104     return;
3105   }
3106   case ARMISD::LOOP_DEC: {
3107     SDValue Ops[] = { N->getOperand(1),
3108                       N->getOperand(2),
3109                       N->getOperand(0) };
3110     SDNode *Dec =
3111       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3112                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3113     ReplaceUses(N, Dec);
3114     CurDAG->RemoveDeadNode(N);
3115     return;
3116   }
3117   case ARMISD::BRCOND: {
3118     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3119     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3120     // Pattern complexity = 6  cost = 1  size = 0
3121
3122     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3123     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3124     // Pattern complexity = 6  cost = 1  size = 0
3125
3126     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3127     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3128     // Pattern complexity = 6  cost = 1  size = 0
3129
3130     unsigned Opc = Subtarget->isThumb() ?
3131       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3132     SDValue Chain = N->getOperand(0);
3133     SDValue N1 = N->getOperand(1);
3134     SDValue N2 = N->getOperand(2);
3135     SDValue N3 = N->getOperand(3);
3136     SDValue InFlag = N->getOperand(4);
3137     assert(N1.getOpcode() == ISD::BasicBlock);
3138     assert(N2.getOpcode() == ISD::Constant);
3139     assert(N3.getOpcode() == ISD::Register);
3140
3141     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3142
3143     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3144       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3145         SDValue Int = InFlag.getOperand(0);
3146         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3147
3148         // Handle low-overhead loops.
3149         if (ID == Intrinsic::loop_decrement_reg) {
3150           SDValue Elements = Int.getOperand(2);
3151           SDValue Size = CurDAG->getTargetConstant(
3152             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3153                                  MVT::i32);
3154
3155           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3156           SDNode *LoopDec =
3157             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3158                                    CurDAG->getVTList(MVT::i32, MVT::Other),
3159                                    Args);
3160           ReplaceUses(Int.getNode(), LoopDec);
3161
3162           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3163           SDNode *LoopEnd =
3164             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3165
3166           ReplaceUses(N, LoopEnd);
3167           CurDAG->RemoveDeadNode(N);
3168           CurDAG->RemoveDeadNode(InFlag.getNode());
3169           CurDAG->RemoveDeadNode(Int.getNode());
3170           return;
3171         }
3172       }
3173
3174       bool SwitchEQNEToPLMI;
3175       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3176       InFlag = N->getOperand(4);
3177
3178       if (SwitchEQNEToPLMI) {
3179         switch ((ARMCC::CondCodes)CC) {
3180         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3181         case ARMCC::NE:
3182           CC = (unsigned)ARMCC::MI;
3183           break;
3184         case ARMCC::EQ:
3185           CC = (unsigned)ARMCC::PL;
3186           break;
3187         }
3188       }
3189     }
3190
3191     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3192     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3193     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3194                                              MVT::Glue, Ops);
3195     Chain = SDValue(ResNode, 0);
3196     if (N->getNumValues() == 2) {
3197       InFlag = SDValue(ResNode, 1);
3198       ReplaceUses(SDValue(N, 1), InFlag);
3199     }
3200     ReplaceUses(SDValue(N, 0),
3201                 SDValue(Chain.getNode(), Chain.getResNo()));
3202     CurDAG->RemoveDeadNode(N);
3203     return;
3204   }
3205
3206   case ARMISD::CMPZ: {
3207     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3208     //   This allows us to avoid materializing the expensive negative constant.
3209     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3210     //   for its glue output.
3211     SDValue X = N->getOperand(0);
3212     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3213     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3214       int64_t Addend = -C->getSExtValue();
3215
3216       SDNode *Add = nullptr;
3217       // ADDS can be better than CMN if the immediate fits in a
3218       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3219       // Outside that range we can just use a CMN which is 32-bit but has a
3220       // 12-bit immediate range.
3221       if (Addend < 1<<8) {
3222         if (Subtarget->isThumb2()) {
3223           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3224                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3225                             CurDAG->getRegister(0, MVT::i32) };
3226           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3227         } else {
3228           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3229           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3230                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3231                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3232           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3233         }
3234       }
3235       if (Add) {
3236         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3237         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3238       }
3239     }
3240     // Other cases are autogenerated.
3241     break;
3242   }
3243
3244   case ARMISD::CMOV: {
3245     SDValue InFlag = N->getOperand(4);
3246
3247     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3248       bool SwitchEQNEToPLMI;
3249       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3250
3251       if (SwitchEQNEToPLMI) {
3252         SDValue ARMcc = N->getOperand(2);
3253         ARMCC::CondCodes CC =
3254           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3255
3256         switch (CC) {
3257         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3258         case ARMCC::NE:
3259           CC = ARMCC::MI;
3260           break;
3261         case ARMCC::EQ:
3262           CC = ARMCC::PL;
3263           break;
3264         }
3265         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3266         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3267                          N->getOperand(3), N->getOperand(4)};
3268         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3269       }
3270
3271     }
3272     // Other cases are autogenerated.
3273     break;
3274   }
3275
3276   case ARMISD::VZIP: {
3277     unsigned Opc = 0;
3278     EVT VT = N->getValueType(0);
3279     switch (VT.getSimpleVT().SimpleTy) {
3280     default: return;
3281     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3282     case MVT::v4f16:
3283     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3284     case MVT::v2f32:
3285     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3286     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3287     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3288     case MVT::v8f16:
3289     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3290     case MVT::v4f32:
3291     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3292     }
3293     SDValue Pred = getAL(CurDAG, dl);
3294     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3295     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3296     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3297     return;
3298   }
3299   case ARMISD::VUZP: {
3300     unsigned Opc = 0;
3301     EVT VT = N->getValueType(0);
3302     switch (VT.getSimpleVT().SimpleTy) {
3303     default: return;
3304     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3305     case MVT::v4f16:
3306     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3307     case MVT::v2f32:
3308     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3309     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3310     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3311     case MVT::v8f16:
3312     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3313     case MVT::v4f32:
3314     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3315     }
3316     SDValue Pred = getAL(CurDAG, dl);
3317     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3318     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3319     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3320     return;
3321   }
3322   case ARMISD::VTRN: {
3323     unsigned Opc = 0;
3324     EVT VT = N->getValueType(0);
3325     switch (VT.getSimpleVT().SimpleTy) {
3326     default: return;
3327     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3328     case MVT::v4f16:
3329     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3330     case MVT::v2f32:
3331     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3332     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3333     case MVT::v8f16:
3334     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3335     case MVT::v4f32:
3336     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3337     }
3338     SDValue Pred = getAL(CurDAG, dl);
3339     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3340     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3341     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3342     return;
3343   }
3344   case ARMISD::BUILD_VECTOR: {
3345     EVT VecVT = N->getValueType(0);
3346     EVT EltVT = VecVT.getVectorElementType();
3347     unsigned NumElts = VecVT.getVectorNumElements();
3348     if (EltVT == MVT::f64) {
3349       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3350       ReplaceNode(
3351           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3352       return;
3353     }
3354     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3355     if (NumElts == 2) {
3356       ReplaceNode(
3357           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3358       return;
3359     }
3360     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3361     ReplaceNode(N,
3362                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3363                                     N->getOperand(2), N->getOperand(3)));
3364     return;
3365   }
3366
3367   case ARMISD::VLD1DUP: {
3368     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3369                                          ARM::VLD1DUPd32 };
3370     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3371                                          ARM::VLD1DUPq32 };
3372     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3373     return;
3374   }
3375
3376   case ARMISD::VLD2DUP: {
3377     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3378                                         ARM::VLD2DUPd32 };
3379     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3380     return;
3381   }
3382
3383   case ARMISD::VLD3DUP: {
3384     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3385                                         ARM::VLD3DUPd16Pseudo,
3386                                         ARM::VLD3DUPd32Pseudo };
3387     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3388     return;
3389   }
3390
3391   case ARMISD::VLD4DUP: {
3392     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3393                                         ARM::VLD4DUPd16Pseudo,
3394                                         ARM::VLD4DUPd32Pseudo };
3395     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3396     return;
3397   }
3398
3399   case ARMISD::VLD1DUP_UPD: {
3400     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3401                                          ARM::VLD1DUPd16wb_fixed,
3402                                          ARM::VLD1DUPd32wb_fixed };
3403     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3404                                          ARM::VLD1DUPq16wb_fixed,
3405                                          ARM::VLD1DUPq32wb_fixed };
3406     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3407     return;
3408   }
3409
3410   case ARMISD::VLD2DUP_UPD: {
3411     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3412                                         ARM::VLD2DUPd16wb_fixed,
3413                                         ARM::VLD2DUPd32wb_fixed };
3414     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3415     return;
3416   }
3417
3418   case ARMISD::VLD3DUP_UPD: {
3419     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3420                                         ARM::VLD3DUPd16Pseudo_UPD,
3421                                         ARM::VLD3DUPd32Pseudo_UPD };
3422     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3423     return;
3424   }
3425
3426   case ARMISD::VLD4DUP_UPD: {
3427     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3428                                         ARM::VLD4DUPd16Pseudo_UPD,
3429                                         ARM::VLD4DUPd32Pseudo_UPD };
3430     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3431     return;
3432   }
3433
3434   case ARMISD::VLD1_UPD: {
3435     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3436                                          ARM::VLD1d16wb_fixed,
3437                                          ARM::VLD1d32wb_fixed,
3438                                          ARM::VLD1d64wb_fixed };
3439     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3440                                          ARM::VLD1q16wb_fixed,
3441                                          ARM::VLD1q32wb_fixed,
3442                                          ARM::VLD1q64wb_fixed };
3443     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3444     return;
3445   }
3446
3447   case ARMISD::VLD2_UPD: {
3448     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3449                                          ARM::VLD2d16wb_fixed,
3450                                          ARM::VLD2d32wb_fixed,
3451                                          ARM::VLD1q64wb_fixed};
3452     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3453                                          ARM::VLD2q16PseudoWB_fixed,
3454                                          ARM::VLD2q32PseudoWB_fixed };
3455     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3456     return;
3457   }
3458
3459   case ARMISD::VLD3_UPD: {
3460     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3461                                          ARM::VLD3d16Pseudo_UPD,
3462                                          ARM::VLD3d32Pseudo_UPD,
3463                                          ARM::VLD1d64TPseudoWB_fixed};
3464     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3465                                           ARM::VLD3q16Pseudo_UPD,
3466                                           ARM::VLD3q32Pseudo_UPD };
3467     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3468                                           ARM::VLD3q16oddPseudo_UPD,
3469                                           ARM::VLD3q32oddPseudo_UPD };
3470     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3471     return;
3472   }
3473
3474   case ARMISD::VLD4_UPD: {
3475     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3476                                          ARM::VLD4d16Pseudo_UPD,
3477                                          ARM::VLD4d32Pseudo_UPD,
3478                                          ARM::VLD1d64QPseudoWB_fixed};
3479     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3480                                           ARM::VLD4q16Pseudo_UPD,
3481                                           ARM::VLD4q32Pseudo_UPD };
3482     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3483                                           ARM::VLD4q16oddPseudo_UPD,
3484                                           ARM::VLD4q32oddPseudo_UPD };
3485     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3486     return;
3487   }
3488
3489   case ARMISD::VLD2LN_UPD: {
3490     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3491                                          ARM::VLD2LNd16Pseudo_UPD,
3492                                          ARM::VLD2LNd32Pseudo_UPD };
3493     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3494                                          ARM::VLD2LNq32Pseudo_UPD };
3495     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3496     return;
3497   }
3498
3499   case ARMISD::VLD3LN_UPD: {
3500     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3501                                          ARM::VLD3LNd16Pseudo_UPD,
3502                                          ARM::VLD3LNd32Pseudo_UPD };
3503     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3504                                          ARM::VLD3LNq32Pseudo_UPD };
3505     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3506     return;
3507   }
3508
3509   case ARMISD::VLD4LN_UPD: {
3510     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3511                                          ARM::VLD4LNd16Pseudo_UPD,
3512                                          ARM::VLD4LNd32Pseudo_UPD };
3513     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3514                                          ARM::VLD4LNq32Pseudo_UPD };
3515     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3516     return;
3517   }
3518
3519   case ARMISD::VST1_UPD: {
3520     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3521                                          ARM::VST1d16wb_fixed,
3522                                          ARM::VST1d32wb_fixed,
3523                                          ARM::VST1d64wb_fixed };
3524     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3525                                          ARM::VST1q16wb_fixed,
3526                                          ARM::VST1q32wb_fixed,
3527                                          ARM::VST1q64wb_fixed };
3528     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3529     return;
3530   }
3531
3532   case ARMISD::VST2_UPD: {
3533     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3534                                          ARM::VST2d16wb_fixed,
3535                                          ARM::VST2d32wb_fixed,
3536                                          ARM::VST1q64wb_fixed};
3537     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3538                                          ARM::VST2q16PseudoWB_fixed,
3539                                          ARM::VST2q32PseudoWB_fixed };
3540     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3541     return;
3542   }
3543
3544   case ARMISD::VST3_UPD: {
3545     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3546                                          ARM::VST3d16Pseudo_UPD,
3547                                          ARM::VST3d32Pseudo_UPD,
3548                                          ARM::VST1d64TPseudoWB_fixed};
3549     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3550                                           ARM::VST3q16Pseudo_UPD,
3551                                           ARM::VST3q32Pseudo_UPD };
3552     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3553                                           ARM::VST3q16oddPseudo_UPD,
3554                                           ARM::VST3q32oddPseudo_UPD };
3555     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3556     return;
3557   }
3558
3559   case ARMISD::VST4_UPD: {
3560     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3561                                          ARM::VST4d16Pseudo_UPD,
3562                                          ARM::VST4d32Pseudo_UPD,
3563                                          ARM::VST1d64QPseudoWB_fixed};
3564     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3565                                           ARM::VST4q16Pseudo_UPD,
3566                                           ARM::VST4q32Pseudo_UPD };
3567     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3568                                           ARM::VST4q16oddPseudo_UPD,
3569                                           ARM::VST4q32oddPseudo_UPD };
3570     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3571     return;
3572   }
3573
3574   case ARMISD::VST2LN_UPD: {
3575     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3576                                          ARM::VST2LNd16Pseudo_UPD,
3577                                          ARM::VST2LNd32Pseudo_UPD };
3578     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3579                                          ARM::VST2LNq32Pseudo_UPD };
3580     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3581     return;
3582   }
3583
3584   case ARMISD::VST3LN_UPD: {
3585     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3586                                          ARM::VST3LNd16Pseudo_UPD,
3587                                          ARM::VST3LNd32Pseudo_UPD };
3588     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3589                                          ARM::VST3LNq32Pseudo_UPD };
3590     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3591     return;
3592   }
3593
3594   case ARMISD::VST4LN_UPD: {
3595     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3596                                          ARM::VST4LNd16Pseudo_UPD,
3597                                          ARM::VST4LNd32Pseudo_UPD };
3598     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3599                                          ARM::VST4LNq32Pseudo_UPD };
3600     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3601     return;
3602   }
3603
3604   case ISD::INTRINSIC_VOID:
3605   case ISD::INTRINSIC_W_CHAIN: {
3606     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3607     switch (IntNo) {
3608     default:
3609       break;
3610
3611     case Intrinsic::arm_mrrc:
3612     case Intrinsic::arm_mrrc2: {
3613       SDLoc dl(N);
3614       SDValue Chain = N->getOperand(0);
3615       unsigned Opc;
3616
3617       if (Subtarget->isThumb())
3618         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3619       else
3620         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3621
3622       SmallVector<SDValue, 5> Ops;
3623       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3624       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3625       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3626
3627       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3628       // instruction will always be '1111' but it is possible in assembly language to specify
3629       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3630       if (Opc != ARM::MRRC2) {
3631         Ops.push_back(getAL(CurDAG, dl));
3632         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3633       }
3634
3635       Ops.push_back(Chain);
3636
3637       // Writes to two registers.
3638       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3639
3640       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3641       return;
3642     }
3643     case Intrinsic::arm_ldaexd:
3644     case Intrinsic::arm_ldrexd: {
3645       SDLoc dl(N);
3646       SDValue Chain = N->getOperand(0);
3647       SDValue MemAddr = N->getOperand(2);
3648       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3649
3650       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3651       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3652                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3653
3654       // arm_ldrexd returns a i64 value in {i32, i32}
3655       std::vector<EVT> ResTys;
3656       if (isThumb) {
3657         ResTys.push_back(MVT::i32);
3658         ResTys.push_back(MVT::i32);
3659       } else
3660         ResTys.push_back(MVT::Untyped);
3661       ResTys.push_back(MVT::Other);
3662
3663       // Place arguments in the right order.
3664       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3665                        CurDAG->getRegister(0, MVT::i32), Chain};
3666       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3667       // Transfer memoperands.
3668       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3669       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3670
3671       // Remap uses.
3672       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3673       if (!SDValue(N, 0).use_empty()) {
3674         SDValue Result;
3675         if (isThumb)
3676           Result = SDValue(Ld, 0);
3677         else {
3678           SDValue SubRegIdx =
3679             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3680           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3681               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3682           Result = SDValue(ResNode,0);
3683         }
3684         ReplaceUses(SDValue(N, 0), Result);
3685       }
3686       if (!SDValue(N, 1).use_empty()) {
3687         SDValue Result;
3688         if (isThumb)
3689           Result = SDValue(Ld, 1);
3690         else {
3691           SDValue SubRegIdx =
3692             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3693           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3694               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3695           Result = SDValue(ResNode,0);
3696         }
3697         ReplaceUses(SDValue(N, 1), Result);
3698       }
3699       ReplaceUses(SDValue(N, 2), OutChain);
3700       CurDAG->RemoveDeadNode(N);
3701       return;
3702     }
3703     case Intrinsic::arm_stlexd:
3704     case Intrinsic::arm_strexd: {
3705       SDLoc dl(N);
3706       SDValue Chain = N->getOperand(0);
3707       SDValue Val0 = N->getOperand(2);
3708       SDValue Val1 = N->getOperand(3);
3709       SDValue MemAddr = N->getOperand(4);
3710
3711       // Store exclusive double return a i32 value which is the return status
3712       // of the issued store.
3713       const EVT ResTys[] = {MVT::i32, MVT::Other};
3714
3715       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3716       // Place arguments in the right order.
3717       SmallVector<SDValue, 7> Ops;
3718       if (isThumb) {
3719         Ops.push_back(Val0);
3720         Ops.push_back(Val1);
3721       } else
3722         // arm_strexd uses GPRPair.
3723         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3724       Ops.push_back(MemAddr);
3725       Ops.push_back(getAL(CurDAG, dl));
3726       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3727       Ops.push_back(Chain);
3728
3729       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3730       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3731                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3732
3733       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3734       // Transfer memoperands.
3735       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3736       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3737
3738       ReplaceNode(N, St);
3739       return;
3740     }
3741
3742     case Intrinsic::arm_neon_vld1: {
3743       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3744                                            ARM::VLD1d32, ARM::VLD1d64 };
3745       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3746                                            ARM::VLD1q32, ARM::VLD1q64};
3747       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3748       return;
3749     }
3750
3751     case Intrinsic::arm_neon_vld1x2: {
3752       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3753                                            ARM::VLD1q32, ARM::VLD1q64 };
3754       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3755                                            ARM::VLD1d16QPseudo,
3756                                            ARM::VLD1d32QPseudo,
3757                                            ARM::VLD1d64QPseudo };
3758       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3759       return;
3760     }
3761
3762     case Intrinsic::arm_neon_vld1x3: {
3763       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3764                                            ARM::VLD1d16TPseudo,
3765                                            ARM::VLD1d32TPseudo,
3766                                            ARM::VLD1d64TPseudo };
3767       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3768                                             ARM::VLD1q16LowTPseudo_UPD,
3769                                             ARM::VLD1q32LowTPseudo_UPD,
3770                                             ARM::VLD1q64LowTPseudo_UPD };
3771       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3772                                             ARM::VLD1q16HighTPseudo,
3773                                             ARM::VLD1q32HighTPseudo,
3774                                             ARM::VLD1q64HighTPseudo };
3775       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3776       return;
3777     }
3778
3779     case Intrinsic::arm_neon_vld1x4: {
3780       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3781                                            ARM::VLD1d16QPseudo,
3782                                            ARM::VLD1d32QPseudo,
3783                                            ARM::VLD1d64QPseudo };
3784       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3785                                             ARM::VLD1q16LowQPseudo_UPD,
3786                                             ARM::VLD1q32LowQPseudo_UPD,
3787                                             ARM::VLD1q64LowQPseudo_UPD };
3788       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3789                                             ARM::VLD1q16HighQPseudo,
3790                                             ARM::VLD1q32HighQPseudo,
3791                                             ARM::VLD1q64HighQPseudo };
3792       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3793       return;
3794     }
3795
3796     case Intrinsic::arm_neon_vld2: {
3797       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3798                                            ARM::VLD2d32, ARM::VLD1q64 };
3799       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3800                                            ARM::VLD2q32Pseudo };
3801       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3802       return;
3803     }
3804
3805     case Intrinsic::arm_neon_vld3: {
3806       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3807                                            ARM::VLD3d16Pseudo,
3808                                            ARM::VLD3d32Pseudo,
3809                                            ARM::VLD1d64TPseudo };
3810       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3811                                             ARM::VLD3q16Pseudo_UPD,
3812                                             ARM::VLD3q32Pseudo_UPD };
3813       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3814                                             ARM::VLD3q16oddPseudo,
3815                                             ARM::VLD3q32oddPseudo };
3816       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3817       return;
3818     }
3819
3820     case Intrinsic::arm_neon_vld4: {
3821       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3822                                            ARM::VLD4d16Pseudo,
3823                                            ARM::VLD4d32Pseudo,
3824                                            ARM::VLD1d64QPseudo };
3825       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3826                                             ARM::VLD4q16Pseudo_UPD,
3827                                             ARM::VLD4q32Pseudo_UPD };
3828       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3829                                             ARM::VLD4q16oddPseudo,
3830                                             ARM::VLD4q32oddPseudo };
3831       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3832       return;
3833     }
3834
3835     case Intrinsic::arm_neon_vld2dup: {
3836       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3837                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3838       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3839                                             ARM::VLD2DUPq16EvenPseudo,
3840                                             ARM::VLD2DUPq32EvenPseudo };
3841       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3842                                             ARM::VLD2DUPq16OddPseudo,
3843                                             ARM::VLD2DUPq32OddPseudo };
3844       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3845                    DOpcodes, QOpcodes0, QOpcodes1);
3846       return;
3847     }
3848
3849     case Intrinsic::arm_neon_vld3dup: {
3850       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3851                                            ARM::VLD3DUPd16Pseudo,
3852                                            ARM::VLD3DUPd32Pseudo,
3853                                            ARM::VLD1d64TPseudo };
3854       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3855                                             ARM::VLD3DUPq16EvenPseudo,
3856                                             ARM::VLD3DUPq32EvenPseudo };
3857       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3858                                             ARM::VLD3DUPq16OddPseudo,
3859                                             ARM::VLD3DUPq32OddPseudo };
3860       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3861                    DOpcodes, QOpcodes0, QOpcodes1);
3862       return;
3863     }
3864
3865     case Intrinsic::arm_neon_vld4dup: {
3866       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3867                                            ARM::VLD4DUPd16Pseudo,
3868                                            ARM::VLD4DUPd32Pseudo,
3869                                            ARM::VLD1d64QPseudo };
3870       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3871                                             ARM::VLD4DUPq16EvenPseudo,
3872                                             ARM::VLD4DUPq32EvenPseudo };
3873       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3874                                             ARM::VLD4DUPq16OddPseudo,
3875                                             ARM::VLD4DUPq32OddPseudo };
3876       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3877                    DOpcodes, QOpcodes0, QOpcodes1);
3878       return;
3879     }
3880
3881     case Intrinsic::arm_neon_vld2lane: {
3882       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3883                                            ARM::VLD2LNd16Pseudo,
3884                                            ARM::VLD2LNd32Pseudo };
3885       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3886                                            ARM::VLD2LNq32Pseudo };
3887       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3888       return;
3889     }
3890
3891     case Intrinsic::arm_neon_vld3lane: {
3892       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3893                                            ARM::VLD3LNd16Pseudo,
3894                                            ARM::VLD3LNd32Pseudo };
3895       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3896                                            ARM::VLD3LNq32Pseudo };
3897       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3898       return;
3899     }
3900
3901     case Intrinsic::arm_neon_vld4lane: {
3902       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3903                                            ARM::VLD4LNd16Pseudo,
3904                                            ARM::VLD4LNd32Pseudo };
3905       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3906                                            ARM::VLD4LNq32Pseudo };
3907       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3908       return;
3909     }
3910
3911     case Intrinsic::arm_neon_vst1: {
3912       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3913                                            ARM::VST1d32, ARM::VST1d64 };
3914       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3915                                            ARM::VST1q32, ARM::VST1q64 };
3916       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3917       return;
3918     }
3919
3920     case Intrinsic::arm_neon_vst1x2: {
3921       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3922                                            ARM::VST1q32, ARM::VST1q64 };
3923       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3924                                            ARM::VST1d16QPseudo,
3925                                            ARM::VST1d32QPseudo,
3926                                            ARM::VST1d64QPseudo };
3927       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3928       return;
3929     }
3930
3931     case Intrinsic::arm_neon_vst1x3: {
3932       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3933                                            ARM::VST1d16TPseudo,
3934                                            ARM::VST1d32TPseudo,
3935                                            ARM::VST1d64TPseudo };
3936       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3937                                             ARM::VST1q16LowTPseudo_UPD,
3938                                             ARM::VST1q32LowTPseudo_UPD,
3939                                             ARM::VST1q64LowTPseudo_UPD };
3940       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3941                                             ARM::VST1q16HighTPseudo,
3942                                             ARM::VST1q32HighTPseudo,
3943                                             ARM::VST1q64HighTPseudo };
3944       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3945       return;
3946     }
3947
3948     case Intrinsic::arm_neon_vst1x4: {
3949       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3950                                            ARM::VST1d16QPseudo,
3951                                            ARM::VST1d32QPseudo,
3952                                            ARM::VST1d64QPseudo };
3953       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3954                                             ARM::VST1q16LowQPseudo_UPD,
3955                                             ARM::VST1q32LowQPseudo_UPD,
3956                                             ARM::VST1q64LowQPseudo_UPD };
3957       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3958                                             ARM::VST1q16HighQPseudo,
3959                                             ARM::VST1q32HighQPseudo,
3960                                             ARM::VST1q64HighQPseudo };
3961       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3962       return;
3963     }
3964
3965     case Intrinsic::arm_neon_vst2: {
3966       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3967                                            ARM::VST2d32, ARM::VST1q64 };
3968       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3969                                            ARM::VST2q32Pseudo };
3970       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3971       return;
3972     }
3973
3974     case Intrinsic::arm_neon_vst3: {
3975       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3976                                            ARM::VST3d16Pseudo,
3977                                            ARM::VST3d32Pseudo,
3978                                            ARM::VST1d64TPseudo };
3979       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3980                                             ARM::VST3q16Pseudo_UPD,
3981                                             ARM::VST3q32Pseudo_UPD };
3982       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3983                                             ARM::VST3q16oddPseudo,
3984                                             ARM::VST3q32oddPseudo };
3985       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3986       return;
3987     }
3988
3989     case Intrinsic::arm_neon_vst4: {
3990       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3991                                            ARM::VST4d16Pseudo,
3992                                            ARM::VST4d32Pseudo,
3993                                            ARM::VST1d64QPseudo };
3994       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3995                                             ARM::VST4q16Pseudo_UPD,
3996                                             ARM::VST4q32Pseudo_UPD };
3997       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3998                                             ARM::VST4q16oddPseudo,
3999                                             ARM::VST4q32oddPseudo };
4000       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4001       return;
4002     }
4003
4004     case Intrinsic::arm_neon_vst2lane: {
4005       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4006                                            ARM::VST2LNd16Pseudo,
4007                                            ARM::VST2LNd32Pseudo };
4008       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4009                                            ARM::VST2LNq32Pseudo };
4010       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4011       return;
4012     }
4013
4014     case Intrinsic::arm_neon_vst3lane: {
4015       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4016                                            ARM::VST3LNd16Pseudo,
4017                                            ARM::VST3LNd32Pseudo };
4018       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4019                                            ARM::VST3LNq32Pseudo };
4020       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4021       return;
4022     }
4023
4024     case Intrinsic::arm_neon_vst4lane: {
4025       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4026                                            ARM::VST4LNd16Pseudo,
4027                                            ARM::VST4LNd32Pseudo };
4028       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4029                                            ARM::VST4LNq32Pseudo };
4030       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4031       return;
4032     }
4033     }
4034     break;
4035   }
4036
4037   case ISD::ATOMIC_CMP_SWAP:
4038     SelectCMP_SWAP(N);
4039     return;
4040   }
4041
4042   SelectCode(N);
4043 }
4044
4045 // Inspect a register string of the form
4046 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4047 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4048 // and obtain the integer operands from them, adding these operands to the
4049 // provided vector.
4050 static void getIntOperandsFromRegisterString(StringRef RegString,
4051                                              SelectionDAG *CurDAG,
4052                                              const SDLoc &DL,
4053                                              std::vector<SDValue> &Ops) {
4054   SmallVector<StringRef, 5> Fields;
4055   RegString.split(Fields, ':');
4056
4057   if (Fields.size() > 1) {
4058     bool AllIntFields = true;
4059
4060     for (StringRef Field : Fields) {
4061       // Need to trim out leading 'cp' characters and get the integer field.
4062       unsigned IntField;
4063       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4064       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4065     }
4066
4067     assert(AllIntFields &&
4068             "Unexpected non-integer value in special register string.");
4069   }
4070 }
4071
4072 // Maps a Banked Register string to its mask value. The mask value returned is
4073 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4074 // mask operand, which expresses which register is to be used, e.g. r8, and in
4075 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4076 // was invalid.
4077 static inline int getBankedRegisterMask(StringRef RegString) {
4078   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4079   if (!TheReg)
4080      return -1;
4081   return TheReg->Encoding;
4082 }
4083
4084 // The flags here are common to those allowed for apsr in the A class cores and
4085 // those allowed for the special registers in the M class cores. Returns a
4086 // value representing which flags were present, -1 if invalid.
4087 static inline int getMClassFlagsMask(StringRef Flags) {
4088   return StringSwitch<int>(Flags)
4089           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4090                          // correct when flags are not permitted
4091           .Case("g", 0x1)
4092           .Case("nzcvq", 0x2)
4093           .Case("nzcvqg", 0x3)
4094           .Default(-1);
4095 }
4096
4097 // Maps MClass special registers string to its value for use in the
4098 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4099 // Returns -1 to signify that the string was invalid.
4100 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4101   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4102   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4103   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4104     return -1;
4105   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4106 }
4107
4108 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4109   // The mask operand contains the special register (R Bit) in bit 4, whether
4110   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4111   // bits 3-0 contains the fields to be accessed in the special register, set by
4112   // the flags provided with the register.
4113   int Mask = 0;
4114   if (Reg == "apsr") {
4115     // The flags permitted for apsr are the same flags that are allowed in
4116     // M class registers. We get the flag value and then shift the flags into
4117     // the correct place to combine with the mask.
4118     Mask = getMClassFlagsMask(Flags);
4119     if (Mask == -1)
4120       return -1;
4121     return Mask << 2;
4122   }
4123
4124   if (Reg != "cpsr" && Reg != "spsr") {
4125     return -1;
4126   }
4127
4128   // This is the same as if the flags were "fc"
4129   if (Flags.empty() || Flags == "all")
4130     return Mask | 0x9;
4131
4132   // Inspect the supplied flags string and set the bits in the mask for
4133   // the relevant and valid flags allowed for cpsr and spsr.
4134   for (char Flag : Flags) {
4135     int FlagVal;
4136     switch (Flag) {
4137       case 'c':
4138         FlagVal = 0x1;
4139         break;
4140       case 'x':
4141         FlagVal = 0x2;
4142         break;
4143       case 's':
4144         FlagVal = 0x4;
4145         break;
4146       case 'f':
4147         FlagVal = 0x8;
4148         break;
4149       default:
4150         FlagVal = 0;
4151     }
4152
4153     // This avoids allowing strings where the same flag bit appears twice.
4154     if (!FlagVal || (Mask & FlagVal))
4155       return -1;
4156     Mask |= FlagVal;
4157   }
4158
4159   // If the register is spsr then we need to set the R bit.
4160   if (Reg == "spsr")
4161     Mask |= 0x10;
4162
4163   return Mask;
4164 }
4165
4166 // Lower the read_register intrinsic to ARM specific DAG nodes
4167 // using the supplied metadata string to select the instruction node to use
4168 // and the registers/masks to construct as operands for the node.
4169 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4170   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4171   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4172   bool IsThumb2 = Subtarget->isThumb2();
4173   SDLoc DL(N);
4174
4175   std::vector<SDValue> Ops;
4176   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4177
4178   if (!Ops.empty()) {
4179     // If the special register string was constructed of fields (as defined
4180     // in the ACLE) then need to lower to MRC node (32 bit) or
4181     // MRRC node(64 bit), we can make the distinction based on the number of
4182     // operands we have.
4183     unsigned Opcode;
4184     SmallVector<EVT, 3> ResTypes;
4185     if (Ops.size() == 5){
4186       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4187       ResTypes.append({ MVT::i32, MVT::Other });
4188     } else {
4189       assert(Ops.size() == 3 &&
4190               "Invalid number of fields in special register string.");
4191       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4192       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4193     }
4194
4195     Ops.push_back(getAL(CurDAG, DL));
4196     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4197     Ops.push_back(N->getOperand(0));
4198     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4199     return true;
4200   }
4201
4202   std::string SpecialReg = RegString->getString().lower();
4203
4204   int BankedReg = getBankedRegisterMask(SpecialReg);
4205   if (BankedReg != -1) {
4206     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4207             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4208             N->getOperand(0) };
4209     ReplaceNode(
4210         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4211                                   DL, MVT::i32, MVT::Other, Ops));
4212     return true;
4213   }
4214
4215   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4216   // corresponding to the register that is being read from. So we switch on the
4217   // string to find which opcode we need to use.
4218   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4219                     .Case("fpscr", ARM::VMRS)
4220                     .Case("fpexc", ARM::VMRS_FPEXC)
4221                     .Case("fpsid", ARM::VMRS_FPSID)
4222                     .Case("mvfr0", ARM::VMRS_MVFR0)
4223                     .Case("mvfr1", ARM::VMRS_MVFR1)
4224                     .Case("mvfr2", ARM::VMRS_MVFR2)
4225                     .Case("fpinst", ARM::VMRS_FPINST)
4226                     .Case("fpinst2", ARM::VMRS_FPINST2)
4227                     .Default(0);
4228
4229   // If an opcode was found then we can lower the read to a VFP instruction.
4230   if (Opcode) {
4231     if (!Subtarget->hasVFP2Base())
4232       return false;
4233     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4234       return false;
4235
4236     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4237             N->getOperand(0) };
4238     ReplaceNode(N,
4239                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4240     return true;
4241   }
4242
4243   // If the target is M Class then need to validate that the register string
4244   // is an acceptable value, so check that a mask can be constructed from the
4245   // string.
4246   if (Subtarget->isMClass()) {
4247     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4248     if (SYSmValue == -1)
4249       return false;
4250
4251     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4252                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4253                       N->getOperand(0) };
4254     ReplaceNode(
4255         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4256     return true;
4257   }
4258
4259   // Here we know the target is not M Class so we need to check if it is one
4260   // of the remaining possible values which are apsr, cpsr or spsr.
4261   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4262     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4263             N->getOperand(0) };
4264     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4265                                           DL, MVT::i32, MVT::Other, Ops));
4266     return true;
4267   }
4268
4269   if (SpecialReg == "spsr") {
4270     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4271             N->getOperand(0) };
4272     ReplaceNode(
4273         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4274                                   MVT::i32, MVT::Other, Ops));
4275     return true;
4276   }
4277
4278   return false;
4279 }
4280
4281 // Lower the write_register intrinsic to ARM specific DAG nodes
4282 // using the supplied metadata string to select the instruction node to use
4283 // and the registers/masks to use in the nodes
4284 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4285   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4286   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4287   bool IsThumb2 = Subtarget->isThumb2();
4288   SDLoc DL(N);
4289
4290   std::vector<SDValue> Ops;
4291   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4292
4293   if (!Ops.empty()) {
4294     // If the special register string was constructed of fields (as defined
4295     // in the ACLE) then need to lower to MCR node (32 bit) or
4296     // MCRR node(64 bit), we can make the distinction based on the number of
4297     // operands we have.
4298     unsigned Opcode;
4299     if (Ops.size() == 5) {
4300       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4301       Ops.insert(Ops.begin()+2, N->getOperand(2));
4302     } else {
4303       assert(Ops.size() == 3 &&
4304               "Invalid number of fields in special register string.");
4305       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4306       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4307       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4308     }
4309
4310     Ops.push_back(getAL(CurDAG, DL));
4311     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4312     Ops.push_back(N->getOperand(0));
4313
4314     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4315     return true;
4316   }
4317
4318   std::string SpecialReg = RegString->getString().lower();
4319   int BankedReg = getBankedRegisterMask(SpecialReg);
4320   if (BankedReg != -1) {
4321     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4322             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4323             N->getOperand(0) };
4324     ReplaceNode(
4325         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4326                                   DL, MVT::Other, Ops));
4327     return true;
4328   }
4329
4330   // The VFP registers are written to by creating SelectionDAG nodes with
4331   // opcodes corresponding to the register that is being written. So we switch
4332   // on the string to find which opcode we need to use.
4333   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4334                     .Case("fpscr", ARM::VMSR)
4335                     .Case("fpexc", ARM::VMSR_FPEXC)
4336                     .Case("fpsid", ARM::VMSR_FPSID)
4337                     .Case("fpinst", ARM::VMSR_FPINST)
4338                     .Case("fpinst2", ARM::VMSR_FPINST2)
4339                     .Default(0);
4340
4341   if (Opcode) {
4342     if (!Subtarget->hasVFP2Base())
4343       return false;
4344     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4345             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4346     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4347     return true;
4348   }
4349
4350   std::pair<StringRef, StringRef> Fields;
4351   Fields = StringRef(SpecialReg).rsplit('_');
4352   std::string Reg = Fields.first.str();
4353   StringRef Flags = Fields.second;
4354
4355   // If the target was M Class then need to validate the special register value
4356   // and retrieve the mask for use in the instruction node.
4357   if (Subtarget->isMClass()) {
4358     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4359     if (SYSmValue == -1)
4360       return false;
4361
4362     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4363                       N->getOperand(2), getAL(CurDAG, DL),
4364                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4365     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4366     return true;
4367   }
4368
4369   // We then check to see if a valid mask can be constructed for one of the
4370   // register string values permitted for the A and R class cores. These values
4371   // are apsr, spsr and cpsr; these are also valid on older cores.
4372   int Mask = getARClassRegisterMask(Reg, Flags);
4373   if (Mask != -1) {
4374     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4375             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4376             N->getOperand(0) };
4377     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4378                                           DL, MVT::Other, Ops));
4379     return true;
4380   }
4381
4382   return false;
4383 }
4384
4385 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4386   std::vector<SDValue> AsmNodeOperands;
4387   unsigned Flag, Kind;
4388   bool Changed = false;
4389   unsigned NumOps = N->getNumOperands();
4390
4391   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4392   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4393   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4394   // respectively. Since there is no constraint to explicitly specify a
4395   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4396   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4397   // them into a GPRPair.
4398
4399   SDLoc dl(N);
4400   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4401                                    : SDValue(nullptr,0);
4402
4403   SmallVector<bool, 8> OpChanged;
4404   // Glue node will be appended late.
4405   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4406     SDValue op = N->getOperand(i);
4407     AsmNodeOperands.push_back(op);
4408
4409     if (i < InlineAsm::Op_FirstOperand)
4410       continue;
4411
4412     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4413       Flag = C->getZExtValue();
4414       Kind = InlineAsm::getKind(Flag);
4415     }
4416     else
4417       continue;
4418
4419     // Immediate operands to inline asm in the SelectionDAG are modeled with
4420     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4421     // the second is a constant with the value of the immediate. If we get here
4422     // and we have a Kind_Imm, skip the next operand, and continue.
4423     if (Kind == InlineAsm::Kind_Imm) {
4424       SDValue op = N->getOperand(++i);
4425       AsmNodeOperands.push_back(op);
4426       continue;
4427     }
4428
4429     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4430     if (NumRegs)
4431       OpChanged.push_back(false);
4432
4433     unsigned DefIdx = 0;
4434     bool IsTiedToChangedOp = false;
4435     // If it's a use that is tied with a previous def, it has no
4436     // reg class constraint.
4437     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4438       IsTiedToChangedOp = OpChanged[DefIdx];
4439
4440     // Memory operands to inline asm in the SelectionDAG are modeled with two
4441     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4442     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4443     // it doesn't get misinterpreted), and continue. We do this here because
4444     // it's important to update the OpChanged array correctly before moving on.
4445     if (Kind == InlineAsm::Kind_Mem) {
4446       SDValue op = N->getOperand(++i);
4447       AsmNodeOperands.push_back(op);
4448       continue;
4449     }
4450
4451     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4452         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4453       continue;
4454
4455     unsigned RC;
4456     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4457     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4458         || NumRegs != 2)
4459       continue;
4460
4461     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4462     SDValue V0 = N->getOperand(i+1);
4463     SDValue V1 = N->getOperand(i+2);
4464     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4465     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4466     SDValue PairedReg;
4467     MachineRegisterInfo &MRI = MF->getRegInfo();
4468
4469     if (Kind == InlineAsm::Kind_RegDef ||
4470         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4471       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4472       // the original GPRs.
4473
4474       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4475       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4476       SDValue Chain = SDValue(N,0);
4477
4478       SDNode *GU = N->getGluedUser();
4479       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4480                                                Chain.getValue(1));
4481
4482       // Extract values from a GPRPair reg and copy to the original GPR reg.
4483       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4484                                                     RegCopy);
4485       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4486                                                     RegCopy);
4487       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4488                                         RegCopy.getValue(1));
4489       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4490
4491       // Update the original glue user.
4492       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4493       Ops.push_back(T1.getValue(1));
4494       CurDAG->UpdateNodeOperands(GU, Ops);
4495     }
4496     else {
4497       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4498       // GPRPair and then pass the GPRPair to the inline asm.
4499       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4500
4501       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4502       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4503                                           Chain.getValue(1));
4504       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4505                                           T0.getValue(1));
4506       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4507
4508       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4509       // i32 VRs of inline asm with it.
4510       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4511       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4512       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4513
4514       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4515       Glue = Chain.getValue(1);
4516     }
4517
4518     Changed = true;
4519
4520     if(PairedReg.getNode()) {
4521       OpChanged[OpChanged.size() -1 ] = true;
4522       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4523       if (IsTiedToChangedOp)
4524         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4525       else
4526         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4527       // Replace the current flag.
4528       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4529           Flag, dl, MVT::i32);
4530       // Add the new register node and skip the original two GPRs.
4531       AsmNodeOperands.push_back(PairedReg);
4532       // Skip the next two GPRs.
4533       i += 2;
4534     }
4535   }
4536
4537   if (Glue.getNode())
4538     AsmNodeOperands.push_back(Glue);
4539   if (!Changed)
4540     return false;
4541
4542   SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4543       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4544   New->setNodeId(-1);
4545   ReplaceNode(N, New.getNode());
4546   return true;
4547 }
4548
4549
4550 bool ARMDAGToDAGISel::
4551 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4552                              std::vector<SDValue> &OutOps) {
4553   switch(ConstraintID) {
4554   default:
4555     llvm_unreachable("Unexpected asm memory constraint");
4556   case InlineAsm::Constraint_i:
4557     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4558     //        be an immediate and not a memory constraint.
4559     LLVM_FALLTHROUGH;
4560   case InlineAsm::Constraint_m:
4561   case InlineAsm::Constraint_o:
4562   case InlineAsm::Constraint_Q:
4563   case InlineAsm::Constraint_Um:
4564   case InlineAsm::Constraint_Un:
4565   case InlineAsm::Constraint_Uq:
4566   case InlineAsm::Constraint_Us:
4567   case InlineAsm::Constraint_Ut:
4568   case InlineAsm::Constraint_Uv:
4569   case InlineAsm::Constraint_Uy:
4570     // Require the address to be in a register.  That is safe for all ARM
4571     // variants and it is hard to do anything much smarter without knowing
4572     // how the operand is used.
4573     OutOps.push_back(Op);
4574     return false;
4575   }
4576   return true;
4577 }
4578
4579 /// createARMISelDag - This pass converts a legalized DAG into a
4580 /// ARM-specific DAG, ready for instruction scheduling.
4581 ///
4582 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4583                                      CodeGenOpt::Level OptLevel) {
4584   return new ARMDAGToDAGISel(TM, OptLevel);
4585 }