lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines an instruction selector for the ARM target.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "ARM.h"
  14 #include "ARMBaseInstrInfo.h"
  15 #include "ARMTargetMachine.h"
  16 #include "MCTargetDesc/ARMAddressingModes.h"
  17 #include "Utils/ARMBaseInfo.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/CodeGen/TargetLowering.h"
  26 #include "llvm/IR/CallingConv.h"
  27 #include "llvm/IR/Constants.h"
  28 #include "llvm/IR/DerivedTypes.h"
  29 #include "llvm/IR/Function.h"
  30 #include "llvm/IR/Intrinsics.h"
  31 #include "llvm/IR/LLVMContext.h"
  32 #include "llvm/Support/CommandLine.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 class ARMDAGToDAGISel : public SelectionDAGISel {
  53   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  54   /// make the right decision when generating code for different targets.
  55   const ARMSubtarget *Subtarget;
  56
  57 public:
  58   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  59       : SelectionDAGISel(tm, OptLevel) {}
  60
  61   bool runOnMachineFunction(MachineFunction &MF) override {
  62     // Reset the subtarget each time through.
  63     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  64     SelectionDAGISel::runOnMachineFunction(MF);
  65     return true;
  66   }
  67
  68   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  69
  70   void PreprocessISelDAG() override;
  71
  72   /// getI32Imm - Return a target constant of type i32 with the specified
  73   /// value.
  74   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  75     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  76   }
  77
  78   void Select(SDNode *N) override;
  79
  80   bool hasNoVMLxHazardUse(SDNode *N) const;
  81   bool isShifterOpProfitable(const SDValue &Shift,
  82                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  83   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  84                                SDValue &B, SDValue &C,
  85                                bool CheckProfitability = true);
  86   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  87                                SDValue &B, bool CheckProfitability = true);
  88   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  89                                     SDValue &B, SDValue &C) {
  90     // Don't apply the profitability check
  91     return SelectRegShifterOperand(N, A, B, C, false);
  92   }
  93   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B) {
  95     // Don't apply the profitability check
  96     return SelectImmShifterOperand(N, A, B, false);
  97   }
  98
  99   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
 100
 101   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 102   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 103
 104   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 105     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 106     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 107     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 108     return true;
 109   }
 110
 111   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 112                              SDValue &Offset, SDValue &Opc);
 113   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 114                              SDValue &Offset, SDValue &Opc);
 115   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 116                              SDValue &Offset, SDValue &Opc);
 117   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 118   bool SelectAddrMode3(SDValue N, SDValue &Base,
 119                        SDValue &Offset, SDValue &Opc);
 120   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 121                              SDValue &Offset, SDValue &Opc);
 122   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
 123   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
 124   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
 125   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 126   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 127
 128   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 129
 130   // Thumb Addressing Modes:
 131   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 132   bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
 133   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 134                                 SDValue &OffImm);
 135   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 136                                  SDValue &OffImm);
 137   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 138                                  SDValue &OffImm);
 139   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 140                                  SDValue &OffImm);
 141   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 142
 143   // Thumb 2 Addressing Modes:
 144   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 145   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 146                             SDValue &OffImm);
 147   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 148                                  SDValue &OffImm);
 149   template <unsigned Shift>
 150   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
 151   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
 152                                   unsigned Shift);
 153   template <unsigned Shift>
 154   bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
 155   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 156                              SDValue &OffReg, SDValue &ShImm);
 157   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 158
 159   inline bool is_so_imm(unsigned Imm) const {
 160     return ARM_AM::getSOImmVal(Imm) != -1;
 161   }
 162
 163   inline bool is_so_imm_not(unsigned Imm) const {
 164     return ARM_AM::getSOImmVal(~Imm) != -1;
 165   }
 166
 167   inline bool is_t2_so_imm(unsigned Imm) const {
 168     return ARM_AM::getT2SOImmVal(Imm) != -1;
 169   }
 170
 171   inline bool is_t2_so_imm_not(unsigned Imm) const {
 172     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 173   }
 174
 175   // Include the pieces autogenerated from the target description.
 176 #include "ARMGenDAGISel.inc"
 177
 178 private:
 179   void transferMemOperands(SDNode *Src, SDNode *Dst);
 180
 181   /// Indexed (pre/post inc/dec) load matching code for ARM.
 182   bool tryARMIndexedLoad(SDNode *N);
 183   bool tryT1IndexedLoad(SDNode *N);
 184   bool tryT2IndexedLoad(SDNode *N);
 185   bool tryMVEIndexedLoad(SDNode *N);
 186
 187   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 188   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 189   /// loads of D registers and even subregs and odd subregs of Q registers.
 190   /// For NumVecs <= 2, QOpcodes1 is not used.
 191   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 192                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 193                  const uint16_t *QOpcodes1);
 194
 195   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 196   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 197   /// stores of D registers and even subregs and odd subregs of Q registers.
 198   /// For NumVecs <= 2, QOpcodes1 is not used.
 199   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 200                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 201                  const uint16_t *QOpcodes1);
 202
 203   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 204   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 205   /// load/store of D registers and Q registers.
 206   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 207                        unsigned NumVecs, const uint16_t *DOpcodes,
 208                        const uint16_t *QOpcodes);
 209
 210   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 211   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 212   /// for loading D registers.
 213   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
 214                     unsigned NumVecs, const uint16_t *DOpcodes,
 215                     const uint16_t *QOpcodes0 = nullptr,
 216                     const uint16_t *QOpcodes1 = nullptr);
 217
 218   /// Try to select SBFX/UBFX instructions for ARM.
 219   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 220
 221   // Select special operations if node forms integer ABS pattern
 222   bool tryABSOp(SDNode *N);
 223
 224   bool tryReadRegister(SDNode *N);
 225   bool tryWriteRegister(SDNode *N);
 226
 227   bool tryInlineAsm(SDNode *N);
 228
 229   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 230
 231   void SelectCMP_SWAP(SDNode *N);
 232
 233   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 234   /// inline asm expressions.
 235   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 236                                     std::vector<SDValue> &OutOps) override;
 237
 238   // Form pairs of consecutive R, S, D, or Q registers.
 239   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 240   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 241   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 242   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 243
 244   // Form sequences of 4 consecutive S, D, or Q registers.
 245   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 246   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 247   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 248
 249   // Get the alignment operand for a NEON VLD or VST instruction.
 250   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 251                         bool is64BitVector);
 252
 253   /// Returns the number of instructions required to materialize the given
 254   /// constant in a register, or 3 if a literal pool load is needed.
 255   unsigned ConstantMaterializationCost(unsigned Val) const;
 256
 257   /// Checks if N is a multiplication by a constant where we can extract out a
 258   /// power of two from the constant so that it can be used in a shift, but only
 259   /// if it simplifies the materialization of the constant. Returns true if it
 260   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 261   /// out and to NewMulConst the new constant to be multiplied by.
 262   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 263                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 264
 265   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 266   /// selected when N would have been selected.
 267   void replaceDAGValue(const SDValue &N, SDValue M);
 268 };
 269 }
 270
 271 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 272 /// operand. If so Imm will receive the 32-bit value.
 273 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 274   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 275     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 276     return true;
 277   }
 278   return false;
 279 }
 280
 281 // isInt32Immediate - This method tests to see if a constant operand.
 282 // If so Imm will receive the 32 bit value.
 283 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 284   return isInt32Immediate(N.getNode(), Imm);
 285 }
 286
 287 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 288 // opcode and that it has a immediate integer right operand.
 289 // If so Imm will receive the 32 bit value.
 290 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 291   return N->getOpcode() == Opc &&
 292          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 293 }
 294
 295 /// Check whether a particular node is a constant value representable as
 296 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 297 ///
 298 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 299 static bool isScaledConstantInRange(SDValue Node, int Scale,
 300                                     int RangeMin, int RangeMax,
 301                                     int &ScaledConstant) {
 302   assert(Scale > 0 && "Invalid scale!");
 303
 304   // Check that this is a constant.
 305   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 306   if (!C)
 307     return false;
 308
 309   ScaledConstant = (int) C->getZExtValue();
 310   if ((ScaledConstant % Scale) != 0)
 311     return false;
 312
 313   ScaledConstant /= Scale;
 314   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 315 }
 316
 317 void ARMDAGToDAGISel::PreprocessISelDAG() {
 318   if (!Subtarget->hasV6T2Ops())
 319     return;
 320
 321   bool isThumb2 = Subtarget->isThumb();
 322   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 323        E = CurDAG->allnodes_end(); I != E; ) {
 324     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 325
 326     if (N->getOpcode() != ISD::ADD)
 327       continue;
 328
 329     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 330     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 331     // trailing zeros, e.g. 1020.
 332     // Transform the expression to
 333     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 334     // of trailing zeros of c2. The left shift would be folded as an shifter
 335     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 336     // node (UBFX).
 337
 338     SDValue N0 = N->getOperand(0);
 339     SDValue N1 = N->getOperand(1);
 340     unsigned And_imm = 0;
 341     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 342       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 343         std::swap(N0, N1);
 344     }
 345     if (!And_imm)
 346       continue;
 347
 348     // Check if the AND mask is an immediate of the form: 000.....1111111100
 349     unsigned TZ = countTrailingZeros(And_imm);
 350     if (TZ != 1 && TZ != 2)
 351       // Be conservative here. Shifter operands aren't always free. e.g. On
 352       // Swift, left shifter operand of 1 / 2 for free but others are not.
 353       // e.g.
 354       //  ubfx   r3, r1, #16, #8
 355       //  ldr.w  r3, [r0, r3, lsl #2]
 356       // vs.
 357       //  mov.w  r9, #1020
 358       //  and.w  r2, r9, r1, lsr #14
 359       //  ldr    r2, [r0, r2]
 360       continue;
 361     And_imm >>= TZ;
 362     if (And_imm & (And_imm + 1))
 363       continue;
 364
 365     // Look for (and (srl X, c1), c2).
 366     SDValue Srl = N1.getOperand(0);
 367     unsigned Srl_imm = 0;
 368     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 369         (Srl_imm <= 2))
 370       continue;
 371
 372     // Make sure first operand is not a shifter operand which would prevent
 373     // folding of the left shift.
 374     SDValue CPTmp0;
 375     SDValue CPTmp1;
 376     SDValue CPTmp2;
 377     if (isThumb2) {
 378       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 379         continue;
 380     } else {
 381       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 382           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 383         continue;
 384     }
 385
 386     // Now make the transformation.
 387     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 388                           Srl.getOperand(0),
 389                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 390                                               MVT::i32));
 391     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 392                          Srl,
 393                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 394     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 395                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 396     CurDAG->UpdateNodeOperands(N, N0, N1);
 397   }
 398 }
 399
 400 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 401 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 402 /// least on current ARM implementations) which should be avoidded.
 403 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 404   if (OptLevel == CodeGenOpt::None)
 405     return true;
 406
 407   if (!Subtarget->hasVMLxHazards())
 408     return true;
 409
 410   if (!N->hasOneUse())
 411     return false;
 412
 413   SDNode *Use = *N->use_begin();
 414   if (Use->getOpcode() == ISD::CopyToReg)
 415     return true;
 416   if (Use->isMachineOpcode()) {
 417     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 418         CurDAG->getSubtarget().getInstrInfo());
 419
 420     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 421     if (MCID.mayStore())
 422       return true;
 423     unsigned Opcode = MCID.getOpcode();
 424     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 425       return true;
 426     // vmlx feeding into another vmlx. We actually want to unfold
 427     // the use later in the MLxExpansion pass. e.g.
 428     // vmla
 429     // vmla (stall 8 cycles)
 430     //
 431     // vmul (5 cycles)
 432     // vadd (5 cycles)
 433     // vmla
 434     // This adds up to about 18 - 19 cycles.
 435     //
 436     // vmla
 437     // vmul (stall 4 cycles)
 438     // vadd adds up to about 14 cycles.
 439     return TII->isFpMLxInstruction(Opcode);
 440   }
 441
 442   return false;
 443 }
 444
 445 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 446                                             ARM_AM::ShiftOpc ShOpcVal,
 447                                             unsigned ShAmt) {
 448   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 449     return true;
 450   if (Shift.hasOneUse())
 451     return true;
 452   // R << 2 is free.
 453   return ShOpcVal == ARM_AM::lsl &&
 454          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 455 }
 456
 457 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 458   if (Subtarget->isThumb()) {
 459     if (Val <= 255) return 1;                               // MOV
 460     if (Subtarget->hasV6T2Ops() &&
 461         (Val <= 0xffff ||                                   // MOV
 462          ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
 463          ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
 464       return 1;
 465     if (Val <= 510) return 2;                               // MOV + ADDi8
 466     if (~Val <= 255) return 2;                              // MOV + MVN
 467     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 468   } else {
 469     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 470     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 471     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 472     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 473   }
 474   if (Subtarget->useMovt()) return 2; // MOVW + MOVT
 475   return 3; // Literal pool load
 476 }
 477
 478 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 479                                              unsigned MaxShift,
 480                                              unsigned &PowerOfTwo,
 481                                              SDValue &NewMulConst) const {
 482   assert(N.getOpcode() == ISD::MUL);
 483   assert(MaxShift > 0);
 484
 485   // If the multiply is used in more than one place then changing the constant
 486   // will make other uses incorrect, so don't.
 487   if (!N.hasOneUse()) return false;
 488   // Check if the multiply is by a constant
 489   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 490   if (!MulConst) return false;
 491   // If the constant is used in more than one place then modifying it will mean
 492   // we need to materialize two constants instead of one, which is a bad idea.
 493   if (!MulConst->hasOneUse()) return false;
 494   unsigned MulConstVal = MulConst->getZExtValue();
 495   if (MulConstVal == 0) return false;
 496
 497   // Find the largest power of 2 that MulConstVal is a multiple of
 498   PowerOfTwo = MaxShift;
 499   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 500     --PowerOfTwo;
 501     if (PowerOfTwo == 0) return false;
 502   }
 503
 504   // Only optimise if the new cost is better
 505   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 506   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 507   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 508   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 509   return NewCost < OldCost;
 510 }
 511
 512 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 513   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 514   ReplaceUses(N, M);
 515 }
 516
 517 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 518                                               SDValue &BaseReg,
 519                                               SDValue &Opc,
 520                                               bool CheckProfitability) {
 521   if (DisableShifterOp)
 522     return false;
 523
 524   // If N is a multiply-by-constant and it's profitable to extract a shift and
 525   // use it in a shifted operand do so.
 526   if (N.getOpcode() == ISD::MUL) {
 527     unsigned PowerOfTwo = 0;
 528     SDValue NewMulConst;
 529     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 530       HandleSDNode Handle(N);
 531       SDLoc Loc(N);
 532       replaceDAGValue(N.getOperand(1), NewMulConst);
 533       BaseReg = Handle.getValue();
 534       Opc = CurDAG->getTargetConstant(
 535           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 536       return true;
 537     }
 538   }
 539
 540   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 541
 542   // Don't match base register only case. That is matched to a separate
 543   // lower complexity pattern with explicit register operand.
 544   if (ShOpcVal == ARM_AM::no_shift) return false;
 545
 546   BaseReg = N.getOperand(0);
 547   unsigned ShImmVal = 0;
 548   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 549   if (!RHS) return false;
 550   ShImmVal = RHS->getZExtValue() & 31;
 551   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 552                                   SDLoc(N), MVT::i32);
 553   return true;
 554 }
 555
 556 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 557                                               SDValue &BaseReg,
 558                                               SDValue &ShReg,
 559                                               SDValue &Opc,
 560                                               bool CheckProfitability) {
 561   if (DisableShifterOp)
 562     return false;
 563
 564   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 565
 566   // Don't match base register only case. That is matched to a separate
 567   // lower complexity pattern with explicit register operand.
 568   if (ShOpcVal == ARM_AM::no_shift) return false;
 569
 570   BaseReg = N.getOperand(0);
 571   unsigned ShImmVal = 0;
 572   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 573   if (RHS) return false;
 574
 575   ShReg = N.getOperand(1);
 576   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 577     return false;
 578   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 579                                   SDLoc(N), MVT::i32);
 580   return true;
 581 }
 582
 583 // Determine whether an ISD::OR's operands are suitable to turn the operation
 584 // into an addition, which often has more compact encodings.
 585 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
 586   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
 587   Out = N;
 588   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
 589 }
 590
 591
 592 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 593                                           SDValue &Base,
 594                                           SDValue &OffImm) {
 595   // Match simple R + imm12 operands.
 596
 597   // Base only.
 598   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 599       !CurDAG->isBaseWithConstantOffset(N)) {
 600     if (N.getOpcode() == ISD::FrameIndex) {
 601       // Match frame index.
 602       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 603       Base = CurDAG->getTargetFrameIndex(
 604           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 605       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 606       return true;
 607     }
 608
 609     if (N.getOpcode() == ARMISD::Wrapper &&
 610         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 611         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 612         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 613       Base = N.getOperand(0);
 614     } else
 615       Base = N;
 616     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 617     return true;
 618   }
 619
 620   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 621     int RHSC = (int)RHS->getSExtValue();
 622     if (N.getOpcode() == ISD::SUB)
 623       RHSC = -RHSC;
 624
 625     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 626       Base   = N.getOperand(0);
 627       if (Base.getOpcode() == ISD::FrameIndex) {
 628         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 629         Base = CurDAG->getTargetFrameIndex(
 630             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 631       }
 632       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 633       return true;
 634     }
 635   }
 636
 637   // Base only.
 638   Base = N;
 639   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 640   return true;
 641 }
 642
 643
 644
 645 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 646                                       SDValue &Opc) {
 647   if (N.getOpcode() == ISD::MUL &&
 648       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 649     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 650       // X * [3,5,9] -> X + X * [2,4,8] etc.
 651       int RHSC = (int)RHS->getZExtValue();
 652       if (RHSC & 1) {
 653         RHSC = RHSC & ~1;
 654         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 655         if (RHSC < 0) {
 656           AddSub = ARM_AM::sub;
 657           RHSC = - RHSC;
 658         }
 659         if (isPowerOf2_32(RHSC)) {
 660           unsigned ShAmt = Log2_32(RHSC);
 661           Base = Offset = N.getOperand(0);
 662           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 663                                                             ARM_AM::lsl),
 664                                           SDLoc(N), MVT::i32);
 665           return true;
 666         }
 667       }
 668     }
 669   }
 670
 671   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 672       // ISD::OR that is equivalent to an ISD::ADD.
 673       !CurDAG->isBaseWithConstantOffset(N))
 674     return false;
 675
 676   // Leave simple R +/- imm12 operands for LDRi12
 677   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 678     int RHSC;
 679     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 680                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 681       return false;
 682   }
 683
 684   // Otherwise this is R +/- [possibly shifted] R.
 685   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 686   ARM_AM::ShiftOpc ShOpcVal =
 687     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 688   unsigned ShAmt = 0;
 689
 690   Base   = N.getOperand(0);
 691   Offset = N.getOperand(1);
 692
 693   if (ShOpcVal != ARM_AM::no_shift) {
 694     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 695     // it.
 696     if (ConstantSDNode *Sh =
 697            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 698       ShAmt = Sh->getZExtValue();
 699       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 700         Offset = N.getOperand(1).getOperand(0);
 701       else {
 702         ShAmt = 0;
 703         ShOpcVal = ARM_AM::no_shift;
 704       }
 705     } else {
 706       ShOpcVal = ARM_AM::no_shift;
 707     }
 708   }
 709
 710   // Try matching (R shl C) + (R).
 711   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 712       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 713         N.getOperand(0).hasOneUse())) {
 714     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 715     if (ShOpcVal != ARM_AM::no_shift) {
 716       // Check to see if the RHS of the shift is a constant, if not, we can't
 717       // fold it.
 718       if (ConstantSDNode *Sh =
 719           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 720         ShAmt = Sh->getZExtValue();
 721         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 722           Offset = N.getOperand(0).getOperand(0);
 723           Base = N.getOperand(1);
 724         } else {
 725           ShAmt = 0;
 726           ShOpcVal = ARM_AM::no_shift;
 727         }
 728       } else {
 729         ShOpcVal = ARM_AM::no_shift;
 730       }
 731     }
 732   }
 733
 734   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 735   // and use it in a shifted operand do so.
 736   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 737     unsigned PowerOfTwo = 0;
 738     SDValue NewMulConst;
 739     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 740       HandleSDNode Handle(Offset);
 741       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 742       Offset = Handle.getValue();
 743       ShAmt = PowerOfTwo;
 744       ShOpcVal = ARM_AM::lsl;
 745     }
 746   }
 747
 748   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 749                                   SDLoc(N), MVT::i32);
 750   return true;
 751 }
 752
 753 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 754                                             SDValue &Offset, SDValue &Opc) {
 755   unsigned Opcode = Op->getOpcode();
 756   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 757     ? cast<LoadSDNode>(Op)->getAddressingMode()
 758     : cast<StoreSDNode>(Op)->getAddressingMode();
 759   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 760     ? ARM_AM::add : ARM_AM::sub;
 761   int Val;
 762   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 763     return false;
 764
 765   Offset = N;
 766   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 767   unsigned ShAmt = 0;
 768   if (ShOpcVal != ARM_AM::no_shift) {
 769     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 770     // it.
 771     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 772       ShAmt = Sh->getZExtValue();
 773       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 774         Offset = N.getOperand(0);
 775       else {
 776         ShAmt = 0;
 777         ShOpcVal = ARM_AM::no_shift;
 778       }
 779     } else {
 780       ShOpcVal = ARM_AM::no_shift;
 781     }
 782   }
 783
 784   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 785                                   SDLoc(N), MVT::i32);
 786   return true;
 787 }
 788
 789 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 790                                             SDValue &Offset, SDValue &Opc) {
 791   unsigned Opcode = Op->getOpcode();
 792   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 793     ? cast<LoadSDNode>(Op)->getAddressingMode()
 794     : cast<StoreSDNode>(Op)->getAddressingMode();
 795   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 796     ? ARM_AM::add : ARM_AM::sub;
 797   int Val;
 798   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 799     if (AddSub == ARM_AM::sub) Val *= -1;
 800     Offset = CurDAG->getRegister(0, MVT::i32);
 801     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 802     return true;
 803   }
 804
 805   return false;
 806 }
 807
 808
 809 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 810                                             SDValue &Offset, SDValue &Opc) {
 811   unsigned Opcode = Op->getOpcode();
 812   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 813     ? cast<LoadSDNode>(Op)->getAddressingMode()
 814     : cast<StoreSDNode>(Op)->getAddressingMode();
 815   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 816     ? ARM_AM::add : ARM_AM::sub;
 817   int Val;
 818   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 819     Offset = CurDAG->getRegister(0, MVT::i32);
 820     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 821                                                       ARM_AM::no_shift),
 822                                     SDLoc(Op), MVT::i32);
 823     return true;
 824   }
 825
 826   return false;
 827 }
 828
/// Match any address as a bare base with no offset component.
///
/// \param N    The address value.
/// \param Base Set to \p N unchanged.
/// \returns true unconditionally.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
 833
 834 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 835                                       SDValue &Base, SDValue &Offset,
 836                                       SDValue &Opc) {
 837   if (N.getOpcode() == ISD::SUB) {
 838     // X - C  is canonicalize to X + -C, no need to handle it here.
 839     Base = N.getOperand(0);
 840     Offset = N.getOperand(1);
 841     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 842                                     MVT::i32);
 843     return true;
 844   }
 845
 846   if (!CurDAG->isBaseWithConstantOffset(N)) {
 847     Base = N;
 848     if (N.getOpcode() == ISD::FrameIndex) {
 849       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 850       Base = CurDAG->getTargetFrameIndex(
 851           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 852     }
 853     Offset = CurDAG->getRegister(0, MVT::i32);
 854     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 855                                     MVT::i32);
 856     return true;
 857   }
 858
 859   // If the RHS is +/- imm8, fold into addr mode.
 860   int RHSC;
 861   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 862                               -256 + 1, 256, RHSC)) { // 8 bits.
 863     Base = N.getOperand(0);
 864     if (Base.getOpcode() == ISD::FrameIndex) {
 865       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 866       Base = CurDAG->getTargetFrameIndex(
 867           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 868     }
 869     Offset = CurDAG->getRegister(0, MVT::i32);
 870
 871     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 872     if (RHSC < 0) {
 873       AddSub = ARM_AM::sub;
 874       RHSC = -RHSC;
 875     }
 876     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 877                                     MVT::i32);
 878     return true;
 879   }
 880
 881   Base = N.getOperand(0);
 882   Offset = N.getOperand(1);
 883   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 884                                   MVT::i32);
 885   return true;
 886 }
 887
 888 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 889                                             SDValue &Offset, SDValue &Opc) {
 890   unsigned Opcode = Op->getOpcode();
 891   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 892     ? cast<LoadSDNode>(Op)->getAddressingMode()
 893     : cast<StoreSDNode>(Op)->getAddressingMode();
 894   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 895     ? ARM_AM::add : ARM_AM::sub;
 896   int Val;
 897   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 898     Offset = CurDAG->getRegister(0, MVT::i32);
 899     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 900                                     MVT::i32);
 901     return true;
 902   }
 903
 904   Offset = N;
 905   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 906                                   MVT::i32);
 907   return true;
 908 }
 909
 910 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
 911                                         bool FP16) {
 912   if (!CurDAG->isBaseWithConstantOffset(N)) {
 913     Base = N;
 914     if (N.getOpcode() == ISD::FrameIndex) {
 915       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 916       Base = CurDAG->getTargetFrameIndex(
 917           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 918     } else if (N.getOpcode() == ARMISD::Wrapper &&
 919                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 920                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 921                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 922       Base = N.getOperand(0);
 923     }
 924     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 925                                        SDLoc(N), MVT::i32);
 926     return true;
 927   }
 928
 929   // If the RHS is +/- imm8, fold into addr mode.
 930   int RHSC;
 931   const int Scale = FP16 ? 2 : 4;
 932
 933   if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
 934     Base = N.getOperand(0);
 935     if (Base.getOpcode() == ISD::FrameIndex) {
 936       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 937       Base = CurDAG->getTargetFrameIndex(
 938           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 939     }
 940
 941     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 942     if (RHSC < 0) {
 943       AddSub = ARM_AM::sub;
 944       RHSC = -RHSC;
 945     }
 946
 947     if (FP16)
 948       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
 949                                          SDLoc(N), MVT::i32);
 950     else
 951       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 952                                          SDLoc(N), MVT::i32);
 953
 954     return true;
 955   }
 956
 957   Base = N;
 958
 959   if (FP16)
 960     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
 961                                        SDLoc(N), MVT::i32);
 962   else
 963     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 964                                        SDLoc(N), MVT::i32);
 965
 966   return true;
 967 }
 968
/// Match an addrmode5 address by forwarding to IsAddressingMode5 with FP16
/// disabled.
///
/// \param N      The address value.
/// \param Base   Receives the base selected by IsAddressingMode5.
/// \param Offset Receives the offset operand selected by IsAddressingMode5.
/// \returns whatever IsAddressingMode5 returns.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
 973
/// Match an FP16 addrmode5 address by forwarding to IsAddressingMode5 with
/// FP16 enabled.
///
/// \param N      The address value.
/// \param Base   Receives the base selected by IsAddressingMode5.
/// \param Offset Receives the offset operand selected by IsAddressingMode5.
/// \returns whatever IsAddressingMode5 returns.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
 978
 979 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 980                                       SDValue &Align) {
 981   Addr = N;
 982
 983   unsigned Alignment = 0;
 984
 985   MemSDNode *MemN = cast<MemSDNode>(Parent);
 986
 987   if (isa<LSBaseSDNode>(MemN) ||
 988       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
 989         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
 990        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
 991     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 992     // The maximum alignment is equal to the memory size being referenced.
 993     unsigned MMOAlign = MemN->getAlignment();
 994     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
 995     if (MMOAlign >= MemSize && MemSize > 1)
 996       Alignment = MemSize;
 997   } else {
 998     // All other uses of addrmode6 are for intrinsics.  For now just record
 999     // the raw alignment value; it will be refined later based on the legal
1000     // alignment operands for the intrinsic.
1001     Alignment = MemN->getAlignment();
1002   }
1003
1004   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1005   return true;
1006 }
1007
1008 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1009                                             SDValue &Offset) {
1010   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1011   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1012   if (AM != ISD::POST_INC)
1013     return false;
1014   Offset = N;
1015   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1016     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1017       Offset = CurDAG->getRegister(0, MVT::i32);
1018   }
1019   return true;
1020 }
1021
1022 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1023                                        SDValue &Offset, SDValue &Label) {
1024   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1025     Offset = N.getOperand(0);
1026     SDValue N1 = N.getOperand(1);
1027     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1028                                       SDLoc(N), MVT::i32);
1029     return true;
1030   }
1031
1032   return false;
1033 }
1034
1035
1036 //===----------------------------------------------------------------------===//
1037 //                         Thumb Addressing Modes
1038 //===----------------------------------------------------------------------===//
1039
1040 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1041   // Negative numbers are difficult to materialise in thumb1. If we are
1042   // selecting the add of a negative, instead try to select ri with a zero
1043   // offset, so create the add node directly which will become a sub.
1044   if (N.getOpcode() != ISD::ADD)
1045     return false;
1046
1047   // Look for an imm which is not legal for ld/st, but is legal for sub.
1048   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1049     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1050
1051   return false;
1052 }
1053
1054 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1055                                                 SDValue &Offset) {
1056   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1057     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1058     if (!NC || !NC->isNullValue())
1059       return false;
1060
1061     Base = Offset = N;
1062     return true;
1063   }
1064
1065   Base = N.getOperand(0);
1066   Offset = N.getOperand(1);
1067   return true;
1068 }
1069
1070 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1071                                             SDValue &Offset) {
1072   if (shouldUseZeroOffsetLdSt(N))
1073     return false; // Select ri instead
1074   return SelectThumbAddrModeRRSext(N, Base, Offset);
1075 }
1076
1077 bool
1078 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1079                                           SDValue &Base, SDValue &OffImm) {
1080   if (shouldUseZeroOffsetLdSt(N)) {
1081     Base = N;
1082     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1083     return true;
1084   }
1085
1086   if (!CurDAG->isBaseWithConstantOffset(N)) {
1087     if (N.getOpcode() == ISD::ADD) {
1088       return false; // We want to select register offset instead
1089     } else if (N.getOpcode() == ARMISD::Wrapper &&
1090         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1091         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1092         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1093         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1094       Base = N.getOperand(0);
1095     } else {
1096       Base = N;
1097     }
1098
1099     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1100     return true;
1101   }
1102
1103   // If the RHS is + imm5 * scale, fold into addr mode.
1104   int RHSC;
1105   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1106     Base = N.getOperand(0);
1107     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1108     return true;
1109   }
1110
1111   // Offset is too large, so use register offset instead.
1112   return false;
1113 }
1114
/// Match a Thumb base + imm5 address with a scale of 4 by forwarding to
/// SelectThumbAddrModeImm5S.
///
/// \param N      The address value.
/// \param Base   Receives the base selected by SelectThumbAddrModeImm5S.
/// \param OffImm Receives the immediate selected by SelectThumbAddrModeImm5S.
/// \returns whatever SelectThumbAddrModeImm5S returns.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1120
/// Match a Thumb base + imm5 address with a scale of 2 by forwarding to
/// SelectThumbAddrModeImm5S.
///
/// \param N      The address value.
/// \param Base   Receives the base selected by SelectThumbAddrModeImm5S.
/// \param OffImm Receives the immediate selected by SelectThumbAddrModeImm5S.
/// \returns whatever SelectThumbAddrModeImm5S returns.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1126
/// Match a Thumb base + imm5 address with a scale of 1 by forwarding to
/// SelectThumbAddrModeImm5S.
///
/// \param N      The address value.
/// \param Base   Receives the base selected by SelectThumbAddrModeImm5S.
/// \param OffImm Receives the immediate selected by SelectThumbAddrModeImm5S.
/// \returns whatever SelectThumbAddrModeImm5S returns.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1132
/// Match a Thumb frame-index address, optionally with a word-scaled constant
/// offset.
///
/// Besides selecting operands, this may raise the alignment of the referenced
/// frame object to 4 in the MachineFrameInfo.
///
/// \param N      The address value.
/// \param Base   On success, set to the target frame index. Note that it is
///               also overwritten with operand 0 of \p N on some paths that
///               end up returning false.
/// \param OffImm On success, set to a target constant holding the offset
///               scaled by 4 (0 for a bare frame index).
/// \returns true when \p N is a FrameIndex, or a FrameIndex plus a constant
///          that passes the range check, lies inside the object and whose
///          object is (or can be made) 4-byte aligned; false otherwise.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // Anything else must be base + constant to be considered.
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      // Base is written here even if one of the checks below rejects the
      // match.
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        // Only non-fixed objects have their alignment raised; fixed objects
        // must already be sufficiently aligned to match.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1178
1179
1180 //===----------------------------------------------------------------------===//
1181 //                        Thumb 2 Addressing Modes
1182 //===----------------------------------------------------------------------===//
1183
1184
/// Match a Thumb2 base + unsigned imm12 address.
///
/// \param N      The address value.
/// \param Base   Set to the selected base; a FrameIndex base becomes a target
///               frame index. May also be modified on paths that return false
///               (the constant-pool rejection and the imm8 probe).
/// \param OffImm Set to a target constant holding the offset (0 for the
///               base-only forms). May also be modified by the imm8 probe on a
///               path that returns false.
/// \returns false when the unwrapped base is a TargetConstantPool or when
///          SelectT2AddrModeImm8 accepts \p N; true otherwise.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Unwrap an ARMISD::Wrapper unless it wraps a global address, external
    // symbol or TLS global address.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    // Probe the imm8 matcher first; it writes Base/OffImm when it succeeds.
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    // A subtraction contributes the negated constant.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1240
1241 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1242                                            SDValue &Base, SDValue &OffImm) {
1243   // Match simple R - imm8 operands.
1244   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1245       !CurDAG->isBaseWithConstantOffset(N))
1246     return false;
1247
1248   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1249     int RHSC = (int)RHS->getSExtValue();
1250     if (N.getOpcode() == ISD::SUB)
1251       RHSC = -RHSC;
1252
1253     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1254       Base = N.getOperand(0);
1255       if (Base.getOpcode() == ISD::FrameIndex) {
1256         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1257         Base = CurDAG->getTargetFrameIndex(
1258             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1259       }
1260       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1261       return true;
1262     }
1263   }
1264
1265   return false;
1266 }
1267
1268 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1269                                                  SDValue &OffImm){
1270   unsigned Opcode = Op->getOpcode();
1271   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1272     ? cast<LoadSDNode>(Op)->getAddressingMode()
1273     : cast<StoreSDNode>(Op)->getAddressingMode();
1274   int RHSC;
1275   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1276     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1277       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1278       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1279     return true;
1280   }
1281
1282   return false;
1283 }
1284
1285 template <unsigned Shift>
1286 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1287                                            SDValue &OffImm) {
1288   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1289     int RHSC;
1290     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1291                                 RHSC)) {
1292       Base = N.getOperand(0);
1293       if (Base.getOpcode() == ISD::FrameIndex) {
1294         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1295         Base = CurDAG->getTargetFrameIndex(
1296             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1297       }
1298
1299       if (N.getOpcode() == ISD::SUB)
1300         RHSC = -RHSC;
1301       OffImm =
1302           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1303       return true;
1304     }
1305   }
1306
1307   // Base only.
1308   Base = N;
1309   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1310   return true;
1311 }
1312
/// Compile-time-shift entry point for the imm7 indexed offset matcher; it
/// forwards to the overload that takes the shift as a runtime argument.
///
/// \tparam Shift The shift amount forwarded as the last argument.
/// \param Op     The indexed memory node.
/// \param N      The offset value to match.
/// \param OffImm Receives the immediate selected by the forwarded call.
/// \returns whatever the forwarded call returns.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1318
1319 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1320                                                  SDValue &OffImm,
1321                                                  unsigned Shift) {
1322   unsigned Opcode = Op->getOpcode();
1323   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1324                                ? cast<LoadSDNode>(Op)->getAddressingMode()
1325                                : cast<StoreSDNode>(Op)->getAddressingMode();
1326   int RHSC;
1327   if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
1328     OffImm =
1329         ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1330             ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1331             : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1332                                         MVT::i32);
1333     return true;
1334   }
1335   return false;
1336 }
1337
/// Match a Thumb2 base + (register << [0..3]) address.
///
/// This may rewrite the DAG: when the offset is a multiply by a constant from
/// which a shift can be extracted, the multiply's constant operand is
/// replaced.
///
/// \param N      The address value.
/// \param Base   On success, set to the base register operand.
/// \param OffReg On success, set to the offset register (the shift's input
///               when a shift is folded).
/// \param ShImm  On success, set to a target constant holding the shift
///               amount.
/// \returns false when \p N is neither an ADD nor a base-with-constant-offset
///          expression, or when its constant is left to the imm12/imm8 forms;
///          true otherwise.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shift amounts below 4 that are judged profitable are folded;
      // otherwise the shift node itself stays as the offset register.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // OffReg is re-read through the handle after the replacement rather
      // than reused directly.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1397
1398 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1399                                                 SDValue &OffImm) {
1400   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1401   // instructions.
1402   Base = N;
1403   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1404
1405   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1406     return true;
1407
1408   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1409   if (!RHS)
1410     return true;
1411
1412   uint32_t RHSC = (int)RHS->getZExtValue();
1413   if (RHSC > 1020 || RHSC % 4 != 0)
1414     return true;
1415
1416   Base = N.getOperand(0);
1417   if (Base.getOpcode() == ISD::FrameIndex) {
1418     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1419     Base = CurDAG->getTargetFrameIndex(
1420         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1421   }
1422
1423   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1424   return true;
1425 }
1426
1427 //===--------------------------------------------------------------------===//
1428
/// getAL - Returns an ARMCC::AL immediate node.
///
/// \param CurDAG The DAG in which the i32 target constant is created.
/// \param dl     The location attached to the constant.
/// \returns an i32 target constant whose value is ARMCC::AL.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1433
/// Copy the memory operand of \p N onto the machine node \p Result.
///
/// \param N      The source node; must be a MemSDNode.
/// \param Result The destination node; must be a MachineSDNode. Its memory
///               references are set to the single operand taken from \p N.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1438
/// Try to select an indexed (pre/post increment or decrement) load as an ARM
/// LDR/LDRB/LDRH/LDRSH/LDRSB pre- or post-indexed machine instruction.
///
/// \param N The node to select; must be a LoadSDNode.
/// \returns true when \p N was replaced with a machine node; false when the
///          load is unindexed or no addressing-mode selector matched.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // The selectors below are tried in order and fill Offset/AMOpc when they
  // match. i32 loads try the addrmode2 immediate forms before the register
  // form.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3, in signed or unsigned flavour.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Sign-extending byte loads use addrmode3; other byte loads use addrmode2.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms are built without the Offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      // All other forms are built with both Offset and AMOpc.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1517
1518 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1519   LoadSDNode *LD = cast<LoadSDNode>(N);
1520   EVT LoadedVT = LD->getMemoryVT();
1521   ISD::MemIndexedMode AM = LD->getAddressingMode();
1522   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1523       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1524     return false;
1525
1526   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1527   if (!COffs || COffs->getZExtValue() != 4)
1528     return false;
1529
1530   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1531   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1532   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1533   // ISel.
1534   SDValue Chain = LD->getChain();
1535   SDValue Base = LD->getBasePtr();
1536   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1537                    CurDAG->getRegister(0, MVT::i32), Chain };
1538   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1539                                        MVT::i32, MVT::Other, Ops);
1540   transferMemOperands(N, New);
1541   ReplaceNode(N, New);
1542   return true;
1543 }
1544
1545 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1546   LoadSDNode *LD = cast<LoadSDNode>(N);
1547   ISD::MemIndexedMode AM = LD->getAddressingMode();
1548   if (AM == ISD::UNINDEXED)
1549     return false;
1550
1551   EVT LoadedVT = LD->getMemoryVT();
1552   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1553   SDValue Offset;
1554   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1555   unsigned Opcode = 0;
1556   bool Match = false;
1557   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1558     switch (LoadedVT.getSimpleVT().SimpleTy) {
1559     case MVT::i32:
1560       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1561       break;
1562     case MVT::i16:
1563       if (isSExtLd)
1564         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1565       else
1566         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1567       break;
1568     case MVT::i8:
1569     case MVT::i1:
1570       if (isSExtLd)
1571         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1572       else
1573         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1574       break;
1575     default:
1576       return false;
1577     }
1578     Match = true;
1579   }
1580
1581   if (Match) {
1582     SDValue Chain = LD->getChain();
1583     SDValue Base = LD->getBasePtr();
1584     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1585                      CurDAG->getRegister(0, MVT::i32), Chain };
1586     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1587                                          MVT::Other, Ops);
1588     transferMemOperands(N, New);
1589     ReplaceNode(N, New);
1590     return true;
1591   }
1592
1593   return false;
1594 }
1595
1596 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1597   LoadSDNode *LD = cast<LoadSDNode>(N);
1598   ISD::MemIndexedMode AM = LD->getAddressingMode();
1599   if (AM == ISD::UNINDEXED)
1600     return false;
1601   EVT LoadedVT = LD->getMemoryVT();
1602   if (!LoadedVT.isVector())
1603     return false;
1604   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1605   SDValue Offset;
1606   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1607   unsigned Opcode = 0;
1608   unsigned Align = LD->getAlignment();
1609   bool IsLE = Subtarget->isLittle();
1610
1611   if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1612       SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1613     if (isSExtLd)
1614       Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1615     else
1616       Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1617   } else if (LoadedVT == MVT::v8i8 &&
1618              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1619     if (isSExtLd)
1620       Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1621     else
1622       Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1623   } else if (LoadedVT == MVT::v4i8 &&
1624              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1625     if (isSExtLd)
1626       Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1627     else
1628       Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1629   } else if (Align >= 4 &&
1630              (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1631              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1632     Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1633   else if (Align >= 2 &&
1634            (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1635            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1636     Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1637   else if ((IsLE || LoadedVT == MVT::v16i8) &&
1638            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1639     Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1640   else
1641     return false;
1642
1643   SDValue Chain = LD->getChain();
1644   SDValue Base = LD->getBasePtr();
1645   SDValue Ops[] = {Base, Offset,
1646                    CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1647                    CurDAG->getRegister(0, MVT::i32), Chain};
1648   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
1649                                        MVT::i32, MVT::Other, Ops);
1650   transferMemOperands(N, New);
1651   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1652   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1653   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1654   CurDAG->RemoveDeadNode(N);
1655   return true;
1656 }
1657
1658 /// Form a GPRPair pseudo register from a pair of GPR regs.
1659 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1660   SDLoc dl(V0.getNode());
1661   SDValue RegClass =
1662     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1663   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1664   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1665   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1666   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1667 }
1668
1669 /// Form a D register from a pair of S registers.
1670 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1671   SDLoc dl(V0.getNode());
1672   SDValue RegClass =
1673     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1674   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1675   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1676   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1677   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1678 }
1679
1680 /// Form a quad register from a pair of D registers.
1681 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1682   SDLoc dl(V0.getNode());
1683   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1684                                                MVT::i32);
1685   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1686   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1687   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1688   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1689 }
1690
1691 /// Form 4 consecutive D registers from a pair of Q registers.
1692 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1693   SDLoc dl(V0.getNode());
1694   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1695                                                MVT::i32);
1696   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1697   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1698   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1699   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1700 }
1701
1702 /// Form 4 consecutive S registers.
1703 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1704                                    SDValue V2, SDValue V3) {
1705   SDLoc dl(V0.getNode());
1706   SDValue RegClass =
1707     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1708   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1709   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1710   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1711   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1712   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1713                                     V2, SubReg2, V3, SubReg3 };
1714   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1715 }
1716
1717 /// Form 4 consecutive D registers.
1718 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1719                                    SDValue V2, SDValue V3) {
1720   SDLoc dl(V0.getNode());
1721   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1722                                                MVT::i32);
1723   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1724   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1725   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1726   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1727   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1728                                     V2, SubReg2, V3, SubReg3 };
1729   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1730 }
1731
1732 /// Form 4 consecutive Q registers.
1733 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1734                                    SDValue V2, SDValue V3) {
1735   SDLoc dl(V0.getNode());
1736   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1737                                                MVT::i32);
1738   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1739   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1740   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1741   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1742   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1743                                     V2, SubReg2, V3, SubReg3 };
1744   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1745 }
1746
1747 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1748 /// of a NEON VLD or VST instruction.  The supported values depend on the
1749 /// number of registers being loaded.
1750 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1751                                        unsigned NumVecs, bool is64BitVector) {
1752   unsigned NumRegs = NumVecs;
1753   if (!is64BitVector && NumVecs < 3)
1754     NumRegs *= 2;
1755
1756   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1757   if (Alignment >= 32 && NumRegs == 4)
1758     Alignment = 32;
1759   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1760     Alignment = 16;
1761   else if (Alignment >= 8)
1762     Alignment = 8;
1763   else
1764     Alignment = 0;
1765
1766   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1767 }
1768
1769 static bool isVLDfixed(unsigned Opc)
1770 {
1771   switch (Opc) {
1772   default: return false;
1773   case ARM::VLD1d8wb_fixed : return true;
1774   case ARM::VLD1d16wb_fixed : return true;
1775   case ARM::VLD1d64Qwb_fixed : return true;
1776   case ARM::VLD1d32wb_fixed : return true;
1777   case ARM::VLD1d64wb_fixed : return true;
1778   case ARM::VLD1d64TPseudoWB_fixed : return true;
1779   case ARM::VLD1d64QPseudoWB_fixed : return true;
1780   case ARM::VLD1q8wb_fixed : return true;
1781   case ARM::VLD1q16wb_fixed : return true;
1782   case ARM::VLD1q32wb_fixed : return true;
1783   case ARM::VLD1q64wb_fixed : return true;
1784   case ARM::VLD1DUPd8wb_fixed : return true;
1785   case ARM::VLD1DUPd16wb_fixed : return true;
1786   case ARM::VLD1DUPd32wb_fixed : return true;
1787   case ARM::VLD1DUPq8wb_fixed : return true;
1788   case ARM::VLD1DUPq16wb_fixed : return true;
1789   case ARM::VLD1DUPq32wb_fixed : return true;
1790   case ARM::VLD2d8wb_fixed : return true;
1791   case ARM::VLD2d16wb_fixed : return true;
1792   case ARM::VLD2d32wb_fixed : return true;
1793   case ARM::VLD2q8PseudoWB_fixed : return true;
1794   case ARM::VLD2q16PseudoWB_fixed : return true;
1795   case ARM::VLD2q32PseudoWB_fixed : return true;
1796   case ARM::VLD2DUPd8wb_fixed : return true;
1797   case ARM::VLD2DUPd16wb_fixed : return true;
1798   case ARM::VLD2DUPd32wb_fixed : return true;
1799   }
1800 }
1801
1802 static bool isVSTfixed(unsigned Opc)
1803 {
1804   switch (Opc) {
1805   default: return false;
1806   case ARM::VST1d8wb_fixed : return true;
1807   case ARM::VST1d16wb_fixed : return true;
1808   case ARM::VST1d32wb_fixed : return true;
1809   case ARM::VST1d64wb_fixed : return true;
1810   case ARM::VST1q8wb_fixed : return true;
1811   case ARM::VST1q16wb_fixed : return true;
1812   case ARM::VST1q32wb_fixed : return true;
1813   case ARM::VST1q64wb_fixed : return true;
1814   case ARM::VST1d64TPseudoWB_fixed : return true;
1815   case ARM::VST1d64QPseudoWB_fixed : return true;
1816   case ARM::VST2d8wb_fixed : return true;
1817   case ARM::VST2d16wb_fixed : return true;
1818   case ARM::VST2d32wb_fixed : return true;
1819   case ARM::VST2q8PseudoWB_fixed : return true;
1820   case ARM::VST2q16PseudoWB_fixed : return true;
1821   case ARM::VST2q32PseudoWB_fixed : return true;
1822   }
1823 }
1824
1825 // Get the register stride update opcode of a VLD/VST instruction that
1826 // is otherwise equivalent to the given fixed stride updating instruction.
1827 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1828   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1829     && "Incorrect fixed stride updating instruction.");
1830   switch (Opc) {
1831   default: break;
1832   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1833   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1834   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1835   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1836   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1837   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1838   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1839   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1840   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1841   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1842   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1843   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1844   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1845   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1846   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1847   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1848   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1849   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1850
1851   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1852   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1853   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1854   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1855   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1856   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1857   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1858   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1859   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1860   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1861
1862   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1863   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1864   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1865   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1866   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1867   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1868
1869   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1870   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1871   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1872   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1873   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1874   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1875
1876   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1877   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1878   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1879   }
1880   return Opc; // If not one we handle, return it unchanged.
1881 }
1882
1883 /// Returns true if the given increment is a Constant known to be equal to the
1884 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1885 /// be used.
1886 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1887   auto C = dyn_cast<ConstantSDNode>(Inc);
1888   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1889 }
1890
/// Select a NEON VLD1-4 node (optionally with address write-back) into
/// machine nodes.
///
/// \param N          The node being selected; it must be a MemIntrinsicSDNode.
/// \param isUpdating True when the node also produces an updated address; an
///                   extra i32 result is then created and the increment is
///                   read from the operand following the address.
/// \param NumVecs    Number of vectors loaded, 1 to 4.
/// \param DOpcodes   Opcode table used for 64-bit vector types.
/// \param QOpcodes0  Opcode table used for 128-bit vector types; when
///                   NumVecs > 2 it supplies the first of two loads.
/// \param QOpcodes1  Opcode table for the second load of a 128-bit VLD3/4.
///
/// All tables are indexed by element size (see OpcodeIndex below). If
/// SelectAddrMode6 fails the function returns without selecting anything.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  // The address is operand 2 of an intrinsic node and operand 1 otherwise.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit and
  // 3 = 64-bit elements, for both the D and the Q tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // A single vector is produced directly in VT. Several vectors are produced
  // as one wide vector of i64 elements: one element per 64-bit vector (three
  // vectors are rounded up to four), twice that for 128-bit vectors.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list: loaded value, optional updated address, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    // The second load is chained after the first one.
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    // Its address is the updated address produced by the first load, and the
    // first load's value is passed in as an input operand.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  // Only the final node (VLd) receives the memory operand here.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  // With a single vector the results of VLd line up with those of N.
  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  // Results 0..NumVecs-1 of N become subregister extracts of the wide value.
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // The remaining results of N map onto results 1 (and, when updating, 2) of
  // the new node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2029
/// Select a NEON VST1-4 node (optionally with address write-back) into
/// machine nodes.
///
/// \param N          The node being selected; it must be a MemIntrinsicSDNode.
/// \param isUpdating True when the node also produces an updated address; an
///                   i32 result is then created and the increment is read
///                   from the operand following the address.
/// \param NumVecs    Number of vectors stored, 1 to 4.
/// \param DOpcodes   Opcode table used for 64-bit vector types.
/// \param QOpcodes0  Opcode table used for 128-bit vector types; when
///                   NumVecs > 2 it supplies the first of two stores.
/// \param QOpcodes1  Opcode table for the second store of a 128-bit VST3/4.
///
/// All tables are indexed by element size (see OpcodeIndex below). If
/// SelectAddrMode6 fails the function returns without selecting anything.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  // The address is operand 2 of an intrinsic node and operand 1 otherwise;
  // either way the first vector to store is operand 3.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit and
  // 3 = 64-bit elements, for both the D and the Q tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Result list: optional updated address, then the chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Gather the vectors to store into one source value.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  // For a VST3 the fourth slot is filled with an IMPLICIT_DEF.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  // The second store is chained after the first one.
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  // Its address is the updated address produced by the first store.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  // Both stores carry the original memory operand.
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2180
2181 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2182                                       unsigned NumVecs,
2183                                       const uint16_t *DOpcodes,
2184                                       const uint16_t *QOpcodes) {
2185   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2186   SDLoc dl(N);
2187
2188   SDValue MemAddr, Align;
2189   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2190                                    // nodes are not intrinsics.
2191   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2192   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2193   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2194     return;
2195
2196   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2197
2198   SDValue Chain = N->getOperand(0);
2199   unsigned Lane =
2200     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2201   EVT VT = N->getOperand(Vec0Idx).getValueType();
2202   bool is64BitVector = VT.is64BitVector();
2203
2204   unsigned Alignment = 0;
2205   if (NumVecs != 3) {
2206     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2207     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2208     if (Alignment > NumBytes)
2209       Alignment = NumBytes;
2210     if (Alignment < 8 && Alignment < NumBytes)
2211       Alignment = 0;
2212     // Alignment must be a power of two; make sure of that.
2213     Alignment = (Alignment & -Alignment);
2214     if (Alignment == 1)
2215       Alignment = 0;
2216   }
2217   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2218
2219   unsigned OpcodeIndex;
2220   switch (VT.getSimpleVT().SimpleTy) {
2221   default: llvm_unreachable("unhandled vld/vst lane type");
2222     // Double-register operations:
2223   case MVT::v8i8:  OpcodeIndex = 0; break;
2224   case MVT::v4f16:
2225   case MVT::v4i16: OpcodeIndex = 1; break;
2226   case MVT::v2f32:
2227   case MVT::v2i32: OpcodeIndex = 2; break;
2228     // Quad-register operations:
2229   case MVT::v8f16:
2230   case MVT::v8i16: OpcodeIndex = 0; break;
2231   case MVT::v4f32:
2232   case MVT::v4i32: OpcodeIndex = 1; break;
2233   }
2234
2235   std::vector<EVT> ResTys;
2236   if (IsLoad) {
2237     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2238     if (!is64BitVector)
2239       ResTyElts *= 2;
2240     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2241                                       MVT::i64, ResTyElts));
2242   }
2243   if (isUpdating)
2244     ResTys.push_back(MVT::i32);
2245   ResTys.push_back(MVT::Other);
2246
2247   SDValue Pred = getAL(CurDAG, dl);
2248   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2249
2250   SmallVector<SDValue, 8> Ops;
2251   Ops.push_back(MemAddr);
2252   Ops.push_back(Align);
2253   if (isUpdating) {
2254     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2255     bool IsImmUpdate =
2256         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2257     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2258   }
2259
2260   SDValue SuperReg;
2261   SDValue V0 = N->getOperand(Vec0Idx + 0);
2262   SDValue V1 = N->getOperand(Vec0Idx + 1);
2263   if (NumVecs == 2) {
2264     if (is64BitVector)
2265       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2266     else
2267       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2268   } else {
2269     SDValue V2 = N->getOperand(Vec0Idx + 2);
2270     SDValue V3 = (NumVecs == 3)
2271       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2272       : N->getOperand(Vec0Idx + 3);
2273     if (is64BitVector)
2274       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2275     else
2276       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2277   }
2278   Ops.push_back(SuperReg);
2279   Ops.push_back(getI32Imm(Lane, dl));
2280   Ops.push_back(Pred);
2281   Ops.push_back(Reg0);
2282   Ops.push_back(Chain);
2283
2284   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2285                                   QOpcodes[OpcodeIndex]);
2286   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2287   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2288   if (!IsLoad) {
2289     ReplaceNode(N, VLdLn);
2290     return;
2291   }
2292
2293   // Extract the subregisters.
2294   SuperReg = SDValue(VLdLn, 0);
2295   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2296                     ARM::qsub_3 == ARM::qsub_0 + 3,
2297                 "Unexpected subreg numbering");
2298   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2299   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2300     ReplaceUses(SDValue(N, Vec),
2301                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2302   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2303   if (isUpdating)
2304     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2305   CurDAG->RemoveDeadNode(N);
2306 }
2307
2308 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2309                                    bool isUpdating, unsigned NumVecs,
2310                                    const uint16_t *DOpcodes,
2311                                    const uint16_t *QOpcodes0,
2312                                    const uint16_t *QOpcodes1) {
2313   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2314   SDLoc dl(N);
2315
2316   SDValue MemAddr, Align;
2317   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2318   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2319     return;
2320
2321   SDValue Chain = N->getOperand(0);
2322   EVT VT = N->getValueType(0);
2323   bool is64BitVector = VT.is64BitVector();
2324
2325   unsigned Alignment = 0;
2326   if (NumVecs != 3) {
2327     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2328     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2329     if (Alignment > NumBytes)
2330       Alignment = NumBytes;
2331     if (Alignment < 8 && Alignment < NumBytes)
2332       Alignment = 0;
2333     // Alignment must be a power of two; make sure of that.
2334     Alignment = (Alignment & -Alignment);
2335     if (Alignment == 1)
2336       Alignment = 0;
2337   }
2338   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2339
2340   unsigned OpcodeIndex;
2341   switch (VT.getSimpleVT().SimpleTy) {
2342   default: llvm_unreachable("unhandled vld-dup type");
2343   case MVT::v8i8:
2344   case MVT::v16i8: OpcodeIndex = 0; break;
2345   case MVT::v4i16:
2346   case MVT::v8i16:
2347   case MVT::v4f16:
2348   case MVT::v8f16:
2349                   OpcodeIndex = 1; break;
2350   case MVT::v2f32:
2351   case MVT::v2i32:
2352   case MVT::v4f32:
2353   case MVT::v4i32: OpcodeIndex = 2; break;
2354   case MVT::v1f64:
2355   case MVT::v1i64: OpcodeIndex = 3; break;
2356   }
2357
2358   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2359   if (!is64BitVector)
2360     ResTyElts *= 2;
2361   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2362
2363   std::vector<EVT> ResTys;
2364   ResTys.push_back(ResTy);
2365   if (isUpdating)
2366     ResTys.push_back(MVT::i32);
2367   ResTys.push_back(MVT::Other);
2368
2369   SDValue Pred = getAL(CurDAG, dl);
2370   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2371
2372   SDNode *VLdDup;
2373   if (is64BitVector || NumVecs == 1) {
2374     SmallVector<SDValue, 6> Ops;
2375     Ops.push_back(MemAddr);
2376     Ops.push_back(Align);
2377     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2378                                    QOpcodes0[OpcodeIndex];
2379     if (isUpdating) {
2380       // fixed-stride update instructions don't have an explicit writeback
2381       // operand. It's implicit in the opcode itself.
2382       SDValue Inc = N->getOperand(2);
2383       bool IsImmUpdate =
2384           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2385       if (NumVecs <= 2 && !IsImmUpdate)
2386         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2387       if (!IsImmUpdate)
2388         Ops.push_back(Inc);
2389       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2390       else if (NumVecs > 2)
2391         Ops.push_back(Reg0);
2392     }
2393     Ops.push_back(Pred);
2394     Ops.push_back(Reg0);
2395     Ops.push_back(Chain);
2396     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2397   } else if (NumVecs == 2) {
2398     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2399     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2400                                           dl, ResTys, OpsA);
2401
2402     Chain = SDValue(VLdA, 1);
2403     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2404     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2405   } else {
2406     SDValue ImplDef =
2407       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2408     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2409     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2410                                           dl, ResTys, OpsA);
2411
2412     SDValue SuperReg = SDValue(VLdA, 0);
2413     Chain = SDValue(VLdA, 1);
2414     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2415     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2416   }
2417
2418   // Transfer memoperands.
2419   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2420   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2421
2422   // Extract the subregisters.
2423   if (NumVecs == 1) {
2424     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2425   } else {
2426     SDValue SuperReg = SDValue(VLdDup, 0);
2427     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2428     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2429     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2430       ReplaceUses(SDValue(N, Vec),
2431                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2432     }
2433   }
2434   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2435   if (isUpdating)
2436     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2437   CurDAG->RemoveDeadNode(N);
2438 }
2439
2440 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2441   if (!Subtarget->hasV6T2Ops())
2442     return false;
2443
2444   unsigned Opc = isSigned
2445     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2446     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2447   SDLoc dl(N);
2448
2449   // For unsigned extracts, check for a shift right and mask
2450   unsigned And_imm = 0;
2451   if (N->getOpcode() == ISD::AND) {
2452     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2453
2454       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2455       if (And_imm & (And_imm + 1))
2456         return false;
2457
2458       unsigned Srl_imm = 0;
2459       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2460                                 Srl_imm)) {
2461         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2462
2463         // Mask off the unnecessary bits of the AND immediate; normally
2464         // DAGCombine will do this, but that might not happen if
2465         // targetShrinkDemandedConstant chooses a different immediate.
2466         And_imm &= -1U >> Srl_imm;
2467
2468         // Note: The width operand is encoded as width-1.
2469         unsigned Width = countTrailingOnes(And_imm) - 1;
2470         unsigned LSB = Srl_imm;
2471
2472         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2473
2474         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2475           // It's cheaper to use a right shift to extract the top bits.
2476           if (Subtarget->isThumb()) {
2477             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2478             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2479                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2480                               getAL(CurDAG, dl), Reg0, Reg0 };
2481             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2482             return true;
2483           }
2484
2485           // ARM models shift instructions as MOVsi with shifter operand.
2486           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2487           SDValue ShOpc =
2488             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2489                                       MVT::i32);
2490           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2491                             getAL(CurDAG, dl), Reg0, Reg0 };
2492           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2493           return true;
2494         }
2495
2496         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2497         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2498                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2499                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2500                           getAL(CurDAG, dl), Reg0 };
2501         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2502         return true;
2503       }
2504     }
2505     return false;
2506   }
2507
2508   // Otherwise, we're looking for a shift of a shift
2509   unsigned Shl_imm = 0;
2510   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2511     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2512     unsigned Srl_imm = 0;
2513     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2514       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2515       // Note: The width operand is encoded as width-1.
2516       unsigned Width = 32 - Srl_imm - 1;
2517       int LSB = Srl_imm - Shl_imm;
2518       if (LSB < 0)
2519         return false;
2520       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2521       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2522       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2523                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2524                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2525                         getAL(CurDAG, dl), Reg0 };
2526       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2527       return true;
2528     }
2529   }
2530
2531   // Or we are looking for a shift of an and, with a mask operand
2532   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2533       isShiftedMask_32(And_imm)) {
2534     unsigned Srl_imm = 0;
2535     unsigned LSB = countTrailingZeros(And_imm);
2536     // Shift must be the same as the ands lsb
2537     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2538       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2539       unsigned MSB = 31 - countLeadingZeros(And_imm);
2540       // Note: The width operand is encoded as width-1.
2541       unsigned Width = MSB - LSB;
2542       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2543       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2544       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2545                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2546                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2547                         getAL(CurDAG, dl), Reg0 };
2548       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2549       return true;
2550     }
2551   }
2552
2553   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2554     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2555     unsigned LSB = 0;
2556     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2557         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2558       return false;
2559
2560     if (LSB + Width > 32)
2561       return false;
2562
2563     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2564     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2565     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2566                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2567                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2568                       getAL(CurDAG, dl), Reg0 };
2569     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2570     return true;
2571   }
2572
2573   return false;
2574 }
2575
2576 /// Target-specific DAG combining for ISD::XOR.
2577 /// Target-independent combining lowers SELECT_CC nodes of the form
2578 /// select_cc setg[ge] X,  0,  X, -X
2579 /// select_cc setgt    X, -1,  X, -X
2580 /// select_cc setl[te] X,  0, -X,  X
2581 /// select_cc setlt    X,  1, -X,  X
2582 /// which represent Integer ABS into:
2583 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2584 /// ARM instruction selection detects the latter and matches it to
2585 /// ARM::ABS or ARM::t2ABS machine node.
2586 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2587   SDValue XORSrc0 = N->getOperand(0);
2588   SDValue XORSrc1 = N->getOperand(1);
2589   EVT VT = N->getValueType(0);
2590
2591   if (Subtarget->isThumb1Only())
2592     return false;
2593
2594   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2595     return false;
2596
2597   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2598   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2599   SDValue SRASrc0 = XORSrc1.getOperand(0);
2600   SDValue SRASrc1 = XORSrc1.getOperand(1);
2601   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2602   EVT XType = SRASrc0.getValueType();
2603   unsigned Size = XType.getSizeInBits() - 1;
2604
2605   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2606       XType.isInteger() && SRAConstant != nullptr &&
2607       Size == SRAConstant->getZExtValue()) {
2608     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2609     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2610     return true;
2611   }
2612
2613   return false;
2614 }
2615
2616 /// We've got special pseudo-instructions for these
2617 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2618   unsigned Opcode;
2619   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2620   if (MemTy == MVT::i8)
2621     Opcode = ARM::CMP_SWAP_8;
2622   else if (MemTy == MVT::i16)
2623     Opcode = ARM::CMP_SWAP_16;
2624   else if (MemTy == MVT::i32)
2625     Opcode = ARM::CMP_SWAP_32;
2626   else
2627     llvm_unreachable("Unknown AtomicCmpSwap type");
2628
2629   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2630                    N->getOperand(0)};
2631   SDNode *CmpSwap = CurDAG->getMachineNode(
2632       Opcode, SDLoc(N),
2633       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2634
2635   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2636   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2637
2638   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2639   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2640   CurDAG->RemoveDeadNode(N);
2641 }
2642
2643 static Optional<std::pair<unsigned, unsigned>>
2644 getContiguousRangeOfSetBits(const APInt &A) {
2645   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2646   unsigned LastOne = A.countTrailingZeros();
2647   if (A.countPopulation() != (FirstOne - LastOne + 1))
2648     return Optional<std::pair<unsigned,unsigned>>();
2649   return std::make_pair(FirstOne, LastOne);
2650 }
2651
2652 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2653   assert(N->getOpcode() == ARMISD::CMPZ);
2654   SwitchEQNEToPLMI = false;
2655
2656   if (!Subtarget->isThumb())
2657     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2658     // LSR don't exist as standalone instructions - they need the barrel shifter.
2659     return;
2660
2661   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2662   SDValue And = N->getOperand(0);
2663   if (!And->hasOneUse())
2664     return;
2665
2666   SDValue Zero = N->getOperand(1);
2667   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2668       And->getOpcode() != ISD::AND)
2669     return;
2670   SDValue X = And.getOperand(0);
2671   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2672
2673   if (!C)
2674     return;
2675   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2676   if (!Range)
2677     return;
2678
2679   // There are several ways to lower this:
2680   SDNode *NewN;
2681   SDLoc dl(N);
2682
2683   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2684     if (Subtarget->isThumb2()) {
2685       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2686       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2687                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2688                         CurDAG->getRegister(0, MVT::i32) };
2689       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2690     } else {
2691       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2692                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2693                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2694       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2695     }
2696   };
2697
2698   if (Range->second == 0) {
2699     //  1. Mask includes the LSB -> Simply shift the top N bits off
2700     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2701     ReplaceNode(And.getNode(), NewN);
2702   } else if (Range->first == 31) {
2703     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2704     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2705     ReplaceNode(And.getNode(), NewN);
2706   } else if (Range->first == Range->second) {
2707     //  3. Only one bit is set. We can shift this into the sign bit and use a
2708     //     PL/MI comparison.
2709     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2710     ReplaceNode(And.getNode(), NewN);
2711
2712     SwitchEQNEToPLMI = true;
2713   } else if (!Subtarget->hasV6T2Ops()) {
2714     //  4. Do a double shift to clear bottom and top bits, but only in
2715     //     thumb-1 mode as in thumb-2 we can use UBFX.
2716     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2717     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2718                      Range->second + (31 - Range->first));
2719     ReplaceNode(And.getNode(), NewN);
2720   }
2721
2722 }
2723
2724 void ARMDAGToDAGISel::Select(SDNode *N) {
2725   SDLoc dl(N);
2726
2727   if (N->isMachineOpcode()) {
2728     N->setNodeId(-1);
2729     return;   // Already selected.
2730   }
2731
2732   switch (N->getOpcode()) {
2733   default: break;
2734   case ISD::STORE: {
2735     // For Thumb1, match an sp-relative store in C++. This is a little
2736     // unfortunate, but I don't think I can make the chain check work
2737     // otherwise.  (The chain of the store has to be the same as the chain
2738     // of the CopyFromReg, or else we can't replace the CopyFromReg with
2739     // a direct reference to "SP".)
2740     //
2741     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2742     // a different addressing mode from other four-byte stores.
2743     //
2744     // This pattern usually comes up with call arguments.
2745     StoreSDNode *ST = cast<StoreSDNode>(N);
2746     SDValue Ptr = ST->getBasePtr();
2747     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2748       int RHSC = 0;
2749       if (Ptr.getOpcode() == ISD::ADD &&
2750           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2751         Ptr = Ptr.getOperand(0);
2752
2753       if (Ptr.getOpcode() == ISD::CopyFromReg &&
2754           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2755           Ptr.getOperand(0) == ST->getChain()) {
2756         SDValue Ops[] = {ST->getValue(),
2757                          CurDAG->getRegister(ARM::SP, MVT::i32),
2758                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2759                          getAL(CurDAG, dl),
2760                          CurDAG->getRegister(0, MVT::i32),
2761                          ST->getChain()};
2762         MachineSDNode *ResNode =
2763             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2764         MachineMemOperand *MemOp = ST->getMemOperand();
2765         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2766         ReplaceNode(N, ResNode);
2767         return;
2768       }
2769     }
2770     break;
2771   }
2772   case ISD::WRITE_REGISTER:
2773     if (tryWriteRegister(N))
2774       return;
2775     break;
2776   case ISD::READ_REGISTER:
2777     if (tryReadRegister(N))
2778       return;
2779     break;
2780   case ISD::INLINEASM:
2781   case ISD::INLINEASM_BR:
2782     if (tryInlineAsm(N))
2783       return;
2784     break;
2785   case ISD::XOR:
2786     // Select special operations if XOR node forms integer ABS pattern
2787     if (tryABSOp(N))
2788       return;
2789     // Other cases are autogenerated.
2790     break;
2791   case ISD::Constant: {
2792     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2793     // If we can't materialize the constant we need to use a literal pool
2794     if (ConstantMaterializationCost(Val) > 2) {
2795       SDValue CPIdx = CurDAG->getTargetConstantPool(
2796           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2797           TLI->getPointerTy(CurDAG->getDataLayout()));
2798
2799       SDNode *ResNode;
2800       if (Subtarget->isThumb()) {
2801         SDValue Ops[] = {
2802           CPIdx,
2803           getAL(CurDAG, dl),
2804           CurDAG->getRegister(0, MVT::i32),
2805           CurDAG->getEntryNode()
2806         };
2807         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2808                                          Ops);
2809       } else {
2810         SDValue Ops[] = {
2811           CPIdx,
2812           CurDAG->getTargetConstant(0, dl, MVT::i32),
2813           getAL(CurDAG, dl),
2814           CurDAG->getRegister(0, MVT::i32),
2815           CurDAG->getEntryNode()
2816         };
2817         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2818                                          Ops);
2819       }
2820       // Annotate the Node with memory operand information so that MachineInstr
2821       // queries work properly. This e.g. gives the register allocation the
2822       // required information for rematerialization.
2823       MachineFunction& MF = CurDAG->getMachineFunction();
2824       MachineMemOperand *MemOp =
2825           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2826                                   MachineMemOperand::MOLoad, 4, 4);
2827
2828       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2829
2830       ReplaceNode(N, ResNode);
2831       return;
2832     }
2833
2834     // Other cases are autogenerated.
2835     break;
2836   }
2837   case ISD::FrameIndex: {
2838     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2839     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2840     SDValue TFI = CurDAG->getTargetFrameIndex(
2841         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2842     if (Subtarget->isThumb1Only()) {
2843       // Set the alignment of the frame object to 4, to avoid having to generate
2844       // more than one ADD
2845       MachineFrameInfo &MFI = MF->getFrameInfo();
2846       if (MFI.getObjectAlignment(FI) < 4)
2847         MFI.setObjectAlignment(FI, 4);
2848       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2849                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2850       return;
2851     } else {
2852       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2853                       ARM::t2ADDri : ARM::ADDri);
2854       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2855                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2856                         CurDAG->getRegister(0, MVT::i32) };
2857       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2858       return;
2859     }
2860   }
2861   case ISD::SRL:
2862     if (tryV6T2BitfieldExtractOp(N, false))
2863       return;
2864     break;
2865   case ISD::SIGN_EXTEND_INREG:
2866   case ISD::SRA:
2867     if (tryV6T2BitfieldExtractOp(N, true))
2868       return;
2869     break;
2870   case ISD::MUL:
2871     if (Subtarget->isThumb1Only())
2872       break;
2873     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2874       unsigned RHSV = C->getZExtValue();
2875       if (!RHSV) break;
2876       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2877         unsigned ShImm = Log2_32(RHSV-1);
2878         if (ShImm >= 32)
2879           break;
2880         SDValue V = N->getOperand(0);
2881         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2882         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2883         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2884         if (Subtarget->isThumb()) {
2885           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2886           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2887           return;
2888         } else {
2889           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2890                             Reg0 };
2891           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2892           return;
2893         }
2894       }
2895       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2896         unsigned ShImm = Log2_32(RHSV+1);
2897         if (ShImm >= 32)
2898           break;
2899         SDValue V = N->getOperand(0);
2900         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2901         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2902         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2903         if (Subtarget->isThumb()) {
2904           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2905           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2906           return;
2907         } else {
2908           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2909                             Reg0 };
2910           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2911           return;
2912         }
2913       }
2914     }
2915     break;
2916   case ISD::AND: {
2917     // Check for unsigned bitfield extract
2918     if (tryV6T2BitfieldExtractOp(N, false))
2919       return;
2920
2921     // If an immediate is used in an AND node, it is possible that the immediate
2922     // can be more optimally materialized when negated. If this is the case we
2923     // can negate the immediate and use a BIC instead.
2924     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2925     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2926       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2927
2928       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2929       // immediate can be negated and fit in the immediate operand of
2930       // a t2BIC, don't do any manual transform here as this can be
2931       // handled by the generic ISel machinery.
2932       bool PreferImmediateEncoding =
2933         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2934       if (!PreferImmediateEncoding &&
2935           ConstantMaterializationCost(Imm) >
2936               ConstantMaterializationCost(~Imm)) {
2937         // The current immediate costs more to materialize than a negated
2938         // immediate, so negate the immediate and use a BIC.
2939         SDValue NewImm =
2940           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2941         // If the new constant didn't exist before, reposition it in the topological
2942         // ordering so it is just before N. Otherwise, don't touch its location.
2943         if (NewImm->getNodeId() == -1)
2944           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2945
2946         if (!Subtarget->hasThumb2()) {
2947           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2948                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2949                            CurDAG->getRegister(0, MVT::i32)};
2950           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2951           return;
2952         } else {
2953           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2954                            CurDAG->getRegister(0, MVT::i32),
2955                            CurDAG->getRegister(0, MVT::i32)};
2956           ReplaceNode(N,
2957                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2958           return;
2959         }
2960       }
2961     }
2962
2963     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2964     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2965     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2966     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2967     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2968     EVT VT = N->getValueType(0);
2969     if (VT != MVT::i32)
2970       break;
2971     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2972       ? ARM::t2MOVTi16
2973       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2974     if (!Opc)
2975       break;
2976     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2977     N1C = dyn_cast<ConstantSDNode>(N1);
2978     if (!N1C)
2979       break;
2980     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2981       SDValue N2 = N0.getOperand(1);
2982       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2983       if (!N2C)
2984         break;
2985       unsigned N1CVal = N1C->getZExtValue();
2986       unsigned N2CVal = N2C->getZExtValue();
2987       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2988           (N1CVal & 0xffffU) == 0xffffU &&
2989           (N2CVal & 0xffffU) == 0x0U) {
2990         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2991                                                   dl, MVT::i32);
2992         SDValue Ops[] = { N0.getOperand(0), Imm16,
2993                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2994         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2995         return;
2996       }
2997     }
2998
2999     break;
3000   }
3001   case ARMISD::UMAAL: { // Emit UMAAL, using the t2 opcode when in Thumb.
3002     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3003     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3004                       N->getOperand(2), N->getOperand(3),
3005                       getAL(CurDAG, dl), // AL predicate.
3006                       CurDAG->getRegister(0, MVT::i32) }; // Predicate register.
3007     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3008     return; // The machine node has two i32 results; nothing left to select.
3009   }
3010   case ARMISD::UMLAL:{ // Emit the machine UMLAL; both arms return.
3011     if (Subtarget->isThumb()) {
3012       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3013                         N->getOperand(3), getAL(CurDAG, dl),
3014                         CurDAG->getRegister(0, MVT::i32)};
3015       ReplaceNode(
3016           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3017       return;
3018     }else{ // ARM mode: the operand list carries one extra register-0 entry.
3019       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3020                         N->getOperand(3), getAL(CurDAG, dl),
3021                         CurDAG->getRegister(0, MVT::i32),
3022                         CurDAG->getRegister(0, MVT::i32) }; // TODO confirm: cc_out?
3023       ReplaceNode(N, CurDAG->getMachineNode( // Pre-v6 cores use UMLALv5.
3024                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3025                          MVT::i32, MVT::i32, Ops));
3026       return;
3027     }
3028   }
3029   case ARMISD::SMLAL:{ // Emit the machine SMLAL; both arms return.
3030     if (Subtarget->isThumb()) {
3031       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3032                         N->getOperand(3), getAL(CurDAG, dl),
3033                         CurDAG->getRegister(0, MVT::i32)};
3034       ReplaceNode(
3035           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3036       return;
3037     }else{ // ARM mode: the operand list carries one extra register-0 entry.
3038       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3039                         N->getOperand(3), getAL(CurDAG, dl),
3040                         CurDAG->getRegister(0, MVT::i32),
3041                         CurDAG->getRegister(0, MVT::i32) }; // TODO confirm: cc_out?
3042       ReplaceNode(N, CurDAG->getMachineNode( // Pre-v6 cores use SMLALv5.
3043                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3044                          MVT::i32, MVT::i32, Ops));
3045       return;
3046     }
3047   }
3048   case ARMISD::SUBE: { // Try to match the SMMLS pattern; otherwise break.
3049     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3050       break;
3051     // Look for a pattern to match SMMLS (capitals mark the SMUL_LOHI half used):
3052     // (sube c, (smul_loHi a, b), (subc 0, (smul_LOhi a, b)))
3053     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3054         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3055         !SDValue(N, 1).use_empty()) // Result 1 of the SUBE must be unused.
3056       break;
3057
3058     // Single assert: no empty 'if' body is left when asserts are compiled out.
3059     assert((!Subtarget->isThumb() || Subtarget->hasThumb2()) &&
3060            "This pattern should not be generated for Thumb");
3061
3062     SDValue SmulLoHi = N->getOperand(1);
3063     SDValue Subc = N->getOperand(2);
3064     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3065
3066     if (!Zero || Zero->getZExtValue() != 0 ||        // SUBC must be (0 - x).
3067         Subc.getOperand(1) != SmulLoHi.getValue(0) || // x is multiply result 0.
3068         N->getOperand(1) != SmulLoHi.getValue(1) ||   // SUBE uses result 1.
3069         N->getOperand(2) != Subc.getValue(1))         // ...and SUBC's result 1.
3070       break;
3071
3072     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3073     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3074                       N->getOperand(0), getAL(CurDAG, dl), // c, then AL predicate.
3075                       CurDAG->getRegister(0, MVT::i32) };
3076     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3077     return;
3078   }
3079   case ISD::LOAD: { // Try the hand-written indexed-load selectors first.
3080     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3081       return;
3082     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3083       if (tryT2IndexedLoad(N))
3084         return;
3085     } else if (Subtarget->isThumb()) { // Thumb without Thumb2.
3086       if (tryT1IndexedLoad(N))
3087         return;
3088     } else if (tryARMIndexedLoad(N)) // Neither Thumb flavour: ARM mode.
3089       return;
3090     // None of the helpers matched. Other cases are autogenerated.
3091     break;
3092   }
3093   case ARMISD::WLS:
3094   case ARMISD::LE: { // Both become a machine node whose only result is Other.
3095     SDValue Ops[] = { N->getOperand(1),
3096                       N->getOperand(2),
3097                       N->getOperand(0) }; // Operand 0 is moved to the end.
3098     unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3099       ARM::t2WhileLoopStart : ARM::t2LoopEnd; // LE maps to t2LoopEnd.
3100     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3101     ReplaceUses(N, New);
3102     CurDAG->RemoveDeadNode(N);
3103     return;
3104   }
3105   case ARMISD::LOOP_DEC: { // Replace with t2LoopDec (results: i32 and Other).
3106     SDValue Ops[] = { N->getOperand(1),
3107                       N->getOperand(2),
3108                       N->getOperand(0) }; // Operand 0 is moved to the end.
3109     SDNode *Dec =
3110       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3111                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3112     ReplaceUses(N, Dec);
3113     CurDAG->RemoveDeadNode(N);
3114     return;
3115   }
3116   case ARMISD::BRCOND: { // Select a conditional branch (or a loop-end pair).
3117     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3118     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3119     // Pattern complexity = 6  cost = 1  size = 0
3120
3121     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3122     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3123     // Pattern complexity = 6  cost = 1  size = 0
3124
3125     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3126     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3127     // Pattern complexity = 6  cost = 1  size = 0
3128
3129     unsigned Opc = Subtarget->isThumb() ?
3130       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3131     SDValue Chain = N->getOperand(0);
3132     SDValue N1 = N->getOperand(1); // Destination basic block (asserted below).
3133     SDValue N2 = N->getOperand(2); // Condition code constant (asserted below).
3134     SDValue N3 = N->getOperand(3); // A register operand (asserted below).
3135     SDValue InFlag = N->getOperand(4);
3136     assert(N1.getOpcode() == ISD::BasicBlock);
3137     assert(N2.getOpcode() == ISD::Constant);
3138     assert(N3.getOpcode() == ISD::Register);
3139
3140     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3141
3142     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3143       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3144         SDValue Int = InFlag.getOperand(0);
3145         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3146
3147         // Handle low-overhead loops: emit t2LoopDec + t2LoopEnd, not a Bcc.
3148         if (ID == Intrinsic::loop_decrement_reg) {
3149           SDValue Elements = Int.getOperand(2);
3150           SDValue Size = CurDAG->getTargetConstant(
3151             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3152                                  MVT::i32);
3153
3154           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3155           SDNode *LoopDec =
3156             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3157                                    CurDAG->getVTList(MVT::i32, MVT::Other),
3158                                    Args);
3159           ReplaceUses(Int.getNode(), LoopDec); // The intrinsic becomes LoopDec.
3160
3161           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3162           SDNode *LoopEnd =
3163             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3164
3165           ReplaceUses(N, LoopEnd);
3166           CurDAG->RemoveDeadNode(N); // Branch, compare and intrinsic all go.
3167           CurDAG->RemoveDeadNode(InFlag.getNode());
3168           CurDAG->RemoveDeadNode(Int.getNode());
3169           return;
3170         }
3171       }
3172
3173       bool SwitchEQNEToPLMI;
3174       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3175       InFlag = N->getOperand(4); // Re-read: presumably SelectCMPZ may replace it.
3176
3177       if (SwitchEQNEToPLMI) { // Map NE -> MI and EQ -> PL.
3178         switch ((ARMCC::CondCodes)CC) {
3179         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3180         case ARMCC::NE:
3181           CC = (unsigned)ARMCC::MI;
3182           break;
3183         case ARMCC::EQ:
3184           CC = (unsigned)ARMCC::PL;
3185           break;
3186         }
3187       }
3188     }
3189
3190     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3191     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3192     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3193                                              MVT::Glue, Ops);
3194     Chain = SDValue(ResNode, 0);
3195     if (N->getNumValues() == 2) { // N has a second result: forward the glue.
3196       InFlag = SDValue(ResNode, 1);
3197       ReplaceUses(SDValue(N, 1), InFlag);
3198     }
3199     ReplaceUses(SDValue(N, 0),
3200                 SDValue(Chain.getNode(), Chain.getResNo()));
3201     CurDAG->RemoveDeadNode(N);
3202     return;
3203   }
3204
3205   case ARMISD::CMPZ: { // Thumb only: rewrite compares with small negatives.
3206     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3207     //   This allows us to avoid materializing the expensive negative constant.
3208     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3209     //   for its glue output.
3210     SDValue X = N->getOperand(0);
3211     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3212     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3213       int64_t Addend = -C->getSExtValue(); // Positive, since C is negative.
3214
3215       SDNode *Add = nullptr; // Stays null when no ADD is created below.
3216       // ADDS can be better than CMN if the immediate fits in a
3217       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3218       // Outside that range we can just use a CMN which is 32-bit but has a
3219       // 12-bit immediate range.
3220       if (Addend < 1<<8) {
3221         if (Subtarget->isThumb2()) {
3222           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3223                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3224                             CurDAG->getRegister(0, MVT::i32) };
3225           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3226         } else { // Thumb1: pick the opcode by immediate size, CPSR goes first.
3227           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3228           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3229                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3230                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3231           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3232         }
3233       }
3234       if (Add) { // Morph N in place into CMPZ(Add, 0) with a glue result.
3235         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3236         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3237       }
3238     }
3239     // Other cases are autogenerated.
3240     break; // No return here: N, morphed or not, is left to the code below.
3241   }
3242
3243   case ARMISD::CMOV: { // Pre-select a CMPZ glue input and adjust the condition.
3244     SDValue InFlag = N->getOperand(4);
3245
3246     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3247       bool SwitchEQNEToPLMI;
3248       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3249
3250       if (SwitchEQNEToPLMI) { // Map NE -> MI and EQ -> PL.
3251         SDValue ARMcc = N->getOperand(2);
3252         ARMCC::CondCodes CC =
3253           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3254
3255         switch (CC) {
3256         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3257         case ARMCC::NE:
3258           CC = ARMCC::MI;
3259           break;
3260         case ARMCC::EQ:
3261           CC = ARMCC::PL;
3262           break;
3263         }
3264         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3265         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3266                          N->getOperand(3), N->getOperand(4)}; // Operand 4 re-read.
3267         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); // Only cc changes.
3268       }
3269
3270     }
3271     // Other cases are autogenerated.
3272     break; // No return here: N, morphed or not, is left to the code below.
3273   }
3274
3275   case ARMISD::VZIP: { // Pick the opcode from the type of result 0.
3276     unsigned Opc = 0;
3277     EVT VT = N->getValueType(0);
3278     switch (VT.getSimpleVT().SimpleTy) {
3279     default: return; // NOTE(review): returns without replacing N; confirm.
3280     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3281     case MVT::v4f16:
3282     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3283     case MVT::v2f32:
3284     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3285     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3286     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3287     case MVT::v8f16:
3288     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3289     case MVT::v4f32:
3290     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3291     }
3292     SDValue Pred = getAL(CurDAG, dl);
3293     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3294     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3295     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); // Two results.
3296     return;
3297   }
3298   case ARMISD::VUZP: { // Pick the opcode from the type of result 0.
3299     unsigned Opc = 0;
3300     EVT VT = N->getValueType(0);
3301     switch (VT.getSimpleVT().SimpleTy) {
3302     default: return; // NOTE(review): returns without replacing N; confirm.
3303     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3304     case MVT::v4f16:
3305     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3306     case MVT::v2f32:
3307     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3308     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3309     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3310     case MVT::v8f16:
3311     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3312     case MVT::v4f32:
3313     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3314     }
3315     SDValue Pred = getAL(CurDAG, dl);
3316     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3317     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3318     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); // Two results.
3319     return;
3320   }
3321   case ARMISD::VTRN: { // Pick the opcode from the type of result 0.
3322     unsigned Opc = 0;
3323     EVT VT = N->getValueType(0);
3324     switch (VT.getSimpleVT().SimpleTy) {
3325     default: return; // NOTE(review): returns without replacing N; confirm.
3326     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3327     case MVT::v4f16:
3328     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3329     case MVT::v2f32:
3330     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3331     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3332     case MVT::v8f16:
3333     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3334     case MVT::v4f32:
3335     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3336     }
3337     SDValue Pred = getAL(CurDAG, dl);
3338     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3339     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3340     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); // Two results.
3341     return;
3342   }
3343   case ARMISD::BUILD_VECTOR: { // Handles 2 x f64, 2 x f32 and 4 x f32 only.
3344     EVT VecVT = N->getValueType(0);
3345     EVT EltVT = VecVT.getVectorElementType();
3346     unsigned NumElts = VecVT.getVectorNumElements();
3347     if (EltVT == MVT::f64) { // Two f64 elements: build a D-register pair.
3348       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3349       ReplaceNode(
3350           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3351       return;
3352     }
3353     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3354     if (NumElts == 2) { // Two f32 elements: build an S-register pair.
3355       ReplaceNode(
3356           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3357       return;
3358     }
3359     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3360     ReplaceNode(N, // Four f32 elements: build a quad of S registers.
3361                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3362                                     N->getOperand(2), N->getOperand(3)));
3363     return;
3364   }
3365
3366   case ARMISD::VLD1DUP: { // Tables list the 8-, 16- and 32-bit opcodes in order.
3367     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3368                                          ARM::VLD1DUPd32 };
3369     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3370                                          ARM::VLD1DUPq32 };
3371     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3372     return;
3373   }
3374
3375   case ARMISD::VLD2DUP: { // Only a d-register table is supplied from here on.
3376     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3377                                         ARM::VLD2DUPd32 };
3378     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3379     return;
3380   }
3381
3382   case ARMISD::VLD3DUP: { // Uses the Pseudo opcodes.
3383     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3384                                         ARM::VLD3DUPd16Pseudo,
3385                                         ARM::VLD3DUPd32Pseudo };
3386     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3387     return;
3388   }
3389
3390   case ARMISD::VLD4DUP: { // Uses the Pseudo opcodes.
3391     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3392                                         ARM::VLD4DUPd16Pseudo,
3393                                         ARM::VLD4DUPd32Pseudo };
3394     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3395     return;
3396   }
3397
3398   case ARMISD::VLD1DUP_UPD: { // _UPD arms pass true as the third argument.
3399     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3400                                          ARM::VLD1DUPd16wb_fixed,
3401                                          ARM::VLD1DUPd32wb_fixed };
3402     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3403                                          ARM::VLD1DUPq16wb_fixed,
3404                                          ARM::VLD1DUPq32wb_fixed };
3405     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3406     return;
3407   }
3408
3409   case ARMISD::VLD2DUP_UPD: { // Only a d-register table is supplied.
3410     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3411                                         ARM::VLD2DUPd16wb_fixed,
3412                                         ARM::VLD2DUPd32wb_fixed };
3413     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3414     return;
3415   }
3416
3417   case ARMISD::VLD3DUP_UPD: { // Uses the Pseudo_UPD opcodes.
3418     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3419                                         ARM::VLD3DUPd16Pseudo_UPD,
3420                                         ARM::VLD3DUPd32Pseudo_UPD };
3421     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3422     return;
3423   }
3424
3425   case ARMISD::VLD4DUP_UPD: { // Uses the Pseudo_UPD opcodes.
3426     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3427                                         ARM::VLD4DUPd16Pseudo_UPD,
3428                                         ARM::VLD4DUPd32Pseudo_UPD };
3429     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3430     return;
3431   }
3432
3433   case ARMISD::VLD1_UPD: { // Tables list 8-, 16-, 32- and 64-bit opcodes.
3434     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3435                                          ARM::VLD1d16wb_fixed,
3436                                          ARM::VLD1d32wb_fixed,
3437                                          ARM::VLD1d64wb_fixed };
3438     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3439                                          ARM::VLD1q16wb_fixed,
3440                                          ARM::VLD1q32wb_fixed,
3441                                          ARM::VLD1q64wb_fixed };
3442     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3443     return;
3444   }
3445
3446   case ARMISD::VLD2_UPD: { // The q tables from here on have no 64-bit entry.
3447     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3448                                          ARM::VLD2d16wb_fixed,
3449                                          ARM::VLD2d32wb_fixed,
3450                                          ARM::VLD1q64wb_fixed}; // 64-bit: a VLD1 op.
3451     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3452                                          ARM::VLD2q16PseudoWB_fixed,
3453                                          ARM::VLD2q32PseudoWB_fixed };
3454     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3455     return;
3456   }
3457
3458   case ARMISD::VLD3_UPD: { // Passes a second q table of "odd" opcodes.
3459     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3460                                          ARM::VLD3d16Pseudo_UPD,
3461                                          ARM::VLD3d32Pseudo_UPD,
3462                                          ARM::VLD1d64TPseudoWB_fixed}; // VLD1 op.
3463     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3464                                           ARM::VLD3q16Pseudo_UPD,
3465                                           ARM::VLD3q32Pseudo_UPD };
3466     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3467                                           ARM::VLD3q16oddPseudo_UPD,
3468                                           ARM::VLD3q32oddPseudo_UPD };
3469     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3470     return;
3471   }
3472
3473   case ARMISD::VLD4_UPD: { // Passes a second q table of "odd" opcodes.
3474     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3475                                          ARM::VLD4d16Pseudo_UPD,
3476                                          ARM::VLD4d32Pseudo_UPD,
3477                                          ARM::VLD1d64QPseudoWB_fixed}; // VLD1 op.
3478     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3479                                           ARM::VLD4q16Pseudo_UPD,
3480                                           ARM::VLD4q32Pseudo_UPD };
3481     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3482                                           ARM::VLD4q16oddPseudo_UPD,
3483                                           ARM::VLD4q32oddPseudo_UPD };
3484     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3485     return;
3486   }
3487
3488   case ARMISD::VLD2LN_UPD: { // The q table has 16- and 32-bit entries only.
3489     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3490                                          ARM::VLD2LNd16Pseudo_UPD,
3491                                          ARM::VLD2LNd32Pseudo_UPD };
3492     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3493                                          ARM::VLD2LNq32Pseudo_UPD };
3494     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); // Loads pass true first.
3495     return;
3496   }
3497
3498   case ARMISD::VLD3LN_UPD: { // Same table shape as VLD2LN_UPD.
3499     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3500                                          ARM::VLD3LNd16Pseudo_UPD,
3501                                          ARM::VLD3LNd32Pseudo_UPD };
3502     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3503                                          ARM::VLD3LNq32Pseudo_UPD };
3504     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3505     return;
3506   }
3507
3508   case ARMISD::VLD4LN_UPD: { // Same table shape as VLD2LN_UPD.
3509     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3510                                          ARM::VLD4LNd16Pseudo_UPD,
3511                                          ARM::VLD4LNd32Pseudo_UPD };
3512     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3513                                          ARM::VLD4LNq32Pseudo_UPD };
3514     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3515     return;
3516   }
3517
3518   case ARMISD::VST1_UPD: { // Tables list 8-, 16-, 32- and 64-bit opcodes.
3519     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3520                                          ARM::VST1d16wb_fixed,
3521                                          ARM::VST1d32wb_fixed,
3522                                          ARM::VST1d64wb_fixed };
3523     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3524                                          ARM::VST1q16wb_fixed,
3525                                          ARM::VST1q32wb_fixed,
3526                                          ARM::VST1q64wb_fixed };
3527     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3528     return;
3529   }
3530
3531   case ARMISD::VST2_UPD: { // The q tables from here on have no 64-bit entry.
3532     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3533                                          ARM::VST2d16wb_fixed,
3534                                          ARM::VST2d32wb_fixed,
3535                                          ARM::VST1q64wb_fixed}; // 64-bit: a VST1 op.
3536     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3537                                          ARM::VST2q16PseudoWB_fixed,
3538                                          ARM::VST2q32PseudoWB_fixed };
3539     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3540     return;
3541   }
3542
3543   case ARMISD::VST3_UPD: { // Passes a second q table of "odd" opcodes.
3544     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3545                                          ARM::VST3d16Pseudo_UPD,
3546                                          ARM::VST3d32Pseudo_UPD,
3547                                          ARM::VST1d64TPseudoWB_fixed}; // VST1 op.
3548     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3549                                           ARM::VST3q16Pseudo_UPD,
3550                                           ARM::VST3q32Pseudo_UPD };
3551     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3552                                           ARM::VST3q16oddPseudo_UPD,
3553                                           ARM::VST3q32oddPseudo_UPD };
3554     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3555     return;
3556   }
3557
3558   case ARMISD::VST4_UPD: { // Passes a second q table of "odd" opcodes.
3559     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3560                                          ARM::VST4d16Pseudo_UPD,
3561                                          ARM::VST4d32Pseudo_UPD,
3562                                          ARM::VST1d64QPseudoWB_fixed}; // VST1 op.
3563     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3564                                           ARM::VST4q16Pseudo_UPD,
3565                                           ARM::VST4q32Pseudo_UPD };
3566     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3567                                           ARM::VST4q16oddPseudo_UPD,
3568                                           ARM::VST4q32oddPseudo_UPD };
3569     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3570     return;
3571   }
3572
3573   case ARMISD::VST2LN_UPD: { // The q table has 16- and 32-bit entries only.
3574     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3575                                          ARM::VST2LNd16Pseudo_UPD,
3576                                          ARM::VST2LNd32Pseudo_UPD };
3577     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3578                                          ARM::VST2LNq32Pseudo_UPD };
3579     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); // Stores pass false.
3580     return;
3581   }
3582
3583   case ARMISD::VST3LN_UPD: { // Same table shape as VST2LN_UPD.
3584     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3585                                          ARM::VST3LNd16Pseudo_UPD,
3586                                          ARM::VST3LNd32Pseudo_UPD };
3587     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3588                                          ARM::VST3LNq32Pseudo_UPD };
3589     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3590     return;
3591   }
3592
3593   case ARMISD::VST4LN_UPD: { // Same table shape as VST2LN_UPD.
3594     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3595                                          ARM::VST4LNd16Pseudo_UPD,
3596                                          ARM::VST4LNd32Pseudo_UPD };
3597     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3598                                          ARM::VST4LNq32Pseudo_UPD };
3599     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3600     return;
3601   }
3602
3603   case ISD::INTRINSIC_VOID:
3604   case ISD::INTRINSIC_W_CHAIN: {
3605     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3606     switch (IntNo) {
3607     default:
3608       break;
3609
3610     case Intrinsic::arm_mrrc:
3611     case Intrinsic::arm_mrrc2: { // Select MRRC/MRRC2 (t2 forms when in Thumb).
3612       SDLoc dl(N); // Declares a dl local to this arm.
3613       SDValue Chain = N->getOperand(0);
3614       unsigned Opc;
3615
3616       if (Subtarget->isThumb())
3617         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3618       else
3619         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3620
3621       SmallVector<SDValue, 5> Ops;
3622       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3623       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3624       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3625
3626       // The ARM-mode mrrc2 encoding has no predicate field (its top 4 bits are
3627       // always '1111'); assembly may spell AL, but the encoding is unchanged.
3628       // So predicate operands are added for every opcode except ARM::MRRC2.
3629       if (Opc != ARM::MRRC2) {
3630         Ops.push_back(getAL(CurDAG, dl));
3631         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3632       }
3633
3634       Ops.push_back(Chain); // The chain is the last operand.
3635
3636       // Writes to two registers.
3637       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3638
3639       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3640       return;
3641     }
3642     case Intrinsic::arm_ldaexd:
3643     case Intrinsic::arm_ldrexd: { // Select the exclusive double-word load.
3644       SDLoc dl(N); // Declares a dl local to this arm.
3645       SDValue Chain = N->getOperand(0);
3646       SDValue MemAddr = N->getOperand(2);
3647       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); // NOTE(review): the strexd arm tests hasThumb2(); confirm.
3648
3649       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3650       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3651                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3652
3653       // arm_ldrexd returns an i64 value in {i32, i32}
3654       std::vector<EVT> ResTys;
3655       if (isThumb) { // Thumb form: two separate i32 results.
3656         ResTys.push_back(MVT::i32);
3657         ResTys.push_back(MVT::i32);
3658       } else // ARM form: one Untyped result, split with EXTRACT_SUBREG below.
3659         ResTys.push_back(MVT::Untyped);
3660       ResTys.push_back(MVT::Other);
3661
3662       // Place arguments in the right order.
3663       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3664                        CurDAG->getRegister(0, MVT::i32), Chain};
3665       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3666       // Transfer memoperands.
3667       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3668       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3669
3670       // Remap uses. The chain is result 2 in the Thumb form and result 1 otherwise.
3671       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3672       if (!SDValue(N, 0).use_empty()) { // First i32 result, only if used.
3673         SDValue Result;
3674         if (isThumb)
3675           Result = SDValue(Ld, 0);
3676         else {
3677           SDValue SubRegIdx =
3678             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3679           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3680               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3681           Result = SDValue(ResNode,0);
3682         }
3683         ReplaceUses(SDValue(N, 0), Result);
3684       }
3685       if (!SDValue(N, 1).use_empty()) { // Second i32 result, only if used.
3686         SDValue Result;
3687         if (isThumb)
3688           Result = SDValue(Ld, 1);
3689         else {
3690           SDValue SubRegIdx =
3691             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3692           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3693               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3694           Result = SDValue(ResNode,0);
3695         }
3696         ReplaceUses(SDValue(N, 1), Result);
3697       }
3698       ReplaceUses(SDValue(N, 2), OutChain);
3699       CurDAG->RemoveDeadNode(N);
3700       return;
3701     }
3702     case Intrinsic::arm_stlexd:
3703     case Intrinsic::arm_strexd: {
3704       SDLoc dl(N);
3705       SDValue Chain = N->getOperand(0);
3706       SDValue Val0 = N->getOperand(2);
3707       SDValue Val1 = N->getOperand(3);
3708       SDValue MemAddr = N->getOperand(4);
3709
      // Store exclusive double returns an i32 value which is the return status
      // of the issued store.
3712       const EVT ResTys[] = {MVT::i32, MVT::Other};
3713
3714       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3715       // Place arguments in the right order.
3716       SmallVector<SDValue, 7> Ops;
3717       if (isThumb) {
3718         Ops.push_back(Val0);
3719         Ops.push_back(Val1);
3720       } else
3721         // arm_strexd uses GPRPair.
3722         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3723       Ops.push_back(MemAddr);
3724       Ops.push_back(getAL(CurDAG, dl));
3725       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3726       Ops.push_back(Chain);
3727
3728       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3729       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3730                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3731
3732       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3733       // Transfer memoperands.
3734       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3735       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3736
3737       ReplaceNode(N, St);
3738       return;
3739     }
3740
3741     case Intrinsic::arm_neon_vld1: {
3742       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3743                                            ARM::VLD1d32, ARM::VLD1d64 };
3744       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3745                                            ARM::VLD1q32, ARM::VLD1q64};
3746       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3747       return;
3748     }
3749
3750     case Intrinsic::arm_neon_vld1x2: {
3751       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3752                                            ARM::VLD1q32, ARM::VLD1q64 };
3753       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3754                                            ARM::VLD1d16QPseudo,
3755                                            ARM::VLD1d32QPseudo,
3756                                            ARM::VLD1d64QPseudo };
3757       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3758       return;
3759     }
3760
3761     case Intrinsic::arm_neon_vld1x3: {
3762       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3763                                            ARM::VLD1d16TPseudo,
3764                                            ARM::VLD1d32TPseudo,
3765                                            ARM::VLD1d64TPseudo };
3766       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3767                                             ARM::VLD1q16LowTPseudo_UPD,
3768                                             ARM::VLD1q32LowTPseudo_UPD,
3769                                             ARM::VLD1q64LowTPseudo_UPD };
3770       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3771                                             ARM::VLD1q16HighTPseudo,
3772                                             ARM::VLD1q32HighTPseudo,
3773                                             ARM::VLD1q64HighTPseudo };
3774       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3775       return;
3776     }
3777
3778     case Intrinsic::arm_neon_vld1x4: {
3779       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3780                                            ARM::VLD1d16QPseudo,
3781                                            ARM::VLD1d32QPseudo,
3782                                            ARM::VLD1d64QPseudo };
3783       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3784                                             ARM::VLD1q16LowQPseudo_UPD,
3785                                             ARM::VLD1q32LowQPseudo_UPD,
3786                                             ARM::VLD1q64LowQPseudo_UPD };
3787       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3788                                             ARM::VLD1q16HighQPseudo,
3789                                             ARM::VLD1q32HighQPseudo,
3790                                             ARM::VLD1q64HighQPseudo };
3791       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3792       return;
3793     }
3794
3795     case Intrinsic::arm_neon_vld2: {
3796       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3797                                            ARM::VLD2d32, ARM::VLD1q64 };
3798       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3799                                            ARM::VLD2q32Pseudo };
3800       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3801       return;
3802     }
3803
3804     case Intrinsic::arm_neon_vld3: {
3805       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3806                                            ARM::VLD3d16Pseudo,
3807                                            ARM::VLD3d32Pseudo,
3808                                            ARM::VLD1d64TPseudo };
3809       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3810                                             ARM::VLD3q16Pseudo_UPD,
3811                                             ARM::VLD3q32Pseudo_UPD };
3812       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3813                                             ARM::VLD3q16oddPseudo,
3814                                             ARM::VLD3q32oddPseudo };
3815       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3816       return;
3817     }
3818
3819     case Intrinsic::arm_neon_vld4: {
3820       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3821                                            ARM::VLD4d16Pseudo,
3822                                            ARM::VLD4d32Pseudo,
3823                                            ARM::VLD1d64QPseudo };
3824       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3825                                             ARM::VLD4q16Pseudo_UPD,
3826                                             ARM::VLD4q32Pseudo_UPD };
3827       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3828                                             ARM::VLD4q16oddPseudo,
3829                                             ARM::VLD4q32oddPseudo };
3830       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3831       return;
3832     }
3833
3834     case Intrinsic::arm_neon_vld2dup: {
3835       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3836                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3837       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3838                                             ARM::VLD2DUPq16EvenPseudo,
3839                                             ARM::VLD2DUPq32EvenPseudo };
3840       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3841                                             ARM::VLD2DUPq16OddPseudo,
3842                                             ARM::VLD2DUPq32OddPseudo };
3843       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3844                    DOpcodes, QOpcodes0, QOpcodes1);
3845       return;
3846     }
3847
3848     case Intrinsic::arm_neon_vld3dup: {
3849       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3850                                            ARM::VLD3DUPd16Pseudo,
3851                                            ARM::VLD3DUPd32Pseudo,
3852                                            ARM::VLD1d64TPseudo };
3853       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3854                                             ARM::VLD3DUPq16EvenPseudo,
3855                                             ARM::VLD3DUPq32EvenPseudo };
3856       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3857                                             ARM::VLD3DUPq16OddPseudo,
3858                                             ARM::VLD3DUPq32OddPseudo };
3859       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3860                    DOpcodes, QOpcodes0, QOpcodes1);
3861       return;
3862     }
3863
3864     case Intrinsic::arm_neon_vld4dup: {
3865       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3866                                            ARM::VLD4DUPd16Pseudo,
3867                                            ARM::VLD4DUPd32Pseudo,
3868                                            ARM::VLD1d64QPseudo };
3869       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3870                                             ARM::VLD4DUPq16EvenPseudo,
3871                                             ARM::VLD4DUPq32EvenPseudo };
3872       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3873                                             ARM::VLD4DUPq16OddPseudo,
3874                                             ARM::VLD4DUPq32OddPseudo };
3875       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3876                    DOpcodes, QOpcodes0, QOpcodes1);
3877       return;
3878     }
3879
3880     case Intrinsic::arm_neon_vld2lane: {
3881       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3882                                            ARM::VLD2LNd16Pseudo,
3883                                            ARM::VLD2LNd32Pseudo };
3884       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3885                                            ARM::VLD2LNq32Pseudo };
3886       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3887       return;
3888     }
3889
3890     case Intrinsic::arm_neon_vld3lane: {
3891       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3892                                            ARM::VLD3LNd16Pseudo,
3893                                            ARM::VLD3LNd32Pseudo };
3894       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3895                                            ARM::VLD3LNq32Pseudo };
3896       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3897       return;
3898     }
3899
3900     case Intrinsic::arm_neon_vld4lane: {
3901       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3902                                            ARM::VLD4LNd16Pseudo,
3903                                            ARM::VLD4LNd32Pseudo };
3904       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3905                                            ARM::VLD4LNq32Pseudo };
3906       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3907       return;
3908     }
3909
3910     case Intrinsic::arm_neon_vst1: {
3911       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3912                                            ARM::VST1d32, ARM::VST1d64 };
3913       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3914                                            ARM::VST1q32, ARM::VST1q64 };
3915       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3916       return;
3917     }
3918
3919     case Intrinsic::arm_neon_vst1x2: {
3920       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3921                                            ARM::VST1q32, ARM::VST1q64 };
3922       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3923                                            ARM::VST1d16QPseudo,
3924                                            ARM::VST1d32QPseudo,
3925                                            ARM::VST1d64QPseudo };
3926       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3927       return;
3928     }
3929
3930     case Intrinsic::arm_neon_vst1x3: {
3931       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3932                                            ARM::VST1d16TPseudo,
3933                                            ARM::VST1d32TPseudo,
3934                                            ARM::VST1d64TPseudo };
3935       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3936                                             ARM::VST1q16LowTPseudo_UPD,
3937                                             ARM::VST1q32LowTPseudo_UPD,
3938                                             ARM::VST1q64LowTPseudo_UPD };
3939       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3940                                             ARM::VST1q16HighTPseudo,
3941                                             ARM::VST1q32HighTPseudo,
3942                                             ARM::VST1q64HighTPseudo };
3943       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3944       return;
3945     }
3946
3947     case Intrinsic::arm_neon_vst1x4: {
3948       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3949                                            ARM::VST1d16QPseudo,
3950                                            ARM::VST1d32QPseudo,
3951                                            ARM::VST1d64QPseudo };
3952       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3953                                             ARM::VST1q16LowQPseudo_UPD,
3954                                             ARM::VST1q32LowQPseudo_UPD,
3955                                             ARM::VST1q64LowQPseudo_UPD };
3956       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3957                                             ARM::VST1q16HighQPseudo,
3958                                             ARM::VST1q32HighQPseudo,
3959                                             ARM::VST1q64HighQPseudo };
3960       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3961       return;
3962     }
3963
3964     case Intrinsic::arm_neon_vst2: {
3965       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3966                                            ARM::VST2d32, ARM::VST1q64 };
3967       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3968                                            ARM::VST2q32Pseudo };
3969       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3970       return;
3971     }
3972
3973     case Intrinsic::arm_neon_vst3: {
3974       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3975                                            ARM::VST3d16Pseudo,
3976                                            ARM::VST3d32Pseudo,
3977                                            ARM::VST1d64TPseudo };
3978       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3979                                             ARM::VST3q16Pseudo_UPD,
3980                                             ARM::VST3q32Pseudo_UPD };
3981       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3982                                             ARM::VST3q16oddPseudo,
3983                                             ARM::VST3q32oddPseudo };
3984       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3985       return;
3986     }
3987
3988     case Intrinsic::arm_neon_vst4: {
3989       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3990                                            ARM::VST4d16Pseudo,
3991                                            ARM::VST4d32Pseudo,
3992                                            ARM::VST1d64QPseudo };
3993       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3994                                             ARM::VST4q16Pseudo_UPD,
3995                                             ARM::VST4q32Pseudo_UPD };
3996       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3997                                             ARM::VST4q16oddPseudo,
3998                                             ARM::VST4q32oddPseudo };
3999       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4000       return;
4001     }
4002
4003     case Intrinsic::arm_neon_vst2lane: {
4004       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4005                                            ARM::VST2LNd16Pseudo,
4006                                            ARM::VST2LNd32Pseudo };
4007       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4008                                            ARM::VST2LNq32Pseudo };
4009       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4010       return;
4011     }
4012
4013     case Intrinsic::arm_neon_vst3lane: {
4014       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4015                                            ARM::VST3LNd16Pseudo,
4016                                            ARM::VST3LNd32Pseudo };
4017       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4018                                            ARM::VST3LNq32Pseudo };
4019       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4020       return;
4021     }
4022
4023     case Intrinsic::arm_neon_vst4lane: {
4024       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4025                                            ARM::VST4LNd16Pseudo,
4026                                            ARM::VST4LNd32Pseudo };
4027       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4028                                            ARM::VST4LNq32Pseudo };
4029       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4030       return;
4031     }
4032     }
4033     break;
4034   }
4035
4036   case ISD::ATOMIC_CMP_SWAP:
4037     SelectCMP_SWAP(N);
4038     return;
4039   }
4040
4041   SelectCode(N);
4042 }
4043
4044 // Inspect a register string of the form
4045 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4046 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4047 // and obtain the integer operands from them, adding these operands to the
4048 // provided vector.
4049 static void getIntOperandsFromRegisterString(StringRef RegString,
4050                                              SelectionDAG *CurDAG,
4051                                              const SDLoc &DL,
4052                                              std::vector<SDValue> &Ops) {
4053   SmallVector<StringRef, 5> Fields;
4054   RegString.split(Fields, ':');
4055
4056   if (Fields.size() > 1) {
4057     bool AllIntFields = true;
4058
4059     for (StringRef Field : Fields) {
4060       // Need to trim out leading 'cp' characters and get the integer field.
4061       unsigned IntField;
4062       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4063       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4064     }
4065
4066     assert(AllIntFields &&
4067             "Unexpected non-integer value in special register string.");
4068   }
4069 }
4070
4071 // Maps a Banked Register string to its mask value. The mask value returned is
4072 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4073 // mask operand, which expresses which register is to be used, e.g. r8, and in
4074 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4075 // was invalid.
4076 static inline int getBankedRegisterMask(StringRef RegString) {
4077   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4078   if (!TheReg)
4079      return -1;
4080   return TheReg->Encoding;
4081 }
4082
4083 // The flags here are common to those allowed for apsr in the A class cores and
4084 // those allowed for the special registers in the M class cores. Returns a
4085 // value representing which flags were present, -1 if invalid.
4086 static inline int getMClassFlagsMask(StringRef Flags) {
4087   return StringSwitch<int>(Flags)
4088           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4089                          // correct when flags are not permitted
4090           .Case("g", 0x1)
4091           .Case("nzcvq", 0x2)
4092           .Case("nzcvqg", 0x3)
4093           .Default(-1);
4094 }
4095
4096 // Maps MClass special registers string to its value for use in the
4097 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4098 // Returns -1 to signify that the string was invalid.
4099 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4100   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4101   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4102   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4103     return -1;
4104   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4105 }
4106
4107 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4108   // The mask operand contains the special register (R Bit) in bit 4, whether
4109   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4110   // bits 3-0 contains the fields to be accessed in the special register, set by
4111   // the flags provided with the register.
4112   int Mask = 0;
4113   if (Reg == "apsr") {
4114     // The flags permitted for apsr are the same flags that are allowed in
4115     // M class registers. We get the flag value and then shift the flags into
4116     // the correct place to combine with the mask.
4117     Mask = getMClassFlagsMask(Flags);
4118     if (Mask == -1)
4119       return -1;
4120     return Mask << 2;
4121   }
4122
4123   if (Reg != "cpsr" && Reg != "spsr") {
4124     return -1;
4125   }
4126
4127   // This is the same as if the flags were "fc"
4128   if (Flags.empty() || Flags == "all")
4129     return Mask | 0x9;
4130
4131   // Inspect the supplied flags string and set the bits in the mask for
4132   // the relevant and valid flags allowed for cpsr and spsr.
4133   for (char Flag : Flags) {
4134     int FlagVal;
4135     switch (Flag) {
4136       case 'c':
4137         FlagVal = 0x1;
4138         break;
4139       case 'x':
4140         FlagVal = 0x2;
4141         break;
4142       case 's':
4143         FlagVal = 0x4;
4144         break;
4145       case 'f':
4146         FlagVal = 0x8;
4147         break;
4148       default:
4149         FlagVal = 0;
4150     }
4151
4152     // This avoids allowing strings where the same flag bit appears twice.
4153     if (!FlagVal || (Mask & FlagVal))
4154       return -1;
4155     Mask |= FlagVal;
4156   }
4157
4158   // If the register is spsr then we need to set the R bit.
4159   if (Reg == "spsr")
4160     Mask |= 0x10;
4161
4162   return Mask;
4163 }
4164
4165 // Lower the read_register intrinsic to ARM specific DAG nodes
4166 // using the supplied metadata string to select the instruction node to use
4167 // and the registers/masks to construct as operands for the node.
4168 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4169   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4170   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4171   bool IsThumb2 = Subtarget->isThumb2();
4172   SDLoc DL(N);
4173
4174   std::vector<SDValue> Ops;
4175   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4176
4177   if (!Ops.empty()) {
4178     // If the special register string was constructed of fields (as defined
4179     // in the ACLE) then need to lower to MRC node (32 bit) or
4180     // MRRC node(64 bit), we can make the distinction based on the number of
4181     // operands we have.
4182     unsigned Opcode;
4183     SmallVector<EVT, 3> ResTypes;
4184     if (Ops.size() == 5){
4185       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4186       ResTypes.append({ MVT::i32, MVT::Other });
4187     } else {
4188       assert(Ops.size() == 3 &&
4189               "Invalid number of fields in special register string.");
4190       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4191       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4192     }
4193
4194     Ops.push_back(getAL(CurDAG, DL));
4195     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4196     Ops.push_back(N->getOperand(0));
4197     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4198     return true;
4199   }
4200
4201   std::string SpecialReg = RegString->getString().lower();
4202
4203   int BankedReg = getBankedRegisterMask(SpecialReg);
4204   if (BankedReg != -1) {
4205     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4206             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4207             N->getOperand(0) };
4208     ReplaceNode(
4209         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4210                                   DL, MVT::i32, MVT::Other, Ops));
4211     return true;
4212   }
4213
4214   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4215   // corresponding to the register that is being read from. So we switch on the
4216   // string to find which opcode we need to use.
4217   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4218                     .Case("fpscr", ARM::VMRS)
4219                     .Case("fpexc", ARM::VMRS_FPEXC)
4220                     .Case("fpsid", ARM::VMRS_FPSID)
4221                     .Case("mvfr0", ARM::VMRS_MVFR0)
4222                     .Case("mvfr1", ARM::VMRS_MVFR1)
4223                     .Case("mvfr2", ARM::VMRS_MVFR2)
4224                     .Case("fpinst", ARM::VMRS_FPINST)
4225                     .Case("fpinst2", ARM::VMRS_FPINST2)
4226                     .Default(0);
4227
4228   // If an opcode was found then we can lower the read to a VFP instruction.
4229   if (Opcode) {
4230     if (!Subtarget->hasVFP2Base())
4231       return false;
4232     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4233       return false;
4234
4235     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4236             N->getOperand(0) };
4237     ReplaceNode(N,
4238                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4239     return true;
4240   }
4241
4242   // If the target is M Class then need to validate that the register string
4243   // is an acceptable value, so check that a mask can be constructed from the
4244   // string.
4245   if (Subtarget->isMClass()) {
4246     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4247     if (SYSmValue == -1)
4248       return false;
4249
4250     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4251                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4252                       N->getOperand(0) };
4253     ReplaceNode(
4254         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4255     return true;
4256   }
4257
4258   // Here we know the target is not M Class so we need to check if it is one
4259   // of the remaining possible values which are apsr, cpsr or spsr.
4260   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4261     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4262             N->getOperand(0) };
4263     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4264                                           DL, MVT::i32, MVT::Other, Ops));
4265     return true;
4266   }
4267
4268   if (SpecialReg == "spsr") {
4269     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4270             N->getOperand(0) };
4271     ReplaceNode(
4272         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4273                                   MVT::i32, MVT::Other, Ops));
4274     return true;
4275   }
4276
4277   return false;
4278 }
4279
4280 // Lower the write_register intrinsic to ARM specific DAG nodes
4281 // using the supplied metadata string to select the instruction node to use
4282 // and the registers/masks to use in the nodes
4283 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4284   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4285   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4286   bool IsThumb2 = Subtarget->isThumb2();
4287   SDLoc DL(N);
4288
4289   std::vector<SDValue> Ops;
4290   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4291
4292   if (!Ops.empty()) {
4293     // If the special register string was constructed of fields (as defined
4294     // in the ACLE) then need to lower to MCR node (32 bit) or
4295     // MCRR node(64 bit), we can make the distinction based on the number of
4296     // operands we have.
4297     unsigned Opcode;
4298     if (Ops.size() == 5) {
4299       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4300       Ops.insert(Ops.begin()+2, N->getOperand(2));
4301     } else {
4302       assert(Ops.size() == 3 &&
4303               "Invalid number of fields in special register string.");
4304       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4305       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4306       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4307     }
4308
4309     Ops.push_back(getAL(CurDAG, DL));
4310     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4311     Ops.push_back(N->getOperand(0));
4312
4313     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4314     return true;
4315   }
4316
4317   std::string SpecialReg = RegString->getString().lower();
4318   int BankedReg = getBankedRegisterMask(SpecialReg);
4319   if (BankedReg != -1) {
4320     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4321             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4322             N->getOperand(0) };
4323     ReplaceNode(
4324         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4325                                   DL, MVT::Other, Ops));
4326     return true;
4327   }
4328
4329   // The VFP registers are written to by creating SelectionDAG nodes with
4330   // opcodes corresponding to the register that is being written. So we switch
4331   // on the string to find which opcode we need to use.
4332   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4333                     .Case("fpscr", ARM::VMSR)
4334                     .Case("fpexc", ARM::VMSR_FPEXC)
4335                     .Case("fpsid", ARM::VMSR_FPSID)
4336                     .Case("fpinst", ARM::VMSR_FPINST)
4337                     .Case("fpinst2", ARM::VMSR_FPINST2)
4338                     .Default(0);
4339
4340   if (Opcode) {
4341     if (!Subtarget->hasVFP2Base())
4342       return false;
4343     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4344             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4345     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4346     return true;
4347   }
4348
4349   std::pair<StringRef, StringRef> Fields;
4350   Fields = StringRef(SpecialReg).rsplit('_');
4351   std::string Reg = Fields.first.str();
4352   StringRef Flags = Fields.second;
4353
4354   // If the target was M Class then need to validate the special register value
4355   // and retrieve the mask for use in the instruction node.
4356   if (Subtarget->isMClass()) {
4357     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4358     if (SYSmValue == -1)
4359       return false;
4360
4361     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4362                       N->getOperand(2), getAL(CurDAG, DL),
4363                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4364     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4365     return true;
4366   }
4367
4368   // We then check to see if a valid mask can be constructed for one of the
4369   // register string values permitted for the A and R class cores. These values
4370   // are apsr, spsr and cpsr; these are also valid on older cores.
4371   int Mask = getARClassRegisterMask(Reg, Flags);
4372   if (Mask != -1) {
4373     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4374             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4375             N->getOperand(0) };
4376     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4377                                           DL, MVT::Other, Ops));
4378     return true;
4379   }
4380
4381   return false;
4382 }
4383
4384 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4385   std::vector<SDValue> AsmNodeOperands;
4386   unsigned Flag, Kind;
4387   bool Changed = false;
4388   unsigned NumOps = N->getNumOperands();
4389
4390   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4391   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4392   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4393   // respectively. Since there is no constraint to explicitly specify a
4394   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4395   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4396   // them into a GPRPair.
4397
4398   SDLoc dl(N);
4399   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4400                                    : SDValue(nullptr,0);
4401
4402   SmallVector<bool, 8> OpChanged;
4403   // Glue node will be appended late.
4404   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4405     SDValue op = N->getOperand(i);
4406     AsmNodeOperands.push_back(op);
4407
4408     if (i < InlineAsm::Op_FirstOperand)
4409       continue;
4410
4411     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4412       Flag = C->getZExtValue();
4413       Kind = InlineAsm::getKind(Flag);
4414     }
4415     else
4416       continue;
4417
4418     // Immediate operands to inline asm in the SelectionDAG are modeled with
4419     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4420     // the second is a constant with the value of the immediate. If we get here
4421     // and we have a Kind_Imm, skip the next operand, and continue.
4422     if (Kind == InlineAsm::Kind_Imm) {
4423       SDValue op = N->getOperand(++i);
4424       AsmNodeOperands.push_back(op);
4425       continue;
4426     }
4427
4428     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4429     if (NumRegs)
4430       OpChanged.push_back(false);
4431
4432     unsigned DefIdx = 0;
4433     bool IsTiedToChangedOp = false;
4434     // If it's a use that is tied with a previous def, it has no
4435     // reg class constraint.
4436     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4437       IsTiedToChangedOp = OpChanged[DefIdx];
4438
4439     // Memory operands to inline asm in the SelectionDAG are modeled with two
4440     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4441     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4442     // it doesn't get misinterpreted), and continue. We do this here because
4443     // it's important to update the OpChanged array correctly before moving on.
4444     if (Kind == InlineAsm::Kind_Mem) {
4445       SDValue op = N->getOperand(++i);
4446       AsmNodeOperands.push_back(op);
4447       continue;
4448     }
4449
4450     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4451         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4452       continue;
4453
4454     unsigned RC;
4455     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4456     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4457         || NumRegs != 2)
4458       continue;
4459
4460     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4461     SDValue V0 = N->getOperand(i+1);
4462     SDValue V1 = N->getOperand(i+2);
4463     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4464     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4465     SDValue PairedReg;
4466     MachineRegisterInfo &MRI = MF->getRegInfo();
4467
4468     if (Kind == InlineAsm::Kind_RegDef ||
4469         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4470       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4471       // the original GPRs.
4472
4473       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4474       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4475       SDValue Chain = SDValue(N,0);
4476
4477       SDNode *GU = N->getGluedUser();
4478       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4479                                                Chain.getValue(1));
4480
4481       // Extract values from a GPRPair reg and copy to the original GPR reg.
4482       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4483                                                     RegCopy);
4484       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4485                                                     RegCopy);
4486       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4487                                         RegCopy.getValue(1));
4488       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4489
4490       // Update the original glue user.
4491       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4492       Ops.push_back(T1.getValue(1));
4493       CurDAG->UpdateNodeOperands(GU, Ops);
4494     }
4495     else {
4496       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4497       // GPRPair and then pass the GPRPair to the inline asm.
4498       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4499
4500       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4501       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4502                                           Chain.getValue(1));
4503       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4504                                           T0.getValue(1));
4505       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4506
4507       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4508       // i32 VRs of inline asm with it.
4509       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4510       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4511       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4512
4513       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4514       Glue = Chain.getValue(1);
4515     }
4516
4517     Changed = true;
4518
4519     if(PairedReg.getNode()) {
4520       OpChanged[OpChanged.size() -1 ] = true;
4521       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4522       if (IsTiedToChangedOp)
4523         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4524       else
4525         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4526       // Replace the current flag.
4527       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4528           Flag, dl, MVT::i32);
4529       // Add the new register node and skip the original two GPRs.
4530       AsmNodeOperands.push_back(PairedReg);
4531       // Skip the next two GPRs.
4532       i += 2;
4533     }
4534   }
4535
4536   if (Glue.getNode())
4537     AsmNodeOperands.push_back(Glue);
4538   if (!Changed)
4539     return false;
4540
4541   SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4542       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4543   New->setNodeId(-1);
4544   ReplaceNode(N, New.getNode());
4545   return true;
4546 }
4547
4548
4549 bool ARMDAGToDAGISel::
4550 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4551                              std::vector<SDValue> &OutOps) {
4552   switch(ConstraintID) {
4553   default:
4554     llvm_unreachable("Unexpected asm memory constraint");
4555   case InlineAsm::Constraint_i:
4556     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4557     //        be an immediate and not a memory constraint.
4558     LLVM_FALLTHROUGH;
4559   case InlineAsm::Constraint_m:
4560   case InlineAsm::Constraint_o:
4561   case InlineAsm::Constraint_Q:
4562   case InlineAsm::Constraint_Um:
4563   case InlineAsm::Constraint_Un:
4564   case InlineAsm::Constraint_Uq:
4565   case InlineAsm::Constraint_Us:
4566   case InlineAsm::Constraint_Ut:
4567   case InlineAsm::Constraint_Uv:
4568   case InlineAsm::Constraint_Uy:
4569     // Require the address to be in a register.  That is safe for all ARM
4570     // variants and it is hard to do anything much smarter without knowing
4571     // how the operand is used.
4572     OutOps.push_back(Op);
4573     return false;
4574   }
4575   return true;
4576 }
4577
4578 /// createARMISelDag - This pass converts a legalized DAG into a
4579 /// ARM-specific DAG, ready for instruction scheduling.
4580 ///
4581 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4582                                      CodeGenOpt::Level OptLevel) {
4583   return new ARMDAGToDAGISel(TM, OptLevel);
4584 }