[ARM] Lower sadd_sat to qadd8 and qadd16
[llvm-complete.git] / lib / Target / ARM / ARMISelDAGToDAG.cpp
blob8f6515c423eb4283cc488918b34c8e9247201b83
1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
11 //===----------------------------------------------------------------------===//
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
37 using namespace llvm;
39 #define DEBUG_TYPE "arm-isel"
41 static cl::opt<bool>
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
44 cl::init(false));
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
52 class ARMDAGToDAGISel : public SelectionDAGISel {
53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54 /// make the right decision when generating code for different targets.
55 const ARMSubtarget *Subtarget;
57 public:
58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
59 : SelectionDAGISel(tm, OptLevel) {}
61 bool runOnMachineFunction(MachineFunction &MF) override {
62 // Reset the subtarget each time through.
63 Subtarget = &MF.getSubtarget<ARMSubtarget>();
64 SelectionDAGISel::runOnMachineFunction(MF);
65 return true;
68 StringRef getPassName() const override { return "ARM Instruction Selection"; }
70 void PreprocessISelDAG() override;
72 /// getI32Imm - Return a target constant of type i32 with the specified
73 /// value.
74 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
75 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 void Select(SDNode *N) override;
80 bool hasNoVMLxHazardUse(SDNode *N) const;
81 bool isShifterOpProfitable(const SDValue &Shift,
82 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
83 bool SelectRegShifterOperand(SDValue N, SDValue &A,
84 SDValue &B, SDValue &C,
85 bool CheckProfitability = true);
86 bool SelectImmShifterOperand(SDValue N, SDValue &A,
87 SDValue &B, bool CheckProfitability = true);
88 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C) {
90 // Don't apply the profitability check
91 return SelectRegShifterOperand(N, A, B, C, false);
93 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
94 SDValue &B) {
95 // Don't apply the profitability check
96 return SelectImmShifterOperand(N, A, B, false);
99 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
101 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
102 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
104 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
105 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
106 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
107 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
108 return true;
111 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
112 SDValue &Offset, SDValue &Opc);
113 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
114 SDValue &Offset, SDValue &Opc);
115 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
116 SDValue &Offset, SDValue &Opc);
117 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
118 bool SelectAddrMode3(SDValue N, SDValue &Base,
119 SDValue &Offset, SDValue &Opc);
120 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
121 SDValue &Offset, SDValue &Opc);
122 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
123 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
124 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
125 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
126 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
128 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
130 // Thumb Addressing Modes:
131 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
132 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
133 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
134 SDValue &OffImm);
135 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
136 SDValue &OffImm);
137 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
138 SDValue &OffImm);
139 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
140 SDValue &OffImm);
141 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
142 template <unsigned Shift>
143 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
145 // Thumb 2 Addressing Modes:
146 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
147 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
148 SDValue &OffImm);
149 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
150 SDValue &OffImm);
151 template <unsigned Shift>
152 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
153 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
154 unsigned Shift);
155 template <unsigned Shift>
156 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
157 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
158 SDValue &OffReg, SDValue &ShImm);
159 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
161 inline bool is_so_imm(unsigned Imm) const {
162 return ARM_AM::getSOImmVal(Imm) != -1;
165 inline bool is_so_imm_not(unsigned Imm) const {
166 return ARM_AM::getSOImmVal(~Imm) != -1;
169 inline bool is_t2_so_imm(unsigned Imm) const {
170 return ARM_AM::getT2SOImmVal(Imm) != -1;
173 inline bool is_t2_so_imm_not(unsigned Imm) const {
174 return ARM_AM::getT2SOImmVal(~Imm) != -1;
177 // Include the pieces autogenerated from the target description.
178 #include "ARMGenDAGISel.inc"
180 private:
181 void transferMemOperands(SDNode *Src, SDNode *Dst);
183 /// Indexed (pre/post inc/dec) load matching code for ARM.
184 bool tryARMIndexedLoad(SDNode *N);
185 bool tryT1IndexedLoad(SDNode *N);
186 bool tryT2IndexedLoad(SDNode *N);
187 bool tryMVEIndexedLoad(SDNode *N);
189 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
190 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
191 /// loads of D registers and even subregs and odd subregs of Q registers.
192 /// For NumVecs <= 2, QOpcodes1 is not used.
193 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
194 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
195 const uint16_t *QOpcodes1);
197 /// SelectVST - Select NEON store intrinsics. NumVecs should
198 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
199 /// stores of D registers and even subregs and odd subregs of Q registers.
200 /// For NumVecs <= 2, QOpcodes1 is not used.
201 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
202 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
203 const uint16_t *QOpcodes1);
205 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
206 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
207 /// load/store of D registers and Q registers.
208 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
209 unsigned NumVecs, const uint16_t *DOpcodes,
210 const uint16_t *QOpcodes);
212 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
213 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
214 /// for loading D registers.
215 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
216 unsigned NumVecs, const uint16_t *DOpcodes,
217 const uint16_t *QOpcodes0 = nullptr,
218 const uint16_t *QOpcodes1 = nullptr);
220 /// Try to select SBFX/UBFX instructions for ARM.
221 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
223 // Select special operations if node forms integer ABS pattern
224 bool tryABSOp(SDNode *N);
226 bool tryReadRegister(SDNode *N);
227 bool tryWriteRegister(SDNode *N);
229 bool tryInlineAsm(SDNode *N);
231 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
233 void SelectCMP_SWAP(SDNode *N);
235 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
236 /// inline asm expressions.
237 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
238 std::vector<SDValue> &OutOps) override;
240 // Form pairs of consecutive R, S, D, or Q registers.
241 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
242 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
243 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
244 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
246 // Form sequences of 4 consecutive S, D, or Q registers.
247 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
248 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
249 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
251 // Get the alignment operand for a NEON VLD or VST instruction.
252 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
253 bool is64BitVector);
255 /// Checks if N is a multiplication by a constant where we can extract out a
256 /// power of two from the constant so that it can be used in a shift, but only
257 /// if it simplifies the materialization of the constant. Returns true if it
258 /// is, and assigns to PowerOfTwo the power of two that should be extracted
259 /// out and to NewMulConst the new constant to be multiplied by.
260 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
261 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
263 /// Replace N with M in CurDAG, in a way that also ensures that M gets
264 /// selected when N would have been selected.
265 void replaceDAGValue(const SDValue &N, SDValue M);
269 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
270 /// operand. If so Imm will receive the 32-bit value.
271 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
272 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
273 Imm = cast<ConstantSDNode>(N)->getZExtValue();
274 return true;
276 return false;
279 // isInt32Immediate - This method tests to see if a constant operand.
280 // If so Imm will receive the 32 bit value.
281 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
282 return isInt32Immediate(N.getNode(), Imm);
285 // isOpcWithIntImmediate - This method tests to see if the node is a specific
286 // opcode and that it has a immediate integer right operand.
287 // If so Imm will receive the 32 bit value.
288 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
289 return N->getOpcode() == Opc &&
290 isInt32Immediate(N->getOperand(1).getNode(), Imm);
293 /// Check whether a particular node is a constant value representable as
294 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
296 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
297 static bool isScaledConstantInRange(SDValue Node, int Scale,
298 int RangeMin, int RangeMax,
299 int &ScaledConstant) {
300 assert(Scale > 0 && "Invalid scale!");
302 // Check that this is a constant.
303 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
304 if (!C)
305 return false;
307 ScaledConstant = (int) C->getZExtValue();
308 if ((ScaledConstant % Scale) != 0)
309 return false;
311 ScaledConstant /= Scale;
312 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
315 void ARMDAGToDAGISel::PreprocessISelDAG() {
316 if (!Subtarget->hasV6T2Ops())
317 return;
319 bool isThumb2 = Subtarget->isThumb();
320 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
321 E = CurDAG->allnodes_end(); I != E; ) {
322 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
324 if (N->getOpcode() != ISD::ADD)
325 continue;
327 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
328 // leading zeros, followed by consecutive set bits, followed by 1 or 2
329 // trailing zeros, e.g. 1020.
330 // Transform the expression to
331 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
332 // of trailing zeros of c2. The left shift would be folded as an shifter
333 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
334 // node (UBFX).
336 SDValue N0 = N->getOperand(0);
337 SDValue N1 = N->getOperand(1);
338 unsigned And_imm = 0;
339 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
340 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
341 std::swap(N0, N1);
343 if (!And_imm)
344 continue;
346 // Check if the AND mask is an immediate of the form: 000.....1111111100
347 unsigned TZ = countTrailingZeros(And_imm);
348 if (TZ != 1 && TZ != 2)
349 // Be conservative here. Shifter operands aren't always free. e.g. On
350 // Swift, left shifter operand of 1 / 2 for free but others are not.
351 // e.g.
352 // ubfx r3, r1, #16, #8
353 // ldr.w r3, [r0, r3, lsl #2]
354 // vs.
355 // mov.w r9, #1020
356 // and.w r2, r9, r1, lsr #14
357 // ldr r2, [r0, r2]
358 continue;
359 And_imm >>= TZ;
360 if (And_imm & (And_imm + 1))
361 continue;
363 // Look for (and (srl X, c1), c2).
364 SDValue Srl = N1.getOperand(0);
365 unsigned Srl_imm = 0;
366 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
367 (Srl_imm <= 2))
368 continue;
370 // Make sure first operand is not a shifter operand which would prevent
371 // folding of the left shift.
372 SDValue CPTmp0;
373 SDValue CPTmp1;
374 SDValue CPTmp2;
375 if (isThumb2) {
376 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
377 continue;
378 } else {
379 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
380 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
381 continue;
384 // Now make the transformation.
385 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
386 Srl.getOperand(0),
387 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
388 MVT::i32));
389 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
390 Srl,
391 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
392 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
393 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
394 CurDAG->UpdateNodeOperands(N, N0, N1);
398 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
399 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
400 /// least on current ARM implementations) which should be avoidded.
401 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
402 if (OptLevel == CodeGenOpt::None)
403 return true;
405 if (!Subtarget->hasVMLxHazards())
406 return true;
408 if (!N->hasOneUse())
409 return false;
411 SDNode *Use = *N->use_begin();
412 if (Use->getOpcode() == ISD::CopyToReg)
413 return true;
414 if (Use->isMachineOpcode()) {
415 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
416 CurDAG->getSubtarget().getInstrInfo());
418 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
419 if (MCID.mayStore())
420 return true;
421 unsigned Opcode = MCID.getOpcode();
422 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
423 return true;
424 // vmlx feeding into another vmlx. We actually want to unfold
425 // the use later in the MLxExpansion pass. e.g.
426 // vmla
427 // vmla (stall 8 cycles)
429 // vmul (5 cycles)
430 // vadd (5 cycles)
431 // vmla
432 // This adds up to about 18 - 19 cycles.
434 // vmla
435 // vmul (stall 4 cycles)
436 // vadd adds up to about 14 cycles.
437 return TII->isFpMLxInstruction(Opcode);
440 return false;
443 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
444 ARM_AM::ShiftOpc ShOpcVal,
445 unsigned ShAmt) {
446 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
447 return true;
448 if (Shift.hasOneUse())
449 return true;
450 // R << 2 is free.
451 return ShOpcVal == ARM_AM::lsl &&
452 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
455 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
456 unsigned MaxShift,
457 unsigned &PowerOfTwo,
458 SDValue &NewMulConst) const {
459 assert(N.getOpcode() == ISD::MUL);
460 assert(MaxShift > 0);
462 // If the multiply is used in more than one place then changing the constant
463 // will make other uses incorrect, so don't.
464 if (!N.hasOneUse()) return false;
465 // Check if the multiply is by a constant
466 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
467 if (!MulConst) return false;
468 // If the constant is used in more than one place then modifying it will mean
469 // we need to materialize two constants instead of one, which is a bad idea.
470 if (!MulConst->hasOneUse()) return false;
471 unsigned MulConstVal = MulConst->getZExtValue();
472 if (MulConstVal == 0) return false;
474 // Find the largest power of 2 that MulConstVal is a multiple of
475 PowerOfTwo = MaxShift;
476 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
477 --PowerOfTwo;
478 if (PowerOfTwo == 0) return false;
481 // Only optimise if the new cost is better
482 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
483 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
484 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
485 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
486 return NewCost < OldCost;
489 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
490 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
491 ReplaceUses(N, M);
494 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
495 SDValue &BaseReg,
496 SDValue &Opc,
497 bool CheckProfitability) {
498 if (DisableShifterOp)
499 return false;
501 // If N is a multiply-by-constant and it's profitable to extract a shift and
502 // use it in a shifted operand do so.
503 if (N.getOpcode() == ISD::MUL) {
504 unsigned PowerOfTwo = 0;
505 SDValue NewMulConst;
506 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
507 HandleSDNode Handle(N);
508 SDLoc Loc(N);
509 replaceDAGValue(N.getOperand(1), NewMulConst);
510 BaseReg = Handle.getValue();
511 Opc = CurDAG->getTargetConstant(
512 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
513 return true;
517 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
519 // Don't match base register only case. That is matched to a separate
520 // lower complexity pattern with explicit register operand.
521 if (ShOpcVal == ARM_AM::no_shift) return false;
523 BaseReg = N.getOperand(0);
524 unsigned ShImmVal = 0;
525 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
526 if (!RHS) return false;
527 ShImmVal = RHS->getZExtValue() & 31;
528 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
529 SDLoc(N), MVT::i32);
530 return true;
533 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
534 SDValue &BaseReg,
535 SDValue &ShReg,
536 SDValue &Opc,
537 bool CheckProfitability) {
538 if (DisableShifterOp)
539 return false;
541 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
543 // Don't match base register only case. That is matched to a separate
544 // lower complexity pattern with explicit register operand.
545 if (ShOpcVal == ARM_AM::no_shift) return false;
547 BaseReg = N.getOperand(0);
548 unsigned ShImmVal = 0;
549 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
550 if (RHS) return false;
552 ShReg = N.getOperand(1);
553 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
554 return false;
555 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
556 SDLoc(N), MVT::i32);
557 return true;
560 // Determine whether an ISD::OR's operands are suitable to turn the operation
561 // into an addition, which often has more compact encodings.
562 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
563 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
564 Out = N;
565 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
569 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
570 SDValue &Base,
571 SDValue &OffImm) {
572 // Match simple R + imm12 operands.
574 // Base only.
575 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
576 !CurDAG->isBaseWithConstantOffset(N)) {
577 if (N.getOpcode() == ISD::FrameIndex) {
578 // Match frame index.
579 int FI = cast<FrameIndexSDNode>(N)->getIndex();
580 Base = CurDAG->getTargetFrameIndex(
581 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
582 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
583 return true;
586 if (N.getOpcode() == ARMISD::Wrapper &&
587 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
588 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
589 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
590 Base = N.getOperand(0);
591 } else
592 Base = N;
593 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
594 return true;
597 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
598 int RHSC = (int)RHS->getSExtValue();
599 if (N.getOpcode() == ISD::SUB)
600 RHSC = -RHSC;
602 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
603 Base = N.getOperand(0);
604 if (Base.getOpcode() == ISD::FrameIndex) {
605 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
606 Base = CurDAG->getTargetFrameIndex(
607 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
609 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
610 return true;
614 // Base only.
615 Base = N;
616 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
617 return true;
622 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
623 SDValue &Opc) {
624 if (N.getOpcode() == ISD::MUL &&
625 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
626 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
627 // X * [3,5,9] -> X + X * [2,4,8] etc.
628 int RHSC = (int)RHS->getZExtValue();
629 if (RHSC & 1) {
630 RHSC = RHSC & ~1;
631 ARM_AM::AddrOpc AddSub = ARM_AM::add;
632 if (RHSC < 0) {
633 AddSub = ARM_AM::sub;
634 RHSC = - RHSC;
636 if (isPowerOf2_32(RHSC)) {
637 unsigned ShAmt = Log2_32(RHSC);
638 Base = Offset = N.getOperand(0);
639 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
640 ARM_AM::lsl),
641 SDLoc(N), MVT::i32);
642 return true;
648 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
649 // ISD::OR that is equivalent to an ISD::ADD.
650 !CurDAG->isBaseWithConstantOffset(N))
651 return false;
653 // Leave simple R +/- imm12 operands for LDRi12
654 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
655 int RHSC;
656 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
657 -0x1000+1, 0x1000, RHSC)) // 12 bits.
658 return false;
661 // Otherwise this is R +/- [possibly shifted] R.
662 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
663 ARM_AM::ShiftOpc ShOpcVal =
664 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
665 unsigned ShAmt = 0;
667 Base = N.getOperand(0);
668 Offset = N.getOperand(1);
670 if (ShOpcVal != ARM_AM::no_shift) {
671 // Check to see if the RHS of the shift is a constant, if not, we can't fold
672 // it.
673 if (ConstantSDNode *Sh =
674 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
675 ShAmt = Sh->getZExtValue();
676 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
677 Offset = N.getOperand(1).getOperand(0);
678 else {
679 ShAmt = 0;
680 ShOpcVal = ARM_AM::no_shift;
682 } else {
683 ShOpcVal = ARM_AM::no_shift;
687 // Try matching (R shl C) + (R).
688 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
689 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
690 N.getOperand(0).hasOneUse())) {
691 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
692 if (ShOpcVal != ARM_AM::no_shift) {
693 // Check to see if the RHS of the shift is a constant, if not, we can't
694 // fold it.
695 if (ConstantSDNode *Sh =
696 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
697 ShAmt = Sh->getZExtValue();
698 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
699 Offset = N.getOperand(0).getOperand(0);
700 Base = N.getOperand(1);
701 } else {
702 ShAmt = 0;
703 ShOpcVal = ARM_AM::no_shift;
705 } else {
706 ShOpcVal = ARM_AM::no_shift;
711 // If Offset is a multiply-by-constant and it's profitable to extract a shift
712 // and use it in a shifted operand do so.
713 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
714 unsigned PowerOfTwo = 0;
715 SDValue NewMulConst;
716 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
717 HandleSDNode Handle(Offset);
718 replaceDAGValue(Offset.getOperand(1), NewMulConst);
719 Offset = Handle.getValue();
720 ShAmt = PowerOfTwo;
721 ShOpcVal = ARM_AM::lsl;
725 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
726 SDLoc(N), MVT::i32);
727 return true;
730 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
731 SDValue &Offset, SDValue &Opc) {
732 unsigned Opcode = Op->getOpcode();
733 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
734 ? cast<LoadSDNode>(Op)->getAddressingMode()
735 : cast<StoreSDNode>(Op)->getAddressingMode();
736 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
737 ? ARM_AM::add : ARM_AM::sub;
738 int Val;
739 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
740 return false;
742 Offset = N;
743 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
744 unsigned ShAmt = 0;
745 if (ShOpcVal != ARM_AM::no_shift) {
746 // Check to see if the RHS of the shift is a constant, if not, we can't fold
747 // it.
748 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
749 ShAmt = Sh->getZExtValue();
750 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
751 Offset = N.getOperand(0);
752 else {
753 ShAmt = 0;
754 ShOpcVal = ARM_AM::no_shift;
756 } else {
757 ShOpcVal = ARM_AM::no_shift;
761 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
762 SDLoc(N), MVT::i32);
763 return true;
766 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
767 SDValue &Offset, SDValue &Opc) {
768 unsigned Opcode = Op->getOpcode();
769 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
770 ? cast<LoadSDNode>(Op)->getAddressingMode()
771 : cast<StoreSDNode>(Op)->getAddressingMode();
772 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
773 ? ARM_AM::add : ARM_AM::sub;
774 int Val;
775 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
776 if (AddSub == ARM_AM::sub) Val *= -1;
777 Offset = CurDAG->getRegister(0, MVT::i32);
778 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
779 return true;
782 return false;
786 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
787 SDValue &Offset, SDValue &Opc) {
788 unsigned Opcode = Op->getOpcode();
789 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
790 ? cast<LoadSDNode>(Op)->getAddressingMode()
791 : cast<StoreSDNode>(Op)->getAddressingMode();
792 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
793 ? ARM_AM::add : ARM_AM::sub;
794 int Val;
795 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
796 Offset = CurDAG->getRegister(0, MVT::i32);
797 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
798 ARM_AM::no_shift),
799 SDLoc(Op), MVT::i32);
800 return true;
803 return false;
806 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
807 Base = N;
808 return true;
811 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
812 SDValue &Base, SDValue &Offset,
813 SDValue &Opc) {
814 if (N.getOpcode() == ISD::SUB) {
815 // X - C is canonicalize to X + -C, no need to handle it here.
816 Base = N.getOperand(0);
817 Offset = N.getOperand(1);
818 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
819 MVT::i32);
820 return true;
823 if (!CurDAG->isBaseWithConstantOffset(N)) {
824 Base = N;
825 if (N.getOpcode() == ISD::FrameIndex) {
826 int FI = cast<FrameIndexSDNode>(N)->getIndex();
827 Base = CurDAG->getTargetFrameIndex(
828 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
830 Offset = CurDAG->getRegister(0, MVT::i32);
831 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
832 MVT::i32);
833 return true;
836 // If the RHS is +/- imm8, fold into addr mode.
837 int RHSC;
838 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
839 -256 + 1, 256, RHSC)) { // 8 bits.
840 Base = N.getOperand(0);
841 if (Base.getOpcode() == ISD::FrameIndex) {
842 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
843 Base = CurDAG->getTargetFrameIndex(
844 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
846 Offset = CurDAG->getRegister(0, MVT::i32);
848 ARM_AM::AddrOpc AddSub = ARM_AM::add;
849 if (RHSC < 0) {
850 AddSub = ARM_AM::sub;
851 RHSC = -RHSC;
853 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
854 MVT::i32);
855 return true;
858 Base = N.getOperand(0);
859 Offset = N.getOperand(1);
860 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
861 MVT::i32);
862 return true;
865 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
866 SDValue &Offset, SDValue &Opc) {
867 unsigned Opcode = Op->getOpcode();
868 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
869 ? cast<LoadSDNode>(Op)->getAddressingMode()
870 : cast<StoreSDNode>(Op)->getAddressingMode();
871 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
872 ? ARM_AM::add : ARM_AM::sub;
873 int Val;
874 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
875 Offset = CurDAG->getRegister(0, MVT::i32);
876 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
877 MVT::i32);
878 return true;
881 Offset = N;
882 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
883 MVT::i32);
884 return true;
887 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
888 bool FP16) {
889 if (!CurDAG->isBaseWithConstantOffset(N)) {
890 Base = N;
891 if (N.getOpcode() == ISD::FrameIndex) {
892 int FI = cast<FrameIndexSDNode>(N)->getIndex();
893 Base = CurDAG->getTargetFrameIndex(
894 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
895 } else if (N.getOpcode() == ARMISD::Wrapper &&
896 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
897 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
898 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
899 Base = N.getOperand(0);
901 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
902 SDLoc(N), MVT::i32);
903 return true;
906 // If the RHS is +/- imm8, fold into addr mode.
907 int RHSC;
908 const int Scale = FP16 ? 2 : 4;
910 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
911 Base = N.getOperand(0);
912 if (Base.getOpcode() == ISD::FrameIndex) {
913 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
914 Base = CurDAG->getTargetFrameIndex(
915 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
918 ARM_AM::AddrOpc AddSub = ARM_AM::add;
919 if (RHSC < 0) {
920 AddSub = ARM_AM::sub;
921 RHSC = -RHSC;
924 if (FP16)
925 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
926 SDLoc(N), MVT::i32);
927 else
928 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
929 SDLoc(N), MVT::i32);
931 return true;
934 Base = N;
936 if (FP16)
937 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
938 SDLoc(N), MVT::i32);
939 else
940 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
941 SDLoc(N), MVT::i32);
943 return true;
946 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
947 SDValue &Base, SDValue &Offset) {
948 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
951 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
952 SDValue &Base, SDValue &Offset) {
953 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
956 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
957 SDValue &Align) {
958 Addr = N;
960 unsigned Alignment = 0;
962 MemSDNode *MemN = cast<MemSDNode>(Parent);
964 if (isa<LSBaseSDNode>(MemN) ||
965 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
966 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
967 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
968 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
969 // The maximum alignment is equal to the memory size being referenced.
970 unsigned MMOAlign = MemN->getAlignment();
971 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
972 if (MMOAlign >= MemSize && MemSize > 1)
973 Alignment = MemSize;
974 } else {
975 // All other uses of addrmode6 are for intrinsics. For now just record
976 // the raw alignment value; it will be refined later based on the legal
977 // alignment operands for the intrinsic.
978 Alignment = MemN->getAlignment();
981 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
982 return true;
985 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
986 SDValue &Offset) {
987 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
988 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
989 if (AM != ISD::POST_INC)
990 return false;
991 Offset = N;
992 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
993 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
994 Offset = CurDAG->getRegister(0, MVT::i32);
996 return true;
999 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1000 SDValue &Offset, SDValue &Label) {
1001 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1002 Offset = N.getOperand(0);
1003 SDValue N1 = N.getOperand(1);
1004 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1005 SDLoc(N), MVT::i32);
1006 return true;
1009 return false;
1013 //===----------------------------------------------------------------------===//
1014 // Thumb Addressing Modes
1015 //===----------------------------------------------------------------------===//
1017 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1018 // Negative numbers are difficult to materialise in thumb1. If we are
1019 // selecting the add of a negative, instead try to select ri with a zero
1020 // offset, so create the add node directly which will become a sub.
1021 if (N.getOpcode() != ISD::ADD)
1022 return false;
1024 // Look for an imm which is not legal for ld/st, but is legal for sub.
1025 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1026 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1028 return false;
1031 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1032 SDValue &Offset) {
1033 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1034 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1035 if (!NC || !NC->isNullValue())
1036 return false;
1038 Base = Offset = N;
1039 return true;
1042 Base = N.getOperand(0);
1043 Offset = N.getOperand(1);
1044 return true;
1047 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1048 SDValue &Offset) {
1049 if (shouldUseZeroOffsetLdSt(N))
1050 return false; // Select ri instead
1051 return SelectThumbAddrModeRRSext(N, Base, Offset);
1054 bool
1055 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1056 SDValue &Base, SDValue &OffImm) {
1057 if (shouldUseZeroOffsetLdSt(N)) {
1058 Base = N;
1059 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1060 return true;
1063 if (!CurDAG->isBaseWithConstantOffset(N)) {
1064 if (N.getOpcode() == ISD::ADD) {
1065 return false; // We want to select register offset instead
1066 } else if (N.getOpcode() == ARMISD::Wrapper &&
1067 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1068 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1069 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1070 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1071 Base = N.getOperand(0);
1072 } else {
1073 Base = N;
1076 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1077 return true;
1080 // If the RHS is + imm5 * scale, fold into addr mode.
1081 int RHSC;
1082 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1083 Base = N.getOperand(0);
1084 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1085 return true;
1088 // Offset is too large, so use register offset instead.
1089 return false;
1092 bool
1093 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1094 SDValue &OffImm) {
1095 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1098 bool
1099 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1100 SDValue &OffImm) {
1101 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1104 bool
1105 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1106 SDValue &OffImm) {
1107 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
/// Match a frame-index based Thumb address: either a bare FrameIndex (zero
/// offset) or FrameIndex + constant, where the constant is accepted by
/// isScaledConstantInRange with Scale=4 and bounds 0/256.
///
/// Side effect: may raise the alignment of the referenced frame object to 4
/// in MachineFrameInfo.
/// Note: on the FrameIndex + constant path \p Base is overwritten with the
/// FrameIndex operand before the size/alignment checks, so it can be modified
/// even when the function ends up returning false.
1110 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1111 SDValue &Base, SDValue &OffImm) {
// Case 1: the address is the frame index itself.
1112 if (N.getOpcode() == ISD::FrameIndex) {
1113 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1114 // Only multiples of 4 are allowed for the offset, so the frame object
1115 // alignment must be at least 4.
1116 MachineFrameInfo &MFI = MF->getFrameInfo();
1117 if (MFI.getObjectAlignment(FI) < 4)
1118 MFI.setObjectAlignment(FI, 4);
1119 Base = CurDAG->getTargetFrameIndex(
1120 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1121 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1122 return true;
// Anything that is not base + constant cannot be matched here.
1125 if (!CurDAG->isBaseWithConstantOffset(N))
1126 return false;
// Case 2: frame index plus constant.
1128 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1129 // If the RHS is + imm8 * scale, fold into addr mode.
1130 int RHSC;
1131 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1132 Base = N.getOperand(0);
1133 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1134 // Make sure the offset is inside the object, or we might fail to
1135 // allocate an emergency spill slot. (An out-of-range access is UB, but
1136 // it could show up anyway.)
1137 MachineFrameInfo &MFI = MF->getFrameInfo();
// RHSC was divided by the scale, so RHSC * 4 is the byte offset again.
1138 if (RHSC * 4 < MFI.getObjectSize(FI)) {
1139 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1140 // indexed by the LHS must be 4-byte aligned.
// Fixed objects are never re-aligned; they must already be 4-byte aligned
// for the check below to succeed.
1141 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1142 MFI.setObjectAlignment(FI, 4);
1143 if (MFI.getObjectAlignment(FI) >= 4) {
1144 Base = CurDAG->getTargetFrameIndex(
1145 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
// The offset operand is the scaled constant (RHSC), not the byte offset.
1146 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1147 return true;
1153 return false;
1156 template <unsigned Shift>
1157 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1158 SDValue &OffImm) {
1159 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1160 int RHSC;
1161 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1162 RHSC)) {
1163 Base = N.getOperand(0);
1164 if (N.getOpcode() == ISD::SUB)
1165 RHSC = -RHSC;
1166 OffImm =
1167 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1168 return true;
1172 // Base only.
1173 Base = N;
1174 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1175 return true;
1179 //===----------------------------------------------------------------------===//
1180 // Thumb 2 Addressing Modes
1181 //===----------------------------------------------------------------------===//
/// Match a Thumb2 base + unsigned 12-bit immediate address.
///
/// Returns false in two cases so that another pattern is selected: when the
/// base would be a wrapped TargetConstantPool (t2LDRpci), and when
/// SelectT2AddrModeImm8 matches the same node (t2LDRi8). In the second case
/// \p Base and \p OffImm have already been written by SelectT2AddrModeImm8.
/// Every other input is matched, falling back to \p N with a zero offset.
1184 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1185 SDValue &Base, SDValue &OffImm) {
1186 // Match simple R + imm12 operands.
1188 // Base only.
1189 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1190 !CurDAG->isBaseWithConstantOffset(N)) {
1191 if (N.getOpcode() == ISD::FrameIndex) {
1192 // Match frame index.
1193 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1194 Base = CurDAG->getTargetFrameIndex(
1195 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1196 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1197 return true;
// Look through a wrapper unless it wraps a global address, external symbol
// or TLS address.
1200 if (N.getOpcode() == ARMISD::Wrapper &&
1201 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1202 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1203 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1204 Base = N.getOperand(0);
1205 if (Base.getOpcode() == ISD::TargetConstantPool)
1206 return false; // We want to select t2LDRpci instead.
1207 } else
1208 Base = N;
1209 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1210 return true;
// From here on N is an ADD, a SUB, or base + constant.
1213 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1214 if (SelectT2AddrModeImm8(N, Base, OffImm))
1215 // Let t2LDRi8 handle (R - imm8).
1216 return false;
// The constant is read zero-extended and truncated to int; a SUB negates it.
1218 int RHSC = (int)RHS->getZExtValue();
1219 if (N.getOpcode() == ISD::SUB)
1220 RHSC = -RHSC;
1222 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1223 Base = N.getOperand(0);
1224 if (Base.getOpcode() == ISD::FrameIndex) {
1225 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1226 Base = CurDAG->getTargetFrameIndex(
1227 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1229 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1230 return true;
// Non-constant RHS or constant out of range: use the whole node as the base.
1234 // Base only.
1235 Base = N;
1236 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1237 return true;
1240 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1241 SDValue &Base, SDValue &OffImm) {
1242 // Match simple R - imm8 operands.
1243 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1244 !CurDAG->isBaseWithConstantOffset(N))
1245 return false;
1247 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1248 int RHSC = (int)RHS->getSExtValue();
1249 if (N.getOpcode() == ISD::SUB)
1250 RHSC = -RHSC;
1252 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1253 Base = N.getOperand(0);
1254 if (Base.getOpcode() == ISD::FrameIndex) {
1255 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1256 Base = CurDAG->getTargetFrameIndex(
1257 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1259 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1260 return true;
1264 return false;
1267 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1268 SDValue &OffImm){
1269 unsigned Opcode = Op->getOpcode();
1270 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1271 ? cast<LoadSDNode>(Op)->getAddressingMode()
1272 : cast<StoreSDNode>(Op)->getAddressingMode();
1273 int RHSC;
1274 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1275 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1276 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1277 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1278 return true;
1281 return false;
1284 template <unsigned Shift>
1285 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1286 SDValue &OffImm) {
1287 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1288 int RHSC;
1289 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1290 RHSC)) {
1291 Base = N.getOperand(0);
1292 if (Base.getOpcode() == ISD::FrameIndex) {
1293 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1294 Base = CurDAG->getTargetFrameIndex(
1295 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1298 if (N.getOpcode() == ISD::SUB)
1299 RHSC = -RHSC;
1300 OffImm =
1301 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1302 return true;
1306 // Base only.
1307 Base = N;
1308 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1309 return true;
1312 template <unsigned Shift>
1313 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1314 SDValue &OffImm) {
1315 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1318 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1319 SDValue &OffImm,
1320 unsigned Shift) {
1321 unsigned Opcode = Op->getOpcode();
1322 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1323 ? cast<LoadSDNode>(Op)->getAddressingMode()
1324 : cast<StoreSDNode>(Op)->getAddressingMode();
1325 int RHSC;
1326 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
1327 OffImm =
1328 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1329 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1330 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1331 MVT::i32);
1332 return true;
1334 return false;
/// Match a Thumb2 register + (register << ShImm) address.
///
/// Declines constant offsets that the imm12 / imm8 forms cover. Otherwise
/// splits \p N into \p Base and \p OffReg, folding a left shift of less than
/// 4 (when isShifterOpProfitable agrees) or a power of two extracted from a
/// multiply-by-constant into \p ShImm.
/// Side effect: the multiply path rewrites the DAG through replaceDAGValue.
1337 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1338 SDValue &Base,
1339 SDValue &OffReg, SDValue &ShImm) {
1340 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1341 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1342 return false;
1344 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1345 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1346 int RHSC = (int)RHS->getZExtValue();
1347 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1348 return false;
1349 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1350 return false;
1353 // Look for (R + R) or (R + (R << [1,2,3])).
1354 unsigned ShAmt = 0;
1355 Base = N.getOperand(0);
1356 OffReg = N.getOperand(1);
1358 // Swap if it is ((R << c) + R).
1359 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1360 if (ShOpcVal != ARM_AM::lsl) {
1361 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1362 if (ShOpcVal == ARM_AM::lsl)
1363 std::swap(Base, OffReg);
// After the optional swap, OffReg is the lsl node whenever ShOpcVal is lsl.
1366 if (ShOpcVal == ARM_AM::lsl) {
1367 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1368 // it.
1369 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1370 ShAmt = Sh->getZExtValue();
1371 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1372 OffReg = OffReg.getOperand(0);
1373 else {
// Shift not folded: keep the shift node as OffReg and encode no shift.
1374 ShAmt = 0;
1379 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1380 // and use it in a shifted operand do so.
1381 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1382 unsigned PowerOfTwo = 0;
1383 SDValue NewMulConst;
1384 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
// The handle is created before the replacement and OffReg is re-read from it
// afterwards, rather than reusing the old value.
1385 HandleSDNode Handle(OffReg);
1386 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1387 OffReg = Handle.getValue();
1388 ShAmt = PowerOfTwo;
1392 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1394 return true;
1397 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1398 SDValue &OffImm) {
1399 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1400 // instructions.
1401 Base = N;
1402 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1404 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1405 return true;
1407 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1408 if (!RHS)
1409 return true;
1411 uint32_t RHSC = (int)RHS->getZExtValue();
1412 if (RHSC > 1020 || RHSC % 4 != 0)
1413 return true;
1415 Base = N.getOperand(0);
1416 if (Base.getOpcode() == ISD::FrameIndex) {
1417 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1418 Base = CurDAG->getTargetFrameIndex(
1419 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1422 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1423 return true;
1426 //===--------------------------------------------------------------------===//
1428 /// getAL - Returns a ARMCC::AL immediate node.
1429 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1430 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1433 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1434 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1435 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
/// Try to select an indexed (pre/post inc/dec) load \p N as an ARM-mode
/// writeback load. Returns false for unindexed loads and when no addressing
/// mode matches; on success \p N is replaced by the new machine node.
///
/// The order of the else-if chain matters: for i32 and zero-extending i8/i1
/// loads the immediate forms are tried before the register form.
1438 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1439 LoadSDNode *LD = cast<LoadSDNode>(N);
1440 ISD::MemIndexedMode AM = LD->getAddressingMode();
1441 if (AM == ISD::UNINDEXED)
1442 return false;
1444 EVT LoadedVT = LD->getMemoryVT();
1445 SDValue Offset, AMOpc;
1446 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1447 unsigned Opcode = 0;
1448 bool Match = false;
// i32 loads: pre-indexed immediate, post-indexed immediate, then register.
1449 if (LoadedVT == MVT::i32 && isPre &&
1450 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1451 Opcode = ARM::LDR_PRE_IMM;
1452 Match = true;
1453 } else if (LoadedVT == MVT::i32 && !isPre &&
1454 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1455 Opcode = ARM::LDR_POST_IMM;
1456 Match = true;
1457 } else if (LoadedVT == MVT::i32 &&
1458 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1459 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1460 Match = true;
// i16 loads use addressing mode 3; a sign-extending load picks the LDRSH
// opcodes, anything else LDRH.
1462 } else if (LoadedVT == MVT::i16 &&
1463 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1464 Match = true;
1465 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1466 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1467 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
// i8/i1 loads: sign-extending loads use addressing mode 3 (LDRSB), all
// others use addressing mode 2 (LDRB).
1468 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1469 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1470 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1471 Match = true;
1472 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1474 } else {
1475 if (isPre &&
1476 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1477 Match = true;
1478 Opcode = ARM::LDRB_PRE_IMM;
1479 } else if (!isPre &&
1480 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1481 Match = true;
1482 Opcode = ARM::LDRB_POST_IMM;
1483 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1484 Match = true;
1485 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1490 if (Match) {
// The pre-indexed immediate forms take no separate Offset operand; every
// other form passes Offset ahead of AMOpc.
1491 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1492 SDValue Chain = LD->getChain();
1493 SDValue Base = LD->getBasePtr();
1494 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1495 CurDAG->getRegister(0, MVT::i32), Chain };
1496 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1497 MVT::Other, Ops);
1498 transferMemOperands(N, New);
1499 ReplaceNode(N, New);
1500 return true;
1501 } else {
1502 SDValue Chain = LD->getChain();
1503 SDValue Base = LD->getBasePtr();
1504 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1505 CurDAG->getRegister(0, MVT::i32), Chain };
1506 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1507 MVT::Other, Ops);
1508 transferMemOperands(N, New);
1509 ReplaceNode(N, New);
1510 return true;
1514 return false;
1517 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1518 LoadSDNode *LD = cast<LoadSDNode>(N);
1519 EVT LoadedVT = LD->getMemoryVT();
1520 ISD::MemIndexedMode AM = LD->getAddressingMode();
1521 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1522 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1523 return false;
1525 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1526 if (!COffs || COffs->getZExtValue() != 4)
1527 return false;
1529 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1530 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1531 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1532 // ISel.
1533 SDValue Chain = LD->getChain();
1534 SDValue Base = LD->getBasePtr();
1535 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1536 CurDAG->getRegister(0, MVT::i32), Chain };
1537 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1538 MVT::i32, MVT::Other, Ops);
1539 transferMemOperands(N, New);
1540 ReplaceNode(N, New);
1541 return true;
1544 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1545 LoadSDNode *LD = cast<LoadSDNode>(N);
1546 ISD::MemIndexedMode AM = LD->getAddressingMode();
1547 if (AM == ISD::UNINDEXED)
1548 return false;
1550 EVT LoadedVT = LD->getMemoryVT();
1551 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1552 SDValue Offset;
1553 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1554 unsigned Opcode = 0;
1555 bool Match = false;
1556 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1557 switch (LoadedVT.getSimpleVT().SimpleTy) {
1558 case MVT::i32:
1559 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1560 break;
1561 case MVT::i16:
1562 if (isSExtLd)
1563 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1564 else
1565 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1566 break;
1567 case MVT::i8:
1568 case MVT::i1:
1569 if (isSExtLd)
1570 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1571 else
1572 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1573 break;
1574 default:
1575 return false;
1577 Match = true;
1580 if (Match) {
1581 SDValue Chain = LD->getChain();
1582 SDValue Base = LD->getBasePtr();
1583 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1584 CurDAG->getRegister(0, MVT::i32), Chain };
1585 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1586 MVT::Other, Ops);
1587 transferMemOperands(N, New);
1588 ReplaceNode(N, New);
1589 return true;
1592 return false;
/// Try to select an indexed vector load \p N as an MVE pre/post-indexed
/// VLDR. Returns false for unindexed loads, non-vector memory types, and
/// when no alignment/type/offset combination below matches.
///
/// The order of the checks matters: the v4i16, v8i8 and v4i8 memory types are
/// matched first, then the widest element size the alignment allows.
1595 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1596 LoadSDNode *LD = cast<LoadSDNode>(N);
1597 ISD::MemIndexedMode AM = LD->getAddressingMode();
1598 if (AM == ISD::UNINDEXED)
1599 return false;
1600 EVT LoadedVT = LD->getMemoryVT();
1601 if (!LoadedVT.isVector())
1602 return false;
1603 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1604 SDValue Offset;
1605 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1606 unsigned Opcode = 0;
1607 unsigned Align = LD->getAlignment();
1608 bool IsLE = Subtarget->isLittle();
// The last argument of SelectT2AddrModeImm7Offset is the shift applied to the
// 7-bit immediate.
1610 if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1611 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1612 if (isSExtLd)
1613 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1614 else
1615 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1616 } else if (LoadedVT == MVT::v8i8 &&
1617 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1618 if (isSExtLd)
1619 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1620 else
1621 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1622 } else if (LoadedVT == MVT::v4i8 &&
1623 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1624 if (isSExtLd)
1625 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1626 else
1627 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
// Remaining loads: on big-endian targets each element size is only used for
// the vector types listed with it; on little-endian any type is accepted.
1628 } else if (Align >= 4 &&
1629 (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1630 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1631 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1632 else if (Align >= 2 &&
1633 (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1634 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1635 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1636 else if ((IsLE || LoadedVT == MVT::v16i8) &&
1637 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1638 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1639 else
1640 return false;
1642 SDValue Chain = LD->getChain();
1643 SDValue Base = LD->getBasePtr();
1644 SDValue Ops[] = {Base, Offset,
1645 CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1646 CurDAG->getRegister(0, MVT::i32), Chain};
1647 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
1648 MVT::i32, MVT::Other, Ops);
1649 transferMemOperands(N, New);
// Results 0 and 1 of the original load map to results 1 and 0 of the new
// node; result 2 maps straight across.
1650 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1651 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1652 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1653 CurDAG->RemoveDeadNode(N);
1654 return true;
1657 /// Form a GPRPair pseudo register from a pair of GPR regs.
1658 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1659 SDLoc dl(V0.getNode());
1660 SDValue RegClass =
1661 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1662 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1663 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1664 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1665 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1668 /// Form a D register from a pair of S registers.
1669 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1670 SDLoc dl(V0.getNode());
1671 SDValue RegClass =
1672 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1673 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1674 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1675 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1676 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1679 /// Form a quad register from a pair of D registers.
1680 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1681 SDLoc dl(V0.getNode());
1682 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1683 MVT::i32);
1684 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1685 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1686 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1687 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1690 /// Form 4 consecutive D registers from a pair of Q registers.
1691 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1692 SDLoc dl(V0.getNode());
1693 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1694 MVT::i32);
1695 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1696 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1697 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1698 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1701 /// Form 4 consecutive S registers.
1702 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1703 SDValue V2, SDValue V3) {
1704 SDLoc dl(V0.getNode());
1705 SDValue RegClass =
1706 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1707 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1708 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1709 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1710 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1711 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1712 V2, SubReg2, V3, SubReg3 };
1713 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1716 /// Form 4 consecutive D registers.
1717 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1718 SDValue V2, SDValue V3) {
1719 SDLoc dl(V0.getNode());
1720 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1721 MVT::i32);
1722 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1723 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1724 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1725 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1726 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1727 V2, SubReg2, V3, SubReg3 };
1728 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1731 /// Form 4 consecutive Q registers.
1732 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1733 SDValue V2, SDValue V3) {
1734 SDLoc dl(V0.getNode());
1735 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1736 MVT::i32);
1737 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1738 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1739 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1740 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1741 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1742 V2, SubReg2, V3, SubReg3 };
1743 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1746 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1747 /// of a NEON VLD or VST instruction. The supported values depend on the
1748 /// number of registers being loaded.
1749 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1750 unsigned NumVecs, bool is64BitVector) {
1751 unsigned NumRegs = NumVecs;
1752 if (!is64BitVector && NumVecs < 3)
1753 NumRegs *= 2;
1755 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1756 if (Alignment >= 32 && NumRegs == 4)
1757 Alignment = 32;
1758 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1759 Alignment = 16;
1760 else if (Alignment >= 8)
1761 Alignment = 8;
1762 else
1763 Alignment = 0;
1765 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1768 static bool isVLDfixed(unsigned Opc)
1770 switch (Opc) {
1771 default: return false;
1772 case ARM::VLD1d8wb_fixed : return true;
1773 case ARM::VLD1d16wb_fixed : return true;
1774 case ARM::VLD1d64Qwb_fixed : return true;
1775 case ARM::VLD1d32wb_fixed : return true;
1776 case ARM::VLD1d64wb_fixed : return true;
1777 case ARM::VLD1d64TPseudoWB_fixed : return true;
1778 case ARM::VLD1d64QPseudoWB_fixed : return true;
1779 case ARM::VLD1q8wb_fixed : return true;
1780 case ARM::VLD1q16wb_fixed : return true;
1781 case ARM::VLD1q32wb_fixed : return true;
1782 case ARM::VLD1q64wb_fixed : return true;
1783 case ARM::VLD1DUPd8wb_fixed : return true;
1784 case ARM::VLD1DUPd16wb_fixed : return true;
1785 case ARM::VLD1DUPd32wb_fixed : return true;
1786 case ARM::VLD1DUPq8wb_fixed : return true;
1787 case ARM::VLD1DUPq16wb_fixed : return true;
1788 case ARM::VLD1DUPq32wb_fixed : return true;
1789 case ARM::VLD2d8wb_fixed : return true;
1790 case ARM::VLD2d16wb_fixed : return true;
1791 case ARM::VLD2d32wb_fixed : return true;
1792 case ARM::VLD2q8PseudoWB_fixed : return true;
1793 case ARM::VLD2q16PseudoWB_fixed : return true;
1794 case ARM::VLD2q32PseudoWB_fixed : return true;
1795 case ARM::VLD2DUPd8wb_fixed : return true;
1796 case ARM::VLD2DUPd16wb_fixed : return true;
1797 case ARM::VLD2DUPd32wb_fixed : return true;
1801 static bool isVSTfixed(unsigned Opc)
1803 switch (Opc) {
1804 default: return false;
1805 case ARM::VST1d8wb_fixed : return true;
1806 case ARM::VST1d16wb_fixed : return true;
1807 case ARM::VST1d32wb_fixed : return true;
1808 case ARM::VST1d64wb_fixed : return true;
1809 case ARM::VST1q8wb_fixed : return true;
1810 case ARM::VST1q16wb_fixed : return true;
1811 case ARM::VST1q32wb_fixed : return true;
1812 case ARM::VST1q64wb_fixed : return true;
1813 case ARM::VST1d64TPseudoWB_fixed : return true;
1814 case ARM::VST1d64QPseudoWB_fixed : return true;
1815 case ARM::VST2d8wb_fixed : return true;
1816 case ARM::VST2d16wb_fixed : return true;
1817 case ARM::VST2d32wb_fixed : return true;
1818 case ARM::VST2q8PseudoWB_fixed : return true;
1819 case ARM::VST2q16PseudoWB_fixed : return true;
1820 case ARM::VST2q32PseudoWB_fixed : return true;
1824 // Get the register stride update opcode of a VLD/VST instruction that
1825 // is otherwise equivalent to the given fixed stride updating instruction.
1826 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1827 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1828 && "Incorrect fixed stride updating instruction.");
1829 switch (Opc) {
1830 default: break;
1831 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1832 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1833 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1834 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1835 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1836 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1837 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1838 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1839 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1840 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1841 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1842 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1843 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1844 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1845 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1846 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1847 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1848 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1850 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1851 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1852 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1853 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1854 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1855 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1856 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1857 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1858 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1859 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1861 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1862 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1863 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1864 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1865 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1866 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1868 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1869 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1870 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1871 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1872 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1873 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1875 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1876 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1877 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1879 return Opc; // If not one we handle, return it unchanged.
1882 /// Returns true if the given increment is a Constant known to be equal to the
1883 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1884 /// be used.
1885 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1886 auto C = dyn_cast<ConstantSDNode>(Inc);
1887 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
/// Selects the NEON vector-load node \p N (1 to 4 vectors) into machine nodes.
///
/// \param N          Node being selected. It is cast to MemIntrinsicSDNode so
///                   its memory operand can be transferred to the new node.
/// \param isUpdating True for the address-updating form, which carries an
///                   increment operand and yields an extra i32 result.
/// \param NumVecs    Number of vectors loaded; asserted to be in [1, 4].
/// \param DOpcodes   Opcode table used when the result type is a 64-bit
///                   vector, indexed by the OpcodeIndex computed below.
/// \param QOpcodes0  Opcode table used for other vector types: the single
///                   load when NumVecs <= 2, otherwise the first of two loads.
/// \param QOpcodes1  Opcode table for the second load of the two-load form.
///
/// Returns without touching the DAG when SelectAddrMode6 rejects the address.
1890 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1891 const uint16_t *DOpcodes,
1892 const uint16_t *QOpcodes0,
1893 const uint16_t *QOpcodes1) {
1894 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1895 SDLoc dl(N);
1897 SDValue MemAddr, Align;
1898 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
1899 // nodes are not intrinsics.
// The address is operand 2 for intrinsics and operand 1 otherwise
// (presumably operand 1 of an intrinsic node is the intrinsic ID -- confirm).
1900 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1901 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1902 return;
1904 SDValue Chain = N->getOperand(0);
1905 EVT VT = N->getValueType(0);
1906 bool is64BitVector = VT.is64BitVector();
1907 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
// Map the vector type to an index into the opcode tables. 64-bit and wider
// types with the same element size share an index; which table is consulted
// is decided later from is64BitVector.
1909 unsigned OpcodeIndex;
1910 switch (VT.getSimpleVT().SimpleTy) {
1911 default: llvm_unreachable("unhandled vld type");
1912 // Double-register operations:
1913 case MVT::v8i8: OpcodeIndex = 0; break;
1914 case MVT::v4f16:
1915 case MVT::v4i16: OpcodeIndex = 1; break;
1916 case MVT::v2f32:
1917 case MVT::v2i32: OpcodeIndex = 2; break;
1918 case MVT::v1i64: OpcodeIndex = 3; break;
1919 // Quad-register operations:
1920 case MVT::v16i8: OpcodeIndex = 0; break;
1921 case MVT::v8f16:
1922 case MVT::v8i16: OpcodeIndex = 1; break;
1923 case MVT::v4f32:
1924 case MVT::v4i32: OpcodeIndex = 2; break;
1925 case MVT::v2f64:
1926 case MVT::v2i64: OpcodeIndex = 3; break;
// Result type of the machine node: the vector type itself for a single
// vector, otherwise a vector of i64 with NumVecs elements (3 is rounded up to
// 4), doubled when the vector type is not 64 bits wide.
1929 EVT ResTy;
1930 if (NumVecs == 1)
1931 ResTy = VT;
1932 else {
1933 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1934 if (!is64BitVector)
1935 ResTyElts *= 2;
1936 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
// Result list: loaded value, optional i32 for the updated address, chain.
1938 std::vector<EVT> ResTys;
1939 ResTys.push_back(ResTy);
1940 if (isUpdating)
1941 ResTys.push_back(MVT::i32);
1942 ResTys.push_back(MVT::Other);
1944 SDValue Pred = getAL(CurDAG, dl);
1945 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1946 SDNode *VLd;
1947 SmallVector<SDValue, 7> Ops;
1949 // Double registers and VLD1/VLD2 quad registers are directly supported.
1950 if (is64BitVector || NumVecs <= 2) {
1951 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1952 QOpcodes0[OpcodeIndex]);
1953 Ops.push_back(MemAddr);
1954 Ops.push_back(Align);
1955 if (isUpdating) {
// The increment operand directly follows the address operand.
1956 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1957 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1958 if (!IsImmUpdate) {
1959 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1960 // check for the opcode rather than the number of vector elements.
1961 if (isVLDfixed(Opc))
1962 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1963 Ops.push_back(Inc);
1964 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1965 // the operands if not such an opcode.
1966 } else if (!isVLDfixed(Opc))
1967 Ops.push_back(Reg0);
1969 Ops.push_back(Pred);
1970 Ops.push_back(Reg0);
1971 Ops.push_back(Chain);
1972 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1974 } else {
1975 // Otherwise, quad registers are loaded with two separate instructions,
1976 // where one loads the even registers and the other loads the odd registers.
1977 EVT AddrTy = MemAddr.getValueType();
1979 // Load the even subregs. This is always an updating load, so that it
1980 // provides the address to the second load for the odd subregs.
1981 SDValue ImplDef =
1982 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1983 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1984 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1985 ResTy, AddrTy, MVT::Other, OpsA);
// Results of VLdA: 0 = partially loaded super-register, 1 = updated address,
// 2 = chain.
1986 Chain = SDValue(VLdA, 2);
1988 // Load the odd subregs.
1989 Ops.push_back(SDValue(VLdA, 1));
1990 Ops.push_back(Align);
1991 if (isUpdating) {
1992 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1993 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1994 "only constant post-increment update allowed for VLD3/4");
// Inc is only inspected by the assert above; the cast silences the
// unused-variable warning in builds where asserts compile away.
1995 (void)Inc;
1996 Ops.push_back(Reg0);
// The first load's result is passed in so the second load fills in the
// remaining sub-registers of the same super-register.
1998 Ops.push_back(SDValue(VLdA, 0));
1999 Ops.push_back(Pred);
2000 Ops.push_back(Reg0);
2001 Ops.push_back(Chain);
2002 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2005 // Transfer memoperands.
2006 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2007 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
// A single-vector load has the same result layout as N, so N can be replaced
// wholesale without extracting sub-registers.
2009 if (NumVecs == 1) {
2010 ReplaceNode(N, VLd);
2011 return;
2014 // Extract out the subregisters.
2015 SDValue SuperReg = SDValue(VLd, 0);
2016 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2017 ARM::qsub_3 == ARM::qsub_0 + 3,
2018 "Unexpected subreg numbering");
2019 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2020 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2021 ReplaceUses(SDValue(N, Vec),
2022 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// N's results after the vectors are rewired to VLd's results 1 and (when
// updating) 2.
2023 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2024 if (isUpdating)
2025 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2026 CurDAG->RemoveDeadNode(N);
/// Selects the NEON vector-store node \p N (1 to 4 vectors) into machine
/// nodes.
///
/// \param N          Node being selected. It is cast to MemIntrinsicSDNode so
///                   its memory operand can be attached to the new node(s).
/// \param isUpdating True for the address-updating form, which carries an
///                   increment operand and yields an extra i32 result.
/// \param NumVecs    Number of vectors stored; asserted to be in [1, 4].
/// \param DOpcodes   Opcode table used when the stored type is a 64-bit
///                   vector, indexed by the OpcodeIndex computed below.
/// \param QOpcodes0  Opcode table used for other vector types: the single
///                   store when NumVecs <= 2, otherwise the first of two
///                   stores.
/// \param QOpcodes1  Opcode table for the second store of the two-store form.
///
/// Returns without touching the DAG when SelectAddrMode6 rejects the address.
2029 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2030 const uint16_t *DOpcodes,
2031 const uint16_t *QOpcodes0,
2032 const uint16_t *QOpcodes1) {
2033 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2034 SDLoc dl(N);
2036 SDValue MemAddr, Align;
2037 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2038 // nodes are not intrinsics.
2039 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
// Both operand layouts put the first stored vector at index 3:
// intrinsic 2 + 1, updating 1 + 2.
2040 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2041 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2042 return;
2044 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2046 SDValue Chain = N->getOperand(0);
// The vector type is taken from the first stored operand.
2047 EVT VT = N->getOperand(Vec0Idx).getValueType();
2048 bool is64BitVector = VT.is64BitVector();
2049 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
// Map the vector type to an index into the opcode tables. 64-bit and wider
// types with the same element size share an index.
2051 unsigned OpcodeIndex;
2052 switch (VT.getSimpleVT().SimpleTy) {
2053 default: llvm_unreachable("unhandled vst type");
2054 // Double-register operations:
2055 case MVT::v8i8: OpcodeIndex = 0; break;
2056 case MVT::v4f16:
2057 case MVT::v4i16: OpcodeIndex = 1; break;
2058 case MVT::v2f32:
2059 case MVT::v2i32: OpcodeIndex = 2; break;
2060 case MVT::v1i64: OpcodeIndex = 3; break;
2061 // Quad-register operations:
2062 case MVT::v16i8: OpcodeIndex = 0; break;
2063 case MVT::v8f16:
2064 case MVT::v8i16: OpcodeIndex = 1; break;
2065 case MVT::v4f32:
2066 case MVT::v4i32: OpcodeIndex = 2; break;
2067 case MVT::v2f64:
2068 case MVT::v2i64: OpcodeIndex = 3; break;
// Result list: optional i32 for the updated address, then the chain.
2071 std::vector<EVT> ResTys;
2072 if (isUpdating)
2073 ResTys.push_back(MVT::i32);
2074 ResTys.push_back(MVT::Other);
2076 SDValue Pred = getAL(CurDAG, dl);
2077 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2078 SmallVector<SDValue, 7> Ops;
2080 // Double registers and VST1/VST2 quad registers are directly supported.
2081 if (is64BitVector || NumVecs <= 2) {
// Build the single source operand: the vector itself for one vector,
// otherwise a node combining all stored vectors.
2082 SDValue SrcReg;
2083 if (NumVecs == 1) {
2084 SrcReg = N->getOperand(Vec0Idx);
2085 } else if (is64BitVector) {
2086 // Form a REG_SEQUENCE to force register allocation.
2087 SDValue V0 = N->getOperand(Vec0Idx + 0);
2088 SDValue V1 = N->getOperand(Vec0Idx + 1);
2089 if (NumVecs == 2)
2090 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2091 else {
2092 SDValue V2 = N->getOperand(Vec0Idx + 2);
2093 // If it's a vst3, form a quad D-register and leave the last part as
2094 // an undef.
2095 SDValue V3 = (NumVecs == 3)
2096 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2097 : N->getOperand(Vec0Idx + 3);
2098 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2100 } else {
2101 // Form a QQ register.
2102 SDValue Q0 = N->getOperand(Vec0Idx);
2103 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2104 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2107 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2108 QOpcodes0[OpcodeIndex]);
2109 Ops.push_back(MemAddr);
2110 Ops.push_back(Align);
2111 if (isUpdating) {
// The increment operand directly follows the address operand.
2112 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2113 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2114 if (!IsImmUpdate) {
2115 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2116 // check for the opcode rather than the number of vector elements.
2117 if (isVSTfixed(Opc))
2118 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2119 Ops.push_back(Inc);
2121 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2122 // the operands if not such an opcode.
2123 else if (!isVSTfixed(Opc))
2124 Ops.push_back(Reg0);
2126 Ops.push_back(SrcReg);
2127 Ops.push_back(Pred);
2128 Ops.push_back(Reg0);
2129 Ops.push_back(Chain);
2130 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2132 // Transfer memoperands.
2133 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2135 ReplaceNode(N, VSt);
2136 return;
2139 // Otherwise, quad registers are stored with two separate instructions,
2140 // where one stores the even registers and the other stores the odd registers.
2142 // Form the QQQQ REG_SEQUENCE.
2143 SDValue V0 = N->getOperand(Vec0Idx + 0);
2144 SDValue V1 = N->getOperand(Vec0Idx + 1);
2145 SDValue V2 = N->getOperand(Vec0Idx + 2);
// With three vectors the fourth slot is filled with an IMPLICIT_DEF.
2146 SDValue V3 = (NumVecs == 3)
2147 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2148 : N->getOperand(Vec0Idx + 3);
2149 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2151 // Store the even D registers. This is always an updating store, so that it
2152 // provides the address to the second store for the odd subregs.
2153 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2154 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2155 MemAddr.getValueType(),
2156 MVT::Other, OpsA);
2157 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
// Results of VStA: 0 = updated address, 1 = chain.
2158 Chain = SDValue(VStA, 1);
2160 // Store the odd D registers.
2161 Ops.push_back(SDValue(VStA, 0));
2162 Ops.push_back(Align);
2163 if (isUpdating) {
2164 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2165 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2166 "only constant post-increment update allowed for VST3/4");
// Inc is only inspected by the assert above; the cast silences the
// unused-variable warning in builds where asserts compile away.
2167 (void)Inc;
2168 Ops.push_back(Reg0);
2170 Ops.push_back(RegSeq);
2171 Ops.push_back(Pred);
2172 Ops.push_back(Reg0);
2173 Ops.push_back(Chain);
2174 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2175 Ops);
2176 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2177 ReplaceNode(N, VStB);
/// Selects a NEON single-lane load or store node \p N (2 to 4 vectors) into a
/// machine node.
///
/// \param N          Node being selected. It is cast to MemIntrinsicSDNode so
///                   its memory operand can be attached to the new node.
/// \param IsLoad     True for a lane load (the new node then produces a
///                   super-register whose sub-registers replace N's vector
///                   results); false for a lane store.
/// \param isUpdating True for the address-updating form, which carries an
///                   increment operand and yields an extra i32 result.
/// \param NumVecs    Number of vectors involved; asserted to be in [2, 4].
/// \param DOpcodes   Opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes   Opcode table used for the other handled vector types.
///
/// Returns without touching the DAG when SelectAddrMode6 rejects the address.
2180 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2181 unsigned NumVecs,
2182 const uint16_t *DOpcodes,
2183 const uint16_t *QOpcodes) {
2184 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2185 SDLoc dl(N);
2187 SDValue MemAddr, Align;
2188 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2189 // nodes are not intrinsics.
2190 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
// Both operand layouts put the first vector operand at index 3:
// intrinsic 2 + 1, updating 1 + 2.
2191 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2192 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2193 return;
2195 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2197 SDValue Chain = N->getOperand(0);
// The lane number is the constant operand that follows the NumVecs vector
// operands.
2198 unsigned Lane =
2199 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2200 EVT VT = N->getOperand(Vec0Idx).getValueType();
2201 bool is64BitVector = VT.is64BitVector();
// Compute the alignment operand. It stays 0 for NumVecs == 3; otherwise the
// requested alignment is capped at the number of bytes accessed (one element
// per vector), dropped to 0 when it is below both 8 and that byte count, and
// finally reduced to its lowest set bit, with 1 also mapped to 0.
2203 unsigned Alignment = 0;
2204 if (NumVecs != 3) {
2205 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2206 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2207 if (Alignment > NumBytes)
2208 Alignment = NumBytes;
2209 if (Alignment < 8 && Alignment < NumBytes)
2210 Alignment = 0;
2211 // Alignment must be a power of two; make sure of that.
2212 Alignment = (Alignment & -Alignment);
2213 if (Alignment == 1)
2214 Alignment = 0;
2216 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
// Map the vector type to an index into the opcode tables. The D and Q tables
// are indexed independently (v8i16 is index 0 of the Q table); types not
// listed here hit llvm_unreachable.
2218 unsigned OpcodeIndex;
2219 switch (VT.getSimpleVT().SimpleTy) {
2220 default: llvm_unreachable("unhandled vld/vst lane type");
2221 // Double-register operations:
2222 case MVT::v8i8: OpcodeIndex = 0; break;
2223 case MVT::v4f16:
2224 case MVT::v4i16: OpcodeIndex = 1; break;
2225 case MVT::v2f32:
2226 case MVT::v2i32: OpcodeIndex = 2; break;
2227 // Quad-register operations:
2228 case MVT::v8f16:
2229 case MVT::v8i16: OpcodeIndex = 0; break;
2230 case MVT::v4f32:
2231 case MVT::v4i32: OpcodeIndex = 1; break;
// Result list: for loads a vector of i64 with NumVecs elements (3 is rounded
// up to 4, doubled when the type is not 64 bits wide), then an optional i32
// for the updated address, then the chain.
2234 std::vector<EVT> ResTys;
2235 if (IsLoad) {
2236 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2237 if (!is64BitVector)
2238 ResTyElts *= 2;
2239 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2240 MVT::i64, ResTyElts));
2242 if (isUpdating)
2243 ResTys.push_back(MVT::i32);
2244 ResTys.push_back(MVT::Other);
2246 SDValue Pred = getAL(CurDAG, dl);
2247 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2249 SmallVector<SDValue, 8> Ops;
2250 Ops.push_back(MemAddr);
2251 Ops.push_back(Align);
2252 if (isUpdating) {
2253 SDValue Inc = N->getOperand(AddrOpIdx + 1);
// Only one element per vector is accessed, so the "perfect" increment is
// measured with the element type rather than the whole vector type.
2254 bool IsImmUpdate =
2255 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2256 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
// Combine the vector operands into one super-register operand; with three
// vectors the fourth slot is filled with an IMPLICIT_DEF.
2259 SDValue SuperReg;
2260 SDValue V0 = N->getOperand(Vec0Idx + 0);
2261 SDValue V1 = N->getOperand(Vec0Idx + 1);
2262 if (NumVecs == 2) {
2263 if (is64BitVector)
2264 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2265 else
2266 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2267 } else {
2268 SDValue V2 = N->getOperand(Vec0Idx + 2);
2269 SDValue V3 = (NumVecs == 3)
2270 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2271 : N->getOperand(Vec0Idx + 3);
2272 if (is64BitVector)
2273 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2274 else
2275 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2277 Ops.push_back(SuperReg);
2278 Ops.push_back(getI32Imm(Lane, dl));
2279 Ops.push_back(Pred);
2280 Ops.push_back(Reg0);
2281 Ops.push_back(Chain);
2283 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2284 QOpcodes[OpcodeIndex]);
// Note: the variable is named VLdLn but it also holds the store node when
// IsLoad is false.
2285 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2286 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2287 if (!IsLoad) {
2288 ReplaceNode(N, VLdLn);
2289 return;
2292 // Extract the subregisters.
2293 SuperReg = SDValue(VLdLn, 0);
2294 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2295 ARM::qsub_3 == ARM::qsub_0 + 3,
2296 "Unexpected subreg numbering");
2297 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2298 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2299 ReplaceUses(SDValue(N, Vec),
2300 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// N's results after the vectors are rewired to the new node's results 1 and
// (when updating) 2.
2301 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2302 if (isUpdating)
2303 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2304 CurDAG->RemoveDeadNode(N);
/// Selects a NEON load-and-duplicate node \p N (1 to 4 vectors) into machine
/// nodes.
///
/// \param N           Node being selected. It is cast to MemIntrinsicSDNode so
///                    its memory operand can be transferred to the new node.
/// \param IsIntrinsic Selects where the address operand lives: operand 2 when
///                    true, operand 1 otherwise.
/// \param isUpdating  True for the address-updating form, which carries an
///                    increment operand and yields an extra i32 result.
/// \param NumVecs     Number of vectors loaded; asserted to be in [1, 4].
/// \param DOpcodes    Opcode table used when the result type is a 64-bit
///                    vector.
/// \param QOpcodes0   Opcode table used for other vector types: the single
///                    load when NumVecs == 1, otherwise the first of two
///                    loads.
/// \param QOpcodes1   Opcode table for the second load of the two-load form.
///
/// Returns without touching the DAG when SelectAddrMode6 rejects the address.
2307 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2308 bool isUpdating, unsigned NumVecs,
2309 const uint16_t *DOpcodes,
2310 const uint16_t *QOpcodes0,
2311 const uint16_t *QOpcodes1) {
2312 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2313 SDLoc dl(N);
2315 SDValue MemAddr, Align;
2316 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2317 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2318 return;
2320 SDValue Chain = N->getOperand(0);
2321 EVT VT = N->getValueType(0);
2322 bool is64BitVector = VT.is64BitVector();
// Compute the alignment operand. It stays 0 for NumVecs == 3; otherwise the
// requested alignment is capped at the number of bytes accessed (one element
// per vector), dropped to 0 when it is below both 8 and that byte count, and
// finally reduced to its lowest set bit, with 1 also mapped to 0.
2324 unsigned Alignment = 0;
2325 if (NumVecs != 3) {
2326 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2327 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2328 if (Alignment > NumBytes)
2329 Alignment = NumBytes;
2330 if (Alignment < 8 && Alignment < NumBytes)
2331 Alignment = 0;
2332 // Alignment must be a power of two; make sure of that.
2333 Alignment = (Alignment & -Alignment);
2334 if (Alignment == 1)
2335 Alignment = 0;
2337 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
// Map the vector type to an index into the opcode tables; the index depends
// only on the element size.
2339 unsigned OpcodeIndex;
2340 switch (VT.getSimpleVT().SimpleTy) {
2341 default: llvm_unreachable("unhandled vld-dup type");
2342 case MVT::v8i8:
2343 case MVT::v16i8: OpcodeIndex = 0; break;
2344 case MVT::v4i16:
2345 case MVT::v8i16:
2346 case MVT::v4f16:
2347 case MVT::v8f16:
2348 OpcodeIndex = 1; break;
2349 case MVT::v2f32:
2350 case MVT::v2i32:
2351 case MVT::v4f32:
2352 case MVT::v4i32: OpcodeIndex = 2; break;
2353 case MVT::v1f64:
2354 case MVT::v1i64: OpcodeIndex = 3; break;
// Result type: a vector of i64 with NumVecs elements (3 is rounded up to 4),
// doubled when the vector type is not 64 bits wide.
2357 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2358 if (!is64BitVector)
2359 ResTyElts *= 2;
2360 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
// Result list: loaded value, optional i32 for the updated address, chain.
2362 std::vector<EVT> ResTys;
2363 ResTys.push_back(ResTy);
2364 if (isUpdating)
2365 ResTys.push_back(MVT::i32);
2366 ResTys.push_back(MVT::Other);
2368 SDValue Pred = getAL(CurDAG, dl);
2369 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2371 SDNode *VLdDup;
// Case 1: a single machine node suffices (64-bit vectors, or one vector).
2372 if (is64BitVector || NumVecs == 1) {
2373 SmallVector<SDValue, 6> Ops;
2374 Ops.push_back(MemAddr);
2375 Ops.push_back(Align);
2376 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2377 QOpcodes0[OpcodeIndex];
2378 if (isUpdating) {
2379 // fixed-stride update instructions don't have an explicit writeback
2380 // operand. It's implicit in the opcode itself.
// NOTE(review): the increment is read from the hard-coded operand index 2,
// which equals AddrOpIdx + 1 only when IsIntrinsic is false -- confirm that
// updating nodes are never intrinsics here.
2381 SDValue Inc = N->getOperand(2);
2382 bool IsImmUpdate =
2383 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2384 if (NumVecs <= 2 && !IsImmUpdate)
2385 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2386 if (!IsImmUpdate)
2387 Ops.push_back(Inc);
2388 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2389 else if (NumVecs > 2)
2390 Ops.push_back(Reg0);
2392 Ops.push_back(Pred);
2393 Ops.push_back(Reg0);
2394 Ops.push_back(Chain);
2395 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Case 2: two vectors of a wider type -- two loads chained one after the
// other, both using the same address and alignment operands.
2396 } else if (NumVecs == 2) {
2397 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2398 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2399 dl, ResTys, OpsA);
2401 Chain = SDValue(VLdA, 1);
2402 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2403 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
// Case 3: three or four vectors of a wider type -- the first load starts from
// an IMPLICIT_DEF super-register and the second load takes the first load's
// result as its input super-register.
2404 } else {
2405 SDValue ImplDef =
2406 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2407 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2408 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2409 dl, ResTys, OpsA);
2411 SDValue SuperReg = SDValue(VLdA, 0);
2412 Chain = SDValue(VLdA, 1);
2413 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2414 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2417 // Transfer memoperands.
2418 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2419 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2421 // Extract the subregisters.
2422 if (NumVecs == 1) {
2423 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2424 } else {
2425 SDValue SuperReg = SDValue(VLdDup, 0);
// Only the dsub numbering is asserted here, although qsub_0 + Vec is used as
// well when the vector type is not 64 bits wide.
2426 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2427 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2428 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2429 ReplaceUses(SDValue(N, Vec),
2430 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
// N's results after the vectors are rewired to VLdDup's results 1 and (when
// updating) 2.
2433 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2434 if (isUpdating)
2435 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2436 CurDAG->RemoveDeadNode(N);
/// Tries to select \p N as a bitfield-extract instruction (SBFX/UBFX or their
/// Thumb2 forms) or, for one special case, as a plain right shift.
///
/// Patterns recognised, in the order they are tested:
///   * N is an AND of (srl X, C) with a low-bits mask;
///   * N's operand 0 is a SHL by a constant and N's operand 1 is a constant
///     shift amount (a shift of a shift);
///   * N's operand 0 is an AND with a shifted mask and N's operand 1 is a
///     constant equal to the mask's lowest set bit;
///   * N is a SIGN_EXTEND_INREG of an SRL or SRA by a constant.
///
/// \param N        Node to inspect.
/// \param isSigned Chooses the signed (SBFX) or unsigned (UBFX) opcode.
/// \returns true when N has been rewritten via SelectNodeTo, false when the
///          subtarget lacks V6T2 operations or no pattern matched.
2439 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2440 if (!Subtarget->hasV6T2Ops())
2441 return false;
2443 unsigned Opc = isSigned
2444 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2445 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2446 SDLoc dl(N);
2448 // For unsigned extracts, check for a shift right and mask
2449 unsigned And_imm = 0;
2450 if (N->getOpcode() == ISD::AND) {
2451 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2453 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2454 if (And_imm & (And_imm + 1))
2455 return false;
2457 unsigned Srl_imm = 0;
2458 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2459 Srl_imm)) {
2460 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2462 // Mask off the unnecessary bits of the AND immediate; normally
2463 // DAGCombine will do this, but that might not happen if
2464 // targetShrinkDemandedConstant chooses a different immediate.
2465 And_imm &= -1U >> Srl_imm;
2467 // Note: The width operand is encoded as width-1.
2468 unsigned Width = countTrailingOnes(And_imm) - 1;
2469 unsigned LSB = Srl_imm;
2471 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
// LSB + Width + 1 is one past the highest extracted bit; when that equals the
// value's bit width the field reaches the top bit.
2473 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2474 // It's cheaper to use a right shift to extract the top bits.
2475 if (Subtarget->isThumb()) {
2476 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2477 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2478 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2479 getAL(CurDAG, dl), Reg0, Reg0 };
2480 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2481 return true;
2484 // ARM models shift instructions as MOVsi with shifter operand.
2485 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2486 SDValue ShOpc =
2487 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2488 MVT::i32);
2489 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2490 getAL(CurDAG, dl), Reg0, Reg0 };
2491 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2492 return true;
// General case: extract Width + 1 bits starting at bit LSB of the value that
// feeds the SRL.
2495 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2496 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2497 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2498 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2499 getAL(CurDAG, dl), Reg0 };
2500 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2501 return true;
2504 return false;
2507 // Otherwise, we're looking for a shift of a shift
2508 unsigned Shl_imm = 0;
2509 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2510 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2511 unsigned Srl_imm = 0;
2512 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2513 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2514 // Note: The width operand is encoded as width-1.
2515 unsigned Width = 32 - Srl_imm - 1;
// The right shift must be at least as large as the left shift, otherwise no
// bitfield of the original value is being extracted.
2516 int LSB = Srl_imm - Shl_imm;
2517 if (LSB < 0)
2518 return false;
2519 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2520 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2521 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2522 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2523 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2524 getAL(CurDAG, dl), Reg0 };
2525 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2526 return true;
2530 // Or we are looking for a shift of an and, with a mask operand
2531 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2532 isShiftedMask_32(And_imm)) {
2533 unsigned Srl_imm = 0;
2534 unsigned LSB = countTrailingZeros(And_imm);
2535 // Shift must be the same as the ands lsb
2536 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2537 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2538 unsigned MSB = 31 - countLeadingZeros(And_imm);
2539 // Note: The width operand is encoded as width-1.
2540 unsigned Width = MSB - LSB;
2541 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2542 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2543 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2544 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2545 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2546 getAL(CurDAG, dl), Reg0 };
2547 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2548 return true;
2552 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
// Here Width is the real field width in bits (taken from the VT operand);
// the width-1 encoding is applied when the operand is built below.
2553 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2554 unsigned LSB = 0;
// The extended value must come from a right shift (logical or arithmetic) by
// a constant; that constant becomes the field's LSB.
2555 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2556 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2557 return false;
// Reject fields that would extend past bit 31.
2559 if (LSB + Width > 32)
2560 return false;
2562 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2563 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2564 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2565 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2566 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2567 getAL(CurDAG, dl), Reg0 };
2568 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2569 return true;
2572 return false;
2575 /// Target-specific DAG combining for ISD::XOR.
2576 /// Target-independent combining lowers SELECT_CC nodes of the form
2577 /// select_cc setg[ge] X, 0, X, -X
2578 /// select_cc setgt X, -1, X, -X
2579 /// select_cc setl[te] X, 0, -X, X
2580 /// select_cc setlt X, 1, -X, X
2581 /// which represent Integer ABS into:
2582 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2583 /// ARM instruction selection detects the latter and matches it to
2584 /// ARM::ABS or ARM::t2ABS machine node.
2585 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2586 SDValue XORSrc0 = N->getOperand(0);
2587 SDValue XORSrc1 = N->getOperand(1);
2588 EVT VT = N->getValueType(0);
2590 if (Subtarget->isThumb1Only())
2591 return false;
2593 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2594 return false;
2596 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2597 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2598 SDValue SRASrc0 = XORSrc1.getOperand(0);
2599 SDValue SRASrc1 = XORSrc1.getOperand(1);
2600 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2601 EVT XType = SRASrc0.getValueType();
2602 unsigned Size = XType.getSizeInBits() - 1;
2604 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2605 XType.isInteger() && SRAConstant != nullptr &&
2606 Size == SRAConstant->getZExtValue()) {
2607 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2608 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2609 return true;
2612 return false;
2615 /// We've got special pseudo-instructions for these
2616 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2617 unsigned Opcode;
2618 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2619 if (MemTy == MVT::i8)
2620 Opcode = ARM::CMP_SWAP_8;
2621 else if (MemTy == MVT::i16)
2622 Opcode = ARM::CMP_SWAP_16;
2623 else if (MemTy == MVT::i32)
2624 Opcode = ARM::CMP_SWAP_32;
2625 else
2626 llvm_unreachable("Unknown AtomicCmpSwap type");
2628 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2629 N->getOperand(0)};
2630 SDNode *CmpSwap = CurDAG->getMachineNode(
2631 Opcode, SDLoc(N),
2632 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2634 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2635 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2637 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2638 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2639 CurDAG->RemoveDeadNode(N);
2642 static Optional<std::pair<unsigned, unsigned>>
2643 getContiguousRangeOfSetBits(const APInt &A) {
2644 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2645 unsigned LastOne = A.countTrailingZeros();
2646 if (A.countPopulation() != (FirstOne - LastOne + 1))
2647 return Optional<std::pair<unsigned,unsigned>>();
2648 return std::make_pair(FirstOne, LastOne);
2651 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2652 assert(N->getOpcode() == ARMISD::CMPZ);
2653 SwitchEQNEToPLMI = false;
2655 if (!Subtarget->isThumb())
2656 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2657 // LSR don't exist as standalone instructions - they need the barrel shifter.
2658 return;
2660 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2661 SDValue And = N->getOperand(0);
2662 if (!And->hasOneUse())
2663 return;
2665 SDValue Zero = N->getOperand(1);
2666 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2667 And->getOpcode() != ISD::AND)
2668 return;
2669 SDValue X = And.getOperand(0);
2670 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2672 if (!C)
2673 return;
2674 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2675 if (!Range)
2676 return;
2678 // There are several ways to lower this:
2679 SDNode *NewN;
2680 SDLoc dl(N);
2682 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2683 if (Subtarget->isThumb2()) {
2684 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2685 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2686 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2687 CurDAG->getRegister(0, MVT::i32) };
2688 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2689 } else {
2690 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2691 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2692 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2693 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2697 if (Range->second == 0) {
2698 // 1. Mask includes the LSB -> Simply shift the top N bits off
2699 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2700 ReplaceNode(And.getNode(), NewN);
2701 } else if (Range->first == 31) {
2702 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2703 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2704 ReplaceNode(And.getNode(), NewN);
2705 } else if (Range->first == Range->second) {
2706 // 3. Only one bit is set. We can shift this into the sign bit and use a
2707 // PL/MI comparison.
2708 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2709 ReplaceNode(And.getNode(), NewN);
2711 SwitchEQNEToPLMI = true;
2712 } else if (!Subtarget->hasV6T2Ops()) {
2713 // 4. Do a double shift to clear bottom and top bits, but only in
2714 // thumb-1 mode as in thumb-2 we can use UBFX.
2715 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2716 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2717 Range->second + (31 - Range->first));
2718 ReplaceNode(And.getNode(), NewN);
2723 void ARMDAGToDAGISel::Select(SDNode *N) {
2724 SDLoc dl(N);
2726 if (N->isMachineOpcode()) {
2727 N->setNodeId(-1);
2728 return; // Already selected.
2731 switch (N->getOpcode()) {
2732 default: break;
2733 case ISD::STORE: {
2734 // For Thumb1, match an sp-relative store in C++. This is a little
2735 // unfortunate, but I don't think I can make the chain check work
2736 // otherwise. (The chain of the store has to be the same as the chain
2737 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2738 // a direct reference to "SP".)
2740 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2741 // a different addressing mode from other four-byte stores.
2743 // This pattern usually comes up with call arguments.
2744 StoreSDNode *ST = cast<StoreSDNode>(N);
2745 SDValue Ptr = ST->getBasePtr();
2746 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2747 int RHSC = 0;
2748 if (Ptr.getOpcode() == ISD::ADD &&
2749 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2750 Ptr = Ptr.getOperand(0);
2752 if (Ptr.getOpcode() == ISD::CopyFromReg &&
2753 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2754 Ptr.getOperand(0) == ST->getChain()) {
2755 SDValue Ops[] = {ST->getValue(),
2756 CurDAG->getRegister(ARM::SP, MVT::i32),
2757 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2758 getAL(CurDAG, dl),
2759 CurDAG->getRegister(0, MVT::i32),
2760 ST->getChain()};
2761 MachineSDNode *ResNode =
2762 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2763 MachineMemOperand *MemOp = ST->getMemOperand();
2764 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2765 ReplaceNode(N, ResNode);
2766 return;
2769 break;
2771 case ISD::WRITE_REGISTER:
2772 if (tryWriteRegister(N))
2773 return;
2774 break;
2775 case ISD::READ_REGISTER:
2776 if (tryReadRegister(N))
2777 return;
2778 break;
2779 case ISD::INLINEASM:
2780 case ISD::INLINEASM_BR:
2781 if (tryInlineAsm(N))
2782 return;
2783 break;
2784 case ISD::XOR:
2785 // Select special operations if XOR node forms integer ABS pattern
2786 if (tryABSOp(N))
2787 return;
2788 // Other cases are autogenerated.
2789 break;
2790 case ISD::Constant: {
2791 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2792 // If we can't materialize the constant we need to use a literal pool
2793 if (ConstantMaterializationCost(Val, Subtarget) > 2) {
2794 SDValue CPIdx = CurDAG->getTargetConstantPool(
2795 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2796 TLI->getPointerTy(CurDAG->getDataLayout()));
2798 SDNode *ResNode;
2799 if (Subtarget->isThumb()) {
2800 SDValue Ops[] = {
2801 CPIdx,
2802 getAL(CurDAG, dl),
2803 CurDAG->getRegister(0, MVT::i32),
2804 CurDAG->getEntryNode()
2806 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2807 Ops);
2808 } else {
2809 SDValue Ops[] = {
2810 CPIdx,
2811 CurDAG->getTargetConstant(0, dl, MVT::i32),
2812 getAL(CurDAG, dl),
2813 CurDAG->getRegister(0, MVT::i32),
2814 CurDAG->getEntryNode()
2816 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2817 Ops);
2819 // Annotate the Node with memory operand information so that MachineInstr
2820 // queries work properly. This e.g. gives the register allocation the
2821 // required information for rematerialization.
2822 MachineFunction& MF = CurDAG->getMachineFunction();
2823 MachineMemOperand *MemOp =
2824 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2825 MachineMemOperand::MOLoad, 4, 4);
2827 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2829 ReplaceNode(N, ResNode);
2830 return;
2833 // Other cases are autogenerated.
2834 break;
2836 case ISD::FrameIndex: {
2837 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2838 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2839 SDValue TFI = CurDAG->getTargetFrameIndex(
2840 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2841 if (Subtarget->isThumb1Only()) {
2842 // Set the alignment of the frame object to 4, to avoid having to generate
2843 // more than one ADD
2844 MachineFrameInfo &MFI = MF->getFrameInfo();
2845 if (MFI.getObjectAlignment(FI) < 4)
2846 MFI.setObjectAlignment(FI, 4);
2847 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2848 CurDAG->getTargetConstant(0, dl, MVT::i32));
2849 return;
2850 } else {
2851 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2852 ARM::t2ADDri : ARM::ADDri);
2853 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2854 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2855 CurDAG->getRegister(0, MVT::i32) };
2856 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2857 return;
2860 case ISD::SRL:
2861 if (tryV6T2BitfieldExtractOp(N, false))
2862 return;
2863 break;
2864 case ISD::SIGN_EXTEND_INREG:
2865 case ISD::SRA:
2866 if (tryV6T2BitfieldExtractOp(N, true))
2867 return;
2868 break;
2869 case ISD::MUL:
2870 if (Subtarget->isThumb1Only())
2871 break;
2872 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2873 unsigned RHSV = C->getZExtValue();
2874 if (!RHSV) break;
2875 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2876 unsigned ShImm = Log2_32(RHSV-1);
2877 if (ShImm >= 32)
2878 break;
2879 SDValue V = N->getOperand(0);
2880 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2881 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2882 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2883 if (Subtarget->isThumb()) {
2884 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2885 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2886 return;
2887 } else {
2888 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2889 Reg0 };
2890 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2891 return;
2894 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2895 unsigned ShImm = Log2_32(RHSV+1);
2896 if (ShImm >= 32)
2897 break;
2898 SDValue V = N->getOperand(0);
2899 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2900 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2901 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2902 if (Subtarget->isThumb()) {
2903 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2904 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2905 return;
2906 } else {
2907 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2908 Reg0 };
2909 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2910 return;
2914 break;
2915 case ISD::AND: {
2916 // Check for unsigned bitfield extract
2917 if (tryV6T2BitfieldExtractOp(N, false))
2918 return;
2920 // If an immediate is used in an AND node, it is possible that the immediate
2921 // can be more optimally materialized when negated. If this is the case we
2922 // can negate the immediate and use a BIC instead.
2923 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2924 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2925 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2927 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2928 // immediate can be negated and fit in the immediate operand of
2929 // a t2BIC, don't do any manual transform here as this can be
2930 // handled by the generic ISel machinery.
2931 bool PreferImmediateEncoding =
2932 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2933 if (!PreferImmediateEncoding &&
2934 ConstantMaterializationCost(Imm, Subtarget) >
2935 ConstantMaterializationCost(~Imm, Subtarget)) {
2936 // The current immediate costs more to materialize than a negated
2937 // immediate, so negate the immediate and use a BIC.
2938 SDValue NewImm =
2939 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2940 // If the new constant didn't exist before, reposition it in the topological
2941 // ordering so it is just before N. Otherwise, don't touch its location.
2942 if (NewImm->getNodeId() == -1)
2943 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2945 if (!Subtarget->hasThumb2()) {
2946 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2947 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2948 CurDAG->getRegister(0, MVT::i32)};
2949 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2950 return;
2951 } else {
2952 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2953 CurDAG->getRegister(0, MVT::i32),
2954 CurDAG->getRegister(0, MVT::i32)};
2955 ReplaceNode(N,
2956 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2957 return;
2962 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2963 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2964 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2965 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2966 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)"
2967 EVT VT = N->getValueType(0);
2968 if (VT != MVT::i32)
2969 break;
2970 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2971 ? ARM::t2MOVTi16
2972 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2973 if (!Opc)
2974 break;
2975 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2976 N1C = dyn_cast<ConstantSDNode>(N1);
2977 if (!N1C)
2978 break;
2979 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2980 SDValue N2 = N0.getOperand(1);
2981 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2982 if (!N2C)
2983 break;
2984 unsigned N1CVal = N1C->getZExtValue();
2985 unsigned N2CVal = N2C->getZExtValue();
2986 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2987 (N1CVal & 0xffffU) == 0xffffU &&
2988 (N2CVal & 0xffffU) == 0x0U) {
2989 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2990 dl, MVT::i32);
2991 SDValue Ops[] = { N0.getOperand(0), Imm16,
2992 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2993 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2994 return;
2998 break;
3000 case ARMISD::UMAAL: {
3001 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3002 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3003 N->getOperand(2), N->getOperand(3),
3004 getAL(CurDAG, dl),
3005 CurDAG->getRegister(0, MVT::i32) };
3006 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3007 return;
3009 case ARMISD::UMLAL:{
3010 if (Subtarget->isThumb()) {
3011 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3012 N->getOperand(3), getAL(CurDAG, dl),
3013 CurDAG->getRegister(0, MVT::i32)};
3014 ReplaceNode(
3015 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3016 return;
3017 }else{
3018 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3019 N->getOperand(3), getAL(CurDAG, dl),
3020 CurDAG->getRegister(0, MVT::i32),
3021 CurDAG->getRegister(0, MVT::i32) };
3022 ReplaceNode(N, CurDAG->getMachineNode(
3023 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3024 MVT::i32, MVT::i32, Ops));
3025 return;
3028 case ARMISD::SMLAL:{
3029 if (Subtarget->isThumb()) {
3030 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3031 N->getOperand(3), getAL(CurDAG, dl),
3032 CurDAG->getRegister(0, MVT::i32)};
3033 ReplaceNode(
3034 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3035 return;
3036 }else{
3037 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3038 N->getOperand(3), getAL(CurDAG, dl),
3039 CurDAG->getRegister(0, MVT::i32),
3040 CurDAG->getRegister(0, MVT::i32) };
3041 ReplaceNode(N, CurDAG->getMachineNode(
3042 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3043 MVT::i32, MVT::i32, Ops));
3044 return;
3047 case ARMISD::SUBE: {
3048 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3049 break;
3050 // Look for a pattern to match SMMLS
3051 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3052 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3053 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3054 !SDValue(N, 1).use_empty())
3055 break;
3057 if (Subtarget->isThumb())
3058 assert(Subtarget->hasThumb2() &&
3059 "This pattern should not be generated for Thumb");
3061 SDValue SmulLoHi = N->getOperand(1);
3062 SDValue Subc = N->getOperand(2);
3063 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3065 if (!Zero || Zero->getZExtValue() != 0 ||
3066 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3067 N->getOperand(1) != SmulLoHi.getValue(1) ||
3068 N->getOperand(2) != Subc.getValue(1))
3069 break;
3071 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3072 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3073 N->getOperand(0), getAL(CurDAG, dl),
3074 CurDAG->getRegister(0, MVT::i32) };
3075 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3076 return;
3078 case ISD::LOAD: {
3079 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3080 return;
3081 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3082 if (tryT2IndexedLoad(N))
3083 return;
3084 } else if (Subtarget->isThumb()) {
3085 if (tryT1IndexedLoad(N))
3086 return;
3087 } else if (tryARMIndexedLoad(N))
3088 return;
3089 // Other cases are autogenerated.
3090 break;
3092 case ARMISD::WLS:
3093 case ARMISD::LE: {
3094 SDValue Ops[] = { N->getOperand(1),
3095 N->getOperand(2),
3096 N->getOperand(0) };
3097 unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3098 ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3099 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3100 ReplaceUses(N, New);
3101 CurDAG->RemoveDeadNode(N);
3102 return;
3104 case ARMISD::LOOP_DEC: {
3105 SDValue Ops[] = { N->getOperand(1),
3106 N->getOperand(2),
3107 N->getOperand(0) };
3108 SDNode *Dec =
3109 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3110 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3111 ReplaceUses(N, Dec);
3112 CurDAG->RemoveDeadNode(N);
3113 return;
3115 case ARMISD::BRCOND: {
3116 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3117 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3118 // Pattern complexity = 6 cost = 1 size = 0
3120 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3121 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3122 // Pattern complexity = 6 cost = 1 size = 0
3124 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3125 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3126 // Pattern complexity = 6 cost = 1 size = 0
3128 unsigned Opc = Subtarget->isThumb() ?
3129 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3130 SDValue Chain = N->getOperand(0);
3131 SDValue N1 = N->getOperand(1);
3132 SDValue N2 = N->getOperand(2);
3133 SDValue N3 = N->getOperand(3);
3134 SDValue InFlag = N->getOperand(4);
3135 assert(N1.getOpcode() == ISD::BasicBlock);
3136 assert(N2.getOpcode() == ISD::Constant);
3137 assert(N3.getOpcode() == ISD::Register);
3139 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3141 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3142 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3143 SDValue Int = InFlag.getOperand(0);
3144 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3146 // Handle low-overhead loops.
3147 if (ID == Intrinsic::loop_decrement_reg) {
3148 SDValue Elements = Int.getOperand(2);
3149 SDValue Size = CurDAG->getTargetConstant(
3150 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3151 MVT::i32);
3153 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3154 SDNode *LoopDec =
3155 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3156 CurDAG->getVTList(MVT::i32, MVT::Other),
3157 Args);
3158 ReplaceUses(Int.getNode(), LoopDec);
3160 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3161 SDNode *LoopEnd =
3162 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3164 ReplaceUses(N, LoopEnd);
3165 CurDAG->RemoveDeadNode(N);
3166 CurDAG->RemoveDeadNode(InFlag.getNode());
3167 CurDAG->RemoveDeadNode(Int.getNode());
3168 return;
3172 bool SwitchEQNEToPLMI;
3173 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3174 InFlag = N->getOperand(4);
3176 if (SwitchEQNEToPLMI) {
3177 switch ((ARMCC::CondCodes)CC) {
3178 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3179 case ARMCC::NE:
3180 CC = (unsigned)ARMCC::MI;
3181 break;
3182 case ARMCC::EQ:
3183 CC = (unsigned)ARMCC::PL;
3184 break;
3189 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3190 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3191 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3192 MVT::Glue, Ops);
3193 Chain = SDValue(ResNode, 0);
3194 if (N->getNumValues() == 2) {
3195 InFlag = SDValue(ResNode, 1);
3196 ReplaceUses(SDValue(N, 1), InFlag);
3198 ReplaceUses(SDValue(N, 0),
3199 SDValue(Chain.getNode(), Chain.getResNo()));
3200 CurDAG->RemoveDeadNode(N);
3201 return;
3204 case ARMISD::CMPZ: {
3205 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3206 // This allows us to avoid materializing the expensive negative constant.
3207 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3208 // for its glue output.
3209 SDValue X = N->getOperand(0);
3210 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3211 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3212 int64_t Addend = -C->getSExtValue();
3214 SDNode *Add = nullptr;
3215 // ADDS can be better than CMN if the immediate fits in a
3216 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3217 // Outside that range we can just use a CMN which is 32-bit but has a
3218 // 12-bit immediate range.
3219 if (Addend < 1<<8) {
3220 if (Subtarget->isThumb2()) {
3221 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3222 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3223 CurDAG->getRegister(0, MVT::i32) };
3224 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3225 } else {
3226 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3227 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3228 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3229 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3230 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3233 if (Add) {
3234 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3235 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3238 // Other cases are autogenerated.
3239 break;
3242 case ARMISD::CMOV: {
3243 SDValue InFlag = N->getOperand(4);
3245 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3246 bool SwitchEQNEToPLMI;
3247 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3249 if (SwitchEQNEToPLMI) {
3250 SDValue ARMcc = N->getOperand(2);
3251 ARMCC::CondCodes CC =
3252 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3254 switch (CC) {
3255 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3256 case ARMCC::NE:
3257 CC = ARMCC::MI;
3258 break;
3259 case ARMCC::EQ:
3260 CC = ARMCC::PL;
3261 break;
3263 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3264 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3265 N->getOperand(3), N->getOperand(4)};
3266 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3270 // Other cases are autogenerated.
3271 break;
3274 case ARMISD::VZIP: {
3275 unsigned Opc = 0;
3276 EVT VT = N->getValueType(0);
3277 switch (VT.getSimpleVT().SimpleTy) {
3278 default: return;
3279 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3280 case MVT::v4f16:
3281 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3282 case MVT::v2f32:
3283 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3284 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3285 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3286 case MVT::v8f16:
3287 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3288 case MVT::v4f32:
3289 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3291 SDValue Pred = getAL(CurDAG, dl);
3292 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3293 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3294 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3295 return;
3297 case ARMISD::VUZP: {
3298 unsigned Opc = 0;
3299 EVT VT = N->getValueType(0);
3300 switch (VT.getSimpleVT().SimpleTy) {
3301 default: return;
3302 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3303 case MVT::v4f16:
3304 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3305 case MVT::v2f32:
3306 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3307 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3308 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3309 case MVT::v8f16:
3310 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3311 case MVT::v4f32:
3312 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3314 SDValue Pred = getAL(CurDAG, dl);
3315 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3316 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3317 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3318 return;
3320 case ARMISD::VTRN: {
3321 unsigned Opc = 0;
3322 EVT VT = N->getValueType(0);
3323 switch (VT.getSimpleVT().SimpleTy) {
3324 default: return;
3325 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3326 case MVT::v4f16:
3327 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3328 case MVT::v2f32:
3329 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3330 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3331 case MVT::v8f16:
3332 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3333 case MVT::v4f32:
3334 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3336 SDValue Pred = getAL(CurDAG, dl);
3337 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3338 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3339 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3340 return;
3342 case ARMISD::BUILD_VECTOR: {
3343 EVT VecVT = N->getValueType(0);
3344 EVT EltVT = VecVT.getVectorElementType();
3345 unsigned NumElts = VecVT.getVectorNumElements();
3346 if (EltVT == MVT::f64) {
3347 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3348 ReplaceNode(
3349 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3350 return;
3352 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3353 if (NumElts == 2) {
3354 ReplaceNode(
3355 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3356 return;
3358 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3359 ReplaceNode(N,
3360 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3361 N->getOperand(2), N->getOperand(3)));
3362 return;
3365 case ARMISD::VLD1DUP: {
3366 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3367 ARM::VLD1DUPd32 };
3368 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3369 ARM::VLD1DUPq32 };
3370 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3371 return;
3374 case ARMISD::VLD2DUP: {
3375 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3376 ARM::VLD2DUPd32 };
3377 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3378 return;
3381 case ARMISD::VLD3DUP: {
3382 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3383 ARM::VLD3DUPd16Pseudo,
3384 ARM::VLD3DUPd32Pseudo };
3385 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3386 return;
3389 case ARMISD::VLD4DUP: {
3390 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3391 ARM::VLD4DUPd16Pseudo,
3392 ARM::VLD4DUPd32Pseudo };
3393 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3394 return;
3397 case ARMISD::VLD1DUP_UPD: {
3398 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3399 ARM::VLD1DUPd16wb_fixed,
3400 ARM::VLD1DUPd32wb_fixed };
3401 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3402 ARM::VLD1DUPq16wb_fixed,
3403 ARM::VLD1DUPq32wb_fixed };
3404 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3405 return;
3408 case ARMISD::VLD2DUP_UPD: {
3409 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3410 ARM::VLD2DUPd16wb_fixed,
3411 ARM::VLD2DUPd32wb_fixed };
3412 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3413 return;
3416 case ARMISD::VLD3DUP_UPD: {
3417 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3418 ARM::VLD3DUPd16Pseudo_UPD,
3419 ARM::VLD3DUPd32Pseudo_UPD };
3420 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3421 return;
3424 case ARMISD::VLD4DUP_UPD: {
3425 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3426 ARM::VLD4DUPd16Pseudo_UPD,
3427 ARM::VLD4DUPd32Pseudo_UPD };
3428 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3429 return;
3432 case ARMISD::VLD1_UPD: {
3433 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3434 ARM::VLD1d16wb_fixed,
3435 ARM::VLD1d32wb_fixed,
3436 ARM::VLD1d64wb_fixed };
3437 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3438 ARM::VLD1q16wb_fixed,
3439 ARM::VLD1q32wb_fixed,
3440 ARM::VLD1q64wb_fixed };
3441 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3442 return;
3445 case ARMISD::VLD2_UPD: {
3446 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3447 ARM::VLD2d16wb_fixed,
3448 ARM::VLD2d32wb_fixed,
3449 ARM::VLD1q64wb_fixed};
3450 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3451 ARM::VLD2q16PseudoWB_fixed,
3452 ARM::VLD2q32PseudoWB_fixed };
3453 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3454 return;
3457 case ARMISD::VLD3_UPD: {
3458 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3459 ARM::VLD3d16Pseudo_UPD,
3460 ARM::VLD3d32Pseudo_UPD,
3461 ARM::VLD1d64TPseudoWB_fixed};
3462 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3463 ARM::VLD3q16Pseudo_UPD,
3464 ARM::VLD3q32Pseudo_UPD };
3465 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3466 ARM::VLD3q16oddPseudo_UPD,
3467 ARM::VLD3q32oddPseudo_UPD };
3468 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3469 return;
3472 case ARMISD::VLD4_UPD: {
3473 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3474 ARM::VLD4d16Pseudo_UPD,
3475 ARM::VLD4d32Pseudo_UPD,
3476 ARM::VLD1d64QPseudoWB_fixed};
3477 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3478 ARM::VLD4q16Pseudo_UPD,
3479 ARM::VLD4q32Pseudo_UPD };
3480 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3481 ARM::VLD4q16oddPseudo_UPD,
3482 ARM::VLD4q32oddPseudo_UPD };
3483 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3484 return;
3487 case ARMISD::VLD2LN_UPD: {
3488 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3489 ARM::VLD2LNd16Pseudo_UPD,
3490 ARM::VLD2LNd32Pseudo_UPD };
3491 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3492 ARM::VLD2LNq32Pseudo_UPD };
3493 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3494 return;
3497 case ARMISD::VLD3LN_UPD: {
3498 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3499 ARM::VLD3LNd16Pseudo_UPD,
3500 ARM::VLD3LNd32Pseudo_UPD };
3501 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3502 ARM::VLD3LNq32Pseudo_UPD };
3503 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3504 return;
3507 case ARMISD::VLD4LN_UPD: {
3508 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3509 ARM::VLD4LNd16Pseudo_UPD,
3510 ARM::VLD4LNd32Pseudo_UPD };
3511 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3512 ARM::VLD4LNq32Pseudo_UPD };
3513 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3514 return;
3517 case ARMISD::VST1_UPD: {
3518 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3519 ARM::VST1d16wb_fixed,
3520 ARM::VST1d32wb_fixed,
3521 ARM::VST1d64wb_fixed };
3522 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3523 ARM::VST1q16wb_fixed,
3524 ARM::VST1q32wb_fixed,
3525 ARM::VST1q64wb_fixed };
3526 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3527 return;
3530 case ARMISD::VST2_UPD: {
3531 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3532 ARM::VST2d16wb_fixed,
3533 ARM::VST2d32wb_fixed,
3534 ARM::VST1q64wb_fixed};
3535 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3536 ARM::VST2q16PseudoWB_fixed,
3537 ARM::VST2q32PseudoWB_fixed };
3538 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3539 return;
3542 case ARMISD::VST3_UPD: {
3543 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3544 ARM::VST3d16Pseudo_UPD,
3545 ARM::VST3d32Pseudo_UPD,
3546 ARM::VST1d64TPseudoWB_fixed};
3547 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3548 ARM::VST3q16Pseudo_UPD,
3549 ARM::VST3q32Pseudo_UPD };
3550 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3551 ARM::VST3q16oddPseudo_UPD,
3552 ARM::VST3q32oddPseudo_UPD };
3553 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3554 return;
3557 case ARMISD::VST4_UPD: {
3558 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3559 ARM::VST4d16Pseudo_UPD,
3560 ARM::VST4d32Pseudo_UPD,
3561 ARM::VST1d64QPseudoWB_fixed};
3562 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3563 ARM::VST4q16Pseudo_UPD,
3564 ARM::VST4q32Pseudo_UPD };
3565 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3566 ARM::VST4q16oddPseudo_UPD,
3567 ARM::VST4q32oddPseudo_UPD };
3568 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3569 return;
3572 case ARMISD::VST2LN_UPD: {
3573 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3574 ARM::VST2LNd16Pseudo_UPD,
3575 ARM::VST2LNd32Pseudo_UPD };
3576 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3577 ARM::VST2LNq32Pseudo_UPD };
3578 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3579 return;
3582 case ARMISD::VST3LN_UPD: {
3583 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3584 ARM::VST3LNd16Pseudo_UPD,
3585 ARM::VST3LNd32Pseudo_UPD };
3586 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3587 ARM::VST3LNq32Pseudo_UPD };
3588 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3589 return;
3592 case ARMISD::VST4LN_UPD: {
3593 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3594 ARM::VST4LNd16Pseudo_UPD,
3595 ARM::VST4LNd32Pseudo_UPD };
3596 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3597 ARM::VST4LNq32Pseudo_UPD };
3598 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3599 return;
3602 case ISD::INTRINSIC_VOID:
3603 case ISD::INTRINSIC_W_CHAIN: {
3604 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3605 switch (IntNo) {
3606 default:
3607 break;
3609 case Intrinsic::arm_mrrc:
3610 case Intrinsic::arm_mrrc2: {
3611 SDLoc dl(N);
3612 SDValue Chain = N->getOperand(0);
3613 unsigned Opc;
3615 if (Subtarget->isThumb())
3616 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3617 else
3618 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3620 SmallVector<SDValue, 5> Ops;
3621 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3622 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3623 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3625 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3626 // instruction will always be '1111' but it is possible in assembly language to specify
3627 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3628 if (Opc != ARM::MRRC2) {
3629 Ops.push_back(getAL(CurDAG, dl));
3630 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3633 Ops.push_back(Chain);
3635 // Writes to two registers.
3636 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3638 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3639 return;
3641 case Intrinsic::arm_ldaexd:
3642 case Intrinsic::arm_ldrexd: {
3643 SDLoc dl(N);
3644 SDValue Chain = N->getOperand(0);
3645 SDValue MemAddr = N->getOperand(2);
3646 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3648 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3649 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3650 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3652 // arm_ldrexd returns a i64 value in {i32, i32}
3653 std::vector<EVT> ResTys;
3654 if (isThumb) {
3655 ResTys.push_back(MVT::i32);
3656 ResTys.push_back(MVT::i32);
3657 } else
3658 ResTys.push_back(MVT::Untyped);
3659 ResTys.push_back(MVT::Other);
3661 // Place arguments in the right order.
3662 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3663 CurDAG->getRegister(0, MVT::i32), Chain};
3664 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3665 // Transfer memoperands.
3666 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3667 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3669 // Remap uses.
3670 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3671 if (!SDValue(N, 0).use_empty()) {
3672 SDValue Result;
3673 if (isThumb)
3674 Result = SDValue(Ld, 0);
3675 else {
3676 SDValue SubRegIdx =
3677 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3678 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3679 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3680 Result = SDValue(ResNode,0);
3682 ReplaceUses(SDValue(N, 0), Result);
3684 if (!SDValue(N, 1).use_empty()) {
3685 SDValue Result;
3686 if (isThumb)
3687 Result = SDValue(Ld, 1);
3688 else {
3689 SDValue SubRegIdx =
3690 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3691 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3692 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3693 Result = SDValue(ResNode,0);
3695 ReplaceUses(SDValue(N, 1), Result);
3697 ReplaceUses(SDValue(N, 2), OutChain);
3698 CurDAG->RemoveDeadNode(N);
3699 return;
3701 case Intrinsic::arm_stlexd:
3702 case Intrinsic::arm_strexd: {
3703 SDLoc dl(N);
3704 SDValue Chain = N->getOperand(0);
3705 SDValue Val0 = N->getOperand(2);
3706 SDValue Val1 = N->getOperand(3);
3707 SDValue MemAddr = N->getOperand(4);
3709 // Store exclusive double returns an i32 value which is the return status
3710 // of the issued store.
3711 const EVT ResTys[] = {MVT::i32, MVT::Other};
3713 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3714 // Place arguments in the right order.
3715 SmallVector<SDValue, 7> Ops;
3716 if (isThumb) {
3717 Ops.push_back(Val0);
3718 Ops.push_back(Val1);
3719 } else
3720 // arm_strexd uses GPRPair.
3721 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3722 Ops.push_back(MemAddr);
3723 Ops.push_back(getAL(CurDAG, dl));
3724 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3725 Ops.push_back(Chain);
3727 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3728 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3729 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3731 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3732 // Transfer memoperands.
3733 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3734 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3736 ReplaceNode(N, St);
3737 return;
3740 case Intrinsic::arm_neon_vld1: {
3741 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3742 ARM::VLD1d32, ARM::VLD1d64 };
3743 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3744 ARM::VLD1q32, ARM::VLD1q64};
3745 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3746 return;
3749 case Intrinsic::arm_neon_vld1x2: {
3750 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3751 ARM::VLD1q32, ARM::VLD1q64 };
3752 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3753 ARM::VLD1d16QPseudo,
3754 ARM::VLD1d32QPseudo,
3755 ARM::VLD1d64QPseudo };
3756 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3757 return;
3760 case Intrinsic::arm_neon_vld1x3: {
3761 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3762 ARM::VLD1d16TPseudo,
3763 ARM::VLD1d32TPseudo,
3764 ARM::VLD1d64TPseudo };
3765 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3766 ARM::VLD1q16LowTPseudo_UPD,
3767 ARM::VLD1q32LowTPseudo_UPD,
3768 ARM::VLD1q64LowTPseudo_UPD };
3769 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3770 ARM::VLD1q16HighTPseudo,
3771 ARM::VLD1q32HighTPseudo,
3772 ARM::VLD1q64HighTPseudo };
3773 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3774 return;
3777 case Intrinsic::arm_neon_vld1x4: {
3778 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3779 ARM::VLD1d16QPseudo,
3780 ARM::VLD1d32QPseudo,
3781 ARM::VLD1d64QPseudo };
3782 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3783 ARM::VLD1q16LowQPseudo_UPD,
3784 ARM::VLD1q32LowQPseudo_UPD,
3785 ARM::VLD1q64LowQPseudo_UPD };
3786 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3787 ARM::VLD1q16HighQPseudo,
3788 ARM::VLD1q32HighQPseudo,
3789 ARM::VLD1q64HighQPseudo };
3790 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3791 return;
3794 case Intrinsic::arm_neon_vld2: {
3795 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3796 ARM::VLD2d32, ARM::VLD1q64 };
3797 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3798 ARM::VLD2q32Pseudo };
3799 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3800 return;
3803 case Intrinsic::arm_neon_vld3: {
3804 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3805 ARM::VLD3d16Pseudo,
3806 ARM::VLD3d32Pseudo,
3807 ARM::VLD1d64TPseudo };
3808 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3809 ARM::VLD3q16Pseudo_UPD,
3810 ARM::VLD3q32Pseudo_UPD };
3811 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3812 ARM::VLD3q16oddPseudo,
3813 ARM::VLD3q32oddPseudo };
3814 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3815 return;
3818 case Intrinsic::arm_neon_vld4: {
3819 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3820 ARM::VLD4d16Pseudo,
3821 ARM::VLD4d32Pseudo,
3822 ARM::VLD1d64QPseudo };
3823 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3824 ARM::VLD4q16Pseudo_UPD,
3825 ARM::VLD4q32Pseudo_UPD };
3826 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3827 ARM::VLD4q16oddPseudo,
3828 ARM::VLD4q32oddPseudo };
3829 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3830 return;
3833 case Intrinsic::arm_neon_vld2dup: {
3834 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3835 ARM::VLD2DUPd32, ARM::VLD1q64 };
3836 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3837 ARM::VLD2DUPq16EvenPseudo,
3838 ARM::VLD2DUPq32EvenPseudo };
3839 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3840 ARM::VLD2DUPq16OddPseudo,
3841 ARM::VLD2DUPq32OddPseudo };
3842 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3843 DOpcodes, QOpcodes0, QOpcodes1);
3844 return;
3847 case Intrinsic::arm_neon_vld3dup: {
3848 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3849 ARM::VLD3DUPd16Pseudo,
3850 ARM::VLD3DUPd32Pseudo,
3851 ARM::VLD1d64TPseudo };
3852 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3853 ARM::VLD3DUPq16EvenPseudo,
3854 ARM::VLD3DUPq32EvenPseudo };
3855 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3856 ARM::VLD3DUPq16OddPseudo,
3857 ARM::VLD3DUPq32OddPseudo };
3858 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3859 DOpcodes, QOpcodes0, QOpcodes1);
3860 return;
3863 case Intrinsic::arm_neon_vld4dup: {
3864 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3865 ARM::VLD4DUPd16Pseudo,
3866 ARM::VLD4DUPd32Pseudo,
3867 ARM::VLD1d64QPseudo };
3868 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3869 ARM::VLD4DUPq16EvenPseudo,
3870 ARM::VLD4DUPq32EvenPseudo };
3871 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3872 ARM::VLD4DUPq16OddPseudo,
3873 ARM::VLD4DUPq32OddPseudo };
3874 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3875 DOpcodes, QOpcodes0, QOpcodes1);
3876 return;
3879 case Intrinsic::arm_neon_vld2lane: {
3880 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3881 ARM::VLD2LNd16Pseudo,
3882 ARM::VLD2LNd32Pseudo };
3883 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3884 ARM::VLD2LNq32Pseudo };
3885 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3886 return;
3889 case Intrinsic::arm_neon_vld3lane: {
3890 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3891 ARM::VLD3LNd16Pseudo,
3892 ARM::VLD3LNd32Pseudo };
3893 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3894 ARM::VLD3LNq32Pseudo };
3895 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3896 return;
3899 case Intrinsic::arm_neon_vld4lane: {
3900 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3901 ARM::VLD4LNd16Pseudo,
3902 ARM::VLD4LNd32Pseudo };
3903 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3904 ARM::VLD4LNq32Pseudo };
3905 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3906 return;
3909 case Intrinsic::arm_neon_vst1: {
3910 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3911 ARM::VST1d32, ARM::VST1d64 };
3912 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3913 ARM::VST1q32, ARM::VST1q64 };
3914 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3915 return;
3918 case Intrinsic::arm_neon_vst1x2: {
3919 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3920 ARM::VST1q32, ARM::VST1q64 };
3921 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3922 ARM::VST1d16QPseudo,
3923 ARM::VST1d32QPseudo,
3924 ARM::VST1d64QPseudo };
3925 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3926 return;
3929 case Intrinsic::arm_neon_vst1x3: {
3930 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3931 ARM::VST1d16TPseudo,
3932 ARM::VST1d32TPseudo,
3933 ARM::VST1d64TPseudo };
3934 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3935 ARM::VST1q16LowTPseudo_UPD,
3936 ARM::VST1q32LowTPseudo_UPD,
3937 ARM::VST1q64LowTPseudo_UPD };
3938 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3939 ARM::VST1q16HighTPseudo,
3940 ARM::VST1q32HighTPseudo,
3941 ARM::VST1q64HighTPseudo };
3942 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3943 return;
3946 case Intrinsic::arm_neon_vst1x4: {
3947 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3948 ARM::VST1d16QPseudo,
3949 ARM::VST1d32QPseudo,
3950 ARM::VST1d64QPseudo };
3951 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3952 ARM::VST1q16LowQPseudo_UPD,
3953 ARM::VST1q32LowQPseudo_UPD,
3954 ARM::VST1q64LowQPseudo_UPD };
3955 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3956 ARM::VST1q16HighQPseudo,
3957 ARM::VST1q32HighQPseudo,
3958 ARM::VST1q64HighQPseudo };
3959 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3960 return;
3963 case Intrinsic::arm_neon_vst2: {
3964 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3965 ARM::VST2d32, ARM::VST1q64 };
3966 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3967 ARM::VST2q32Pseudo };
3968 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3969 return;
3972 case Intrinsic::arm_neon_vst3: {
3973 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3974 ARM::VST3d16Pseudo,
3975 ARM::VST3d32Pseudo,
3976 ARM::VST1d64TPseudo };
3977 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3978 ARM::VST3q16Pseudo_UPD,
3979 ARM::VST3q32Pseudo_UPD };
3980 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3981 ARM::VST3q16oddPseudo,
3982 ARM::VST3q32oddPseudo };
3983 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3984 return;
3987 case Intrinsic::arm_neon_vst4: {
3988 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3989 ARM::VST4d16Pseudo,
3990 ARM::VST4d32Pseudo,
3991 ARM::VST1d64QPseudo };
3992 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3993 ARM::VST4q16Pseudo_UPD,
3994 ARM::VST4q32Pseudo_UPD };
3995 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3996 ARM::VST4q16oddPseudo,
3997 ARM::VST4q32oddPseudo };
3998 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3999 return;
4002 case Intrinsic::arm_neon_vst2lane: {
4003 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4004 ARM::VST2LNd16Pseudo,
4005 ARM::VST2LNd32Pseudo };
4006 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4007 ARM::VST2LNq32Pseudo };
4008 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4009 return;
4012 case Intrinsic::arm_neon_vst3lane: {
4013 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4014 ARM::VST3LNd16Pseudo,
4015 ARM::VST3LNd32Pseudo };
4016 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4017 ARM::VST3LNq32Pseudo };
4018 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4019 return;
4022 case Intrinsic::arm_neon_vst4lane: {
4023 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4024 ARM::VST4LNd16Pseudo,
4025 ARM::VST4LNd32Pseudo };
4026 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4027 ARM::VST4LNq32Pseudo };
4028 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4029 return;
4032 break;
4035 case ISD::ATOMIC_CMP_SWAP:
4036 SelectCMP_SWAP(N);
4037 return;
4040 SelectCode(N);
4043 // Inspect a register string of the form
4044 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4045 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4046 // and obtain the integer operands from them, adding these operands to the
4047 // provided vector.
4048 static void getIntOperandsFromRegisterString(StringRef RegString,
4049 SelectionDAG *CurDAG,
4050 const SDLoc &DL,
4051 std::vector<SDValue> &Ops) {
4052 SmallVector<StringRef, 5> Fields;
4053 RegString.split(Fields, ':');
4055 if (Fields.size() > 1) {
4056 bool AllIntFields = true;
4058 for (StringRef Field : Fields) {
4059 // Need to trim out leading 'cp' characters and get the integer field.
4060 unsigned IntField;
4061 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4062 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4065 assert(AllIntFields &&
4066 "Unexpected non-integer value in special register string.");
4070 // Maps a Banked Register string to its mask value. The mask value returned is
4071 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4072 // mask operand, which expresses which register is to be used, e.g. r8, and in
4073 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4074 // was invalid.
4075 static inline int getBankedRegisterMask(StringRef RegString) {
4076 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4077 if (!TheReg)
4078 return -1;
4079 return TheReg->Encoding;
4082 // The flags here are common to those allowed for apsr in the A class cores and
4083 // those allowed for the special registers in the M class cores. Returns a
4084 // value representing which flags were present, -1 if invalid.
4085 static inline int getMClassFlagsMask(StringRef Flags) {
4086 return StringSwitch<int>(Flags)
4087 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4088 // correct when flags are not permitted
4089 .Case("g", 0x1)
4090 .Case("nzcvq", 0x2)
4091 .Case("nzcvqg", 0x3)
4092 .Default(-1);
4095 // Maps MClass special registers string to its value for use in the
4096 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4097 // Returns -1 to signify that the string was invalid.
4098 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4099 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4100 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4101 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4102 return -1;
4103 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4106 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4107 // The mask operand contains the special register (R Bit) in bit 4, whether
4108 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4109 // bits 3-0 contains the fields to be accessed in the special register, set by
4110 // the flags provided with the register.
4111 int Mask = 0;
4112 if (Reg == "apsr") {
4113 // The flags permitted for apsr are the same flags that are allowed in
4114 // M class registers. We get the flag value and then shift the flags into
4115 // the correct place to combine with the mask.
4116 Mask = getMClassFlagsMask(Flags);
4117 if (Mask == -1)
4118 return -1;
4119 return Mask << 2;
4122 if (Reg != "cpsr" && Reg != "spsr") {
4123 return -1;
4126 // This is the same as if the flags were "fc"
4127 if (Flags.empty() || Flags == "all")
4128 return Mask | 0x9;
4130 // Inspect the supplied flags string and set the bits in the mask for
4131 // the relevant and valid flags allowed for cpsr and spsr.
4132 for (char Flag : Flags) {
4133 int FlagVal;
4134 switch (Flag) {
4135 case 'c':
4136 FlagVal = 0x1;
4137 break;
4138 case 'x':
4139 FlagVal = 0x2;
4140 break;
4141 case 's':
4142 FlagVal = 0x4;
4143 break;
4144 case 'f':
4145 FlagVal = 0x8;
4146 break;
4147 default:
4148 FlagVal = 0;
4151 // This avoids allowing strings where the same flag bit appears twice.
4152 if (!FlagVal || (Mask & FlagVal))
4153 return -1;
4154 Mask |= FlagVal;
4157 // If the register is spsr then we need to set the R bit.
4158 if (Reg == "spsr")
4159 Mask |= 0x10;
4161 return Mask;
4164 // Lower the read_register intrinsic to ARM specific DAG nodes
4165 // using the supplied metadata string to select the instruction node to use
4166 // and the registers/masks to construct as operands for the node.
4167 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4168 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4169 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4170 bool IsThumb2 = Subtarget->isThumb2();
4171 SDLoc DL(N);
4173 std::vector<SDValue> Ops;
4174 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4176 if (!Ops.empty()) {
4177 // If the special register string was constructed of fields (as defined
4178 // in the ACLE) then need to lower to MRC node (32 bit) or
4179 // MRRC node(64 bit), we can make the distinction based on the number of
4180 // operands we have.
4181 unsigned Opcode;
4182 SmallVector<EVT, 3> ResTypes;
4183 if (Ops.size() == 5){
4184 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4185 ResTypes.append({ MVT::i32, MVT::Other });
4186 } else {
4187 assert(Ops.size() == 3 &&
4188 "Invalid number of fields in special register string.");
4189 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4190 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4193 Ops.push_back(getAL(CurDAG, DL));
4194 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4195 Ops.push_back(N->getOperand(0));
4196 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4197 return true;
4200 std::string SpecialReg = RegString->getString().lower();
4202 int BankedReg = getBankedRegisterMask(SpecialReg);
4203 if (BankedReg != -1) {
4204 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4205 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4206 N->getOperand(0) };
4207 ReplaceNode(
4208 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4209 DL, MVT::i32, MVT::Other, Ops));
4210 return true;
4213 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4214 // corresponding to the register that is being read from. So we switch on the
4215 // string to find which opcode we need to use.
4216 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4217 .Case("fpscr", ARM::VMRS)
4218 .Case("fpexc", ARM::VMRS_FPEXC)
4219 .Case("fpsid", ARM::VMRS_FPSID)
4220 .Case("mvfr0", ARM::VMRS_MVFR0)
4221 .Case("mvfr1", ARM::VMRS_MVFR1)
4222 .Case("mvfr2", ARM::VMRS_MVFR2)
4223 .Case("fpinst", ARM::VMRS_FPINST)
4224 .Case("fpinst2", ARM::VMRS_FPINST2)
4225 .Default(0);
4227 // If an opcode was found then we can lower the read to a VFP instruction.
4228 if (Opcode) {
4229 if (!Subtarget->hasVFP2Base())
4230 return false;
4231 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4232 return false;
4234 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4235 N->getOperand(0) };
4236 ReplaceNode(N,
4237 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4238 return true;
4241 // If the target is M Class then need to validate that the register string
4242 // is an acceptable value, so check that a mask can be constructed from the
4243 // string.
4244 if (Subtarget->isMClass()) {
4245 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4246 if (SYSmValue == -1)
4247 return false;
4249 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4250 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4251 N->getOperand(0) };
4252 ReplaceNode(
4253 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4254 return true;
4257 // Here we know the target is not M Class so we need to check if it is one
4258 // of the remaining possible values which are apsr, cpsr or spsr.
4259 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4260 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4261 N->getOperand(0) };
4262 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4263 DL, MVT::i32, MVT::Other, Ops));
4264 return true;
4267 if (SpecialReg == "spsr") {
4268 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4269 N->getOperand(0) };
4270 ReplaceNode(
4271 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4272 MVT::i32, MVT::Other, Ops));
4273 return true;
4276 return false;
4279 // Lower the write_register intrinsic to ARM specific DAG nodes
4280 // using the supplied metadata string to select the instruction node to use
4281 // and the registers/masks to use in the nodes
4282 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4283 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4284 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4285 bool IsThumb2 = Subtarget->isThumb2();
4286 SDLoc DL(N);
4288 std::vector<SDValue> Ops;
4289 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4291 if (!Ops.empty()) {
4292 // If the special register string was constructed of fields (as defined
4293 // in the ACLE) then need to lower to MCR node (32 bit) or
4294 // MCRR node(64 bit), we can make the distinction based on the number of
4295 // operands we have.
4296 unsigned Opcode;
4297 if (Ops.size() == 5) {
4298 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4299 Ops.insert(Ops.begin()+2, N->getOperand(2));
4300 } else {
4301 assert(Ops.size() == 3 &&
4302 "Invalid number of fields in special register string.");
4303 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4304 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4305 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4308 Ops.push_back(getAL(CurDAG, DL));
4309 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4310 Ops.push_back(N->getOperand(0));
4312 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4313 return true;
4316 std::string SpecialReg = RegString->getString().lower();
4317 int BankedReg = getBankedRegisterMask(SpecialReg);
4318 if (BankedReg != -1) {
4319 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4320 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4321 N->getOperand(0) };
4322 ReplaceNode(
4323 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4324 DL, MVT::Other, Ops));
4325 return true;
4328 // The VFP registers are written to by creating SelectionDAG nodes with
4329 // opcodes corresponding to the register that is being written. So we switch
4330 // on the string to find which opcode we need to use.
4331 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4332 .Case("fpscr", ARM::VMSR)
4333 .Case("fpexc", ARM::VMSR_FPEXC)
4334 .Case("fpsid", ARM::VMSR_FPSID)
4335 .Case("fpinst", ARM::VMSR_FPINST)
4336 .Case("fpinst2", ARM::VMSR_FPINST2)
4337 .Default(0);
4339 if (Opcode) {
4340 if (!Subtarget->hasVFP2Base())
4341 return false;
4342 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4343 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4344 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4345 return true;
4348 std::pair<StringRef, StringRef> Fields;
4349 Fields = StringRef(SpecialReg).rsplit('_');
4350 std::string Reg = Fields.first.str();
4351 StringRef Flags = Fields.second;
4353 // If the target was M Class then need to validate the special register value
4354 // and retrieve the mask for use in the instruction node.
4355 if (Subtarget->isMClass()) {
4356 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4357 if (SYSmValue == -1)
4358 return false;
4360 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4361 N->getOperand(2), getAL(CurDAG, DL),
4362 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4363 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4364 return true;
4367 // We then check to see if a valid mask can be constructed for one of the
4368 // register string values permitted for the A and R class cores. These values
4369 // are apsr, spsr and cpsr; these are also valid on older cores.
4370 int Mask = getARClassRegisterMask(Reg, Flags);
4371 if (Mask != -1) {
4372 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4373 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4374 N->getOperand(0) };
4375 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4376 DL, MVT::Other, Ops));
4377 return true;
4380 return false;
4383 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4384 std::vector<SDValue> AsmNodeOperands;
4385 unsigned Flag, Kind;
4386 bool Changed = false;
4387 unsigned NumOps = N->getNumOperands();
4389 // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
4390 // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
4391 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4392 // respectively. Since there is no constraint to explicitly specify a
4393 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4394 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4395 // them into a GPRPair.
4397 SDLoc dl(N);
4398 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4399 : SDValue(nullptr,0);
4401 SmallVector<bool, 8> OpChanged;
4402 // Glue node will be appended late.
4403 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4404 SDValue op = N->getOperand(i);
4405 AsmNodeOperands.push_back(op);
4407 if (i < InlineAsm::Op_FirstOperand)
4408 continue;
4410 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4411 Flag = C->getZExtValue();
4412 Kind = InlineAsm::getKind(Flag);
4414 else
4415 continue;
4417 // Immediate operands to inline asm in the SelectionDAG are modeled with
4418 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4419 // the second is a constant with the value of the immediate. If we get here
4420 // and we have a Kind_Imm, skip the next operand, and continue.
4421 if (Kind == InlineAsm::Kind_Imm) {
4422 SDValue op = N->getOperand(++i);
4423 AsmNodeOperands.push_back(op);
4424 continue;
4427 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4428 if (NumRegs)
4429 OpChanged.push_back(false);
4431 unsigned DefIdx = 0;
4432 bool IsTiedToChangedOp = false;
4433 // If it's a use that is tied with a previous def, it has no
4434 // reg class constraint.
4435 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4436 IsTiedToChangedOp = OpChanged[DefIdx];
4438 // Memory operands to inline asm in the SelectionDAG are modeled with two
4439 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4440 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4441 // it doesn't get misinterpreted), and continue. We do this here because
4442 // it's important to update the OpChanged array correctly before moving on.
4443 if (Kind == InlineAsm::Kind_Mem) {
4444 SDValue op = N->getOperand(++i);
4445 AsmNodeOperands.push_back(op);
4446 continue;
4449 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4450 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4451 continue;
4453 unsigned RC;
4454 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4455 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4456 || NumRegs != 2)
4457 continue;
4459 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4460 SDValue V0 = N->getOperand(i+1);
4461 SDValue V1 = N->getOperand(i+2);
4462 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4463 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4464 SDValue PairedReg;
4465 MachineRegisterInfo &MRI = MF->getRegInfo();
4467 if (Kind == InlineAsm::Kind_RegDef ||
4468 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4469 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4470 // the original GPRs.
4472 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4473 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4474 SDValue Chain = SDValue(N,0);
4476 SDNode *GU = N->getGluedUser();
4477 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4478 Chain.getValue(1));
4480 // Extract values from a GPRPair reg and copy to the original GPR reg.
4481 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4482 RegCopy);
4483 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4484 RegCopy);
4485 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4486 RegCopy.getValue(1));
4487 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4489 // Update the original glue user.
4490 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4491 Ops.push_back(T1.getValue(1));
4492 CurDAG->UpdateNodeOperands(GU, Ops);
4494 else {
4495 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4496 // GPRPair and then pass the GPRPair to the inline asm.
4497 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4499 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4500 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4501 Chain.getValue(1));
4502 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4503 T0.getValue(1));
4504 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4506 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4507 // i32 VRs of inline asm with it.
4508 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4509 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4510 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4512 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4513 Glue = Chain.getValue(1);
4516 Changed = true;
4518 if(PairedReg.getNode()) {
4519 OpChanged[OpChanged.size() -1 ] = true;
4520 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4521 if (IsTiedToChangedOp)
4522 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4523 else
4524 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4525 // Replace the current flag.
4526 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4527 Flag, dl, MVT::i32);
4528 // Add the new register node and skip the original two GPRs.
4529 AsmNodeOperands.push_back(PairedReg);
4530 // Skip the next two GPRs.
4531 i += 2;
4535 if (Glue.getNode())
4536 AsmNodeOperands.push_back(Glue);
4537 if (!Changed)
4538 return false;
4540 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4541 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4542 New->setNodeId(-1);
4543 ReplaceNode(N, New.getNode());
4544 return true;
4548 bool ARMDAGToDAGISel::
4549 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4550 std::vector<SDValue> &OutOps) {
4551 switch(ConstraintID) {
4552 default:
4553 llvm_unreachable("Unexpected asm memory constraint");
4554 case InlineAsm::Constraint_i:
4555 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4556 // be an immediate and not a memory constraint.
4557 LLVM_FALLTHROUGH;
4558 case InlineAsm::Constraint_m:
4559 case InlineAsm::Constraint_o:
4560 case InlineAsm::Constraint_Q:
4561 case InlineAsm::Constraint_Um:
4562 case InlineAsm::Constraint_Un:
4563 case InlineAsm::Constraint_Uq:
4564 case InlineAsm::Constraint_Us:
4565 case InlineAsm::Constraint_Ut:
4566 case InlineAsm::Constraint_Uv:
4567 case InlineAsm::Constraint_Uy:
4568 // Require the address to be in a register. That is safe for all ARM
4569 // variants and it is hard to do anything much smarter without knowing
4570 // how the operand is used.
4571 OutOps.push_back(Op);
4572 return false;
4574 return true;
4577 /// createARMISelDag - This pass converts a legalized DAG into an
4578 /// ARM-specific DAG, ready for instruction scheduling.
///
/// Constructs a new ARMDAGToDAGISel with `new`, forwarding \p TM and
/// \p OptLevel to its constructor, and returns it as a FunctionPass pointer.
/// NOTE(review): nothing in this function releases the object; presumably the
/// caller (e.g. the pass manager it is added to) takes ownership -- confirm.
4580 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4581 CodeGenOpt::Level OptLevel) {
4582 return new ARMDAGToDAGISel(TM, OptLevel);