[ARM] Fix MVE ldst offset ranges
lib/Target/ARM/ARMISelDAGToDAG.cpp
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}
/// isInt32Immediate - This method tests to see if the node is a 32-bit
/// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
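
// Illustrative note (not part of the original source): with Scale = 4,
// RangeMin = 0 and RangeMax = 256, a constant node holding 1020 passes the
// divisibility check (1020 % 4 == 0), scales down to 255, and 255 lies in
// [0, 256), so the match succeeds with ScaledConstant == 255. A value of
// 1022 fails the divisibility check, and 1024 scales to 256, which falls
// outside the half-open range, so both are rejected.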

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }

    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, a left shifter operand of 1 / 2 is free but others are not.
      // e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
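
// Worked example (illustrative, not from the original source): for
//   (add X1, (and (srl X2, 14), 1020))
// the mask 1020 is 0b1111111100, so TZ == 2 and And_imm becomes 255. The
// rewrite produces
//   (add X1, (shl (and (srl X2, 16), 255), 2))
// where (and (srl X2, 16), 255) later matches UBFX and the shl folds into
// the add as an LSL #2 shifter operand, as in the ubfx/ldr.w sequence shown
// in the comment above.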

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                   // MOV
         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
      return 1;
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
  return 3;                           // Literal pool load
}
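
// Illustrative costs under this model (not part of the original source):
// on a v6T2/Thumb2 core, 0x1234 fits in 16 bits and costs 1 (a single
// MOVW), while 0x12345678 needs MOVW + MOVT and costs 2. On a pre-v6T2 ARM
// core without MOVT, 0x12345678 is neither a modified immediate nor a
// two-part value, so it costs 3 via a literal pool load.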

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}
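
// Worked example (illustrative, not from the original source): for
// (mul X, 1020) on Thumb1, 1020 is 255 << 2, so PowerOfTwo becomes 2 and
// NewMulConst becomes 255. Materializing 1020 costs 2 (MOV + LSL) while 255
// costs 1 (MOV), so the shift is extracted and can later be folded into a
// shifter operand or an addressing mode.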

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
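
// Illustrative matches (not part of the original source): for a load such
// as (load (add R, 100)) this returns Base = R and OffImm = 100, selecting
// "ldr r0, [R, #100]"; (sub R, 48) yields OffImm = -48. An offset of 0x1000
// or more falls outside the signed 12-bit window, so the whole expression
// becomes the base with OffImm = 0 and the add is selected separately.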

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
             dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
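
// Illustrative note (not part of the original source): addressing mode 3 is
// used by halfword and signed-byte loads/stores (LDRH, LDRSB, LDRD, ...),
// which only encode an unsigned 8-bit offset plus an add/sub bit. Hence
// (add R, 200) folds to "ldrh r0, [R, #200]", while an offset of 300 is out
// of range, so the constant is materialized into a register and the
// register-offset form is used instead.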

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
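
// Illustrative note (not part of the original source): for (add R, -16),
// -16 is not encodable as a Thumb1 load/store immediate but is fine for a
// subtract, so this helper reports true; the caller then keeps a [reg, #0]
// addressing form and lets the add itself be selected (as a sub), avoiding
// a costly materialization of the negative constant.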

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
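
// Illustrative note (not part of the original source): this matches the
// Thumb1 SP-relative form "ldr r0, [sp, #imm8*4]". A frame access such as
// (load (add FrameIndex, 16)) folds to OffImm = 4 (16 / 4) provided the
// slot is 4-byte aligned and the offset stays within the frame object.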

//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
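
// Illustrative note (not part of the original source): this is the MVE
// imm7 form the commit title refers to, where the encoded offset is a 7-bit
// value scaled by the access size. With Shift = 2 the acceptable byte
// offsets are multiples of 4 with scaled values in [-0x7f, 0x80), i.e.
// -508 to 508, so (add R, 256) folds into the instruction while
// (add R, 258) does not, because 258 is not a multiple of 4.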

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
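
// Illustrative note (not part of the original source): Thumb2 LDREX/STREX
// encode their offset as imm8 * 4, so only offsets in [0, 1020] that are
// multiples of 4 fold in; (add R, 64) yields OffImm = 16 (64 / 4). Any
// other shape still succeeds, but with the whole expression as the base and
// a zero offset, since these instructions cannot be selected any other way.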

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1596 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1597 LoadSDNode *LD = cast<LoadSDNode>(N);
1598 ISD::MemIndexedMode AM = LD->getAddressingMode();
1599 if (AM == ISD::UNINDEXED)
1600 return false;
1601 EVT LoadedVT = LD->getMemoryVT();
1602 if (!LoadedVT.isVector())
1603 return false;
1604 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1605 SDValue Offset;
1606 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1607 unsigned Opcode = 0;
1608 unsigned Align = LD->getAlignment();
1609 bool IsLE = Subtarget->isLittle();
1611 if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1612 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1613 if (isSExtLd)
1614 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1615 else
1616 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1617 } else if (LoadedVT == MVT::v8i8 &&
1618 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1619 if (isSExtLd)
1620 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1621 else
1622 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1623 } else if (LoadedVT == MVT::v4i8 &&
1624 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1625 if (isSExtLd)
1626 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1627 else
1628 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1629 } else if (Align >= 4 &&
1630 (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1631 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1632 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1633 else if (Align >= 2 &&
1634 (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1635 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1636 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1637 else if ((IsLE || LoadedVT == MVT::v16i8) &&
1638 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1639 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1640 else
1641 return false;
1643 SDValue Chain = LD->getChain();
1644 SDValue Base = LD->getBasePtr();
1645 SDValue Ops[] = {Base, Offset,
1646 CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1647 CurDAG->getRegister(0, MVT::i32), Chain};
1648 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1649 LD->getValueType(0), MVT::Other, Ops);
1650 transferMemOperands(N, New);
1651 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1652 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1653 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1654 CurDAG->RemoveDeadNode(N);
1655 return true;
1656 }
1658 /// Form a GPRPair pseudo register from a pair of GPR regs.
1659 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1660 SDLoc dl(V0.getNode());
1661 SDValue RegClass =
1662 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1663 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1664 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1665 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1666 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1667 }
1669 /// Form a D register from a pair of S registers.
1670 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1671 SDLoc dl(V0.getNode());
1672 SDValue RegClass =
1673 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1674 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1675 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1676 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1677 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1678 }
1680 /// Form a quad register from a pair of D registers.
1681 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1682 SDLoc dl(V0.getNode());
1683 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1684 MVT::i32);
1685 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1686 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1687 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1688 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1689 }
1691 /// Form 4 consecutive D registers from a pair of Q registers.
1692 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1693 SDLoc dl(V0.getNode());
1694 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1695 MVT::i32);
1696 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1697 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1698 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1699 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1700 }
1702 /// Form 4 consecutive S registers.
1703 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1704 SDValue V2, SDValue V3) {
1705 SDLoc dl(V0.getNode());
1706 SDValue RegClass =
1707 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1708 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1709 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1710 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1711 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1712 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1713 V2, SubReg2, V3, SubReg3 };
1714 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1715 }
1717 /// Form 4 consecutive D registers.
1718 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1719 SDValue V2, SDValue V3) {
1720 SDLoc dl(V0.getNode());
1721 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1722 MVT::i32);
1723 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1724 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1725 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1726 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1727 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1728 V2, SubReg2, V3, SubReg3 };
1729 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1730 }
1732 /// Form 4 consecutive Q registers.
1733 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1734 SDValue V2, SDValue V3) {
1735 SDLoc dl(V0.getNode());
1736 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1737 MVT::i32);
1738 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1739 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1740 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1741 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1742 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1743 V2, SubReg2, V3, SubReg3 };
1744 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1745 }
1747 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1748 /// of a NEON VLD or VST instruction. The supported values depend on the
1749 /// number of registers being loaded.
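/// For example, a VLD4 of 64-bit vectors (NumRegs == 4) supports 8-, 16- or
/// 32-byte alignment, while a VLD1 of a single 128-bit vector (NumRegs == 2)
/// is capped at 16 bytes.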
1750 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1751 unsigned NumVecs, bool is64BitVector) {
1752 unsigned NumRegs = NumVecs;
1753 if (!is64BitVector && NumVecs < 3)
1754 NumRegs *= 2;
1756 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1757 if (Alignment >= 32 && NumRegs == 4)
1758 Alignment = 32;
1759 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1760 Alignment = 16;
1761 else if (Alignment >= 8)
1762 Alignment = 8;
1763 else
1764 Alignment = 0;
1766 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1767 }
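// Returns true if Opc is a fixed-stride writeback VLD instruction (the
// "[rN]!" form, whose increment is implied by the access size).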
1769 static bool isVLDfixed(unsigned Opc)
1770 {
1771 switch (Opc) {
1772 default: return false;
1773 case ARM::VLD1d8wb_fixed : return true;
1774 case ARM::VLD1d16wb_fixed : return true;
1775 case ARM::VLD1d64Qwb_fixed : return true;
1776 case ARM::VLD1d32wb_fixed : return true;
1777 case ARM::VLD1d64wb_fixed : return true;
1778 case ARM::VLD1d64TPseudoWB_fixed : return true;
1779 case ARM::VLD1d64QPseudoWB_fixed : return true;
1780 case ARM::VLD1q8wb_fixed : return true;
1781 case ARM::VLD1q16wb_fixed : return true;
1782 case ARM::VLD1q32wb_fixed : return true;
1783 case ARM::VLD1q64wb_fixed : return true;
1784 case ARM::VLD1DUPd8wb_fixed : return true;
1785 case ARM::VLD1DUPd16wb_fixed : return true;
1786 case ARM::VLD1DUPd32wb_fixed : return true;
1787 case ARM::VLD1DUPq8wb_fixed : return true;
1788 case ARM::VLD1DUPq16wb_fixed : return true;
1789 case ARM::VLD1DUPq32wb_fixed : return true;
1790 case ARM::VLD2d8wb_fixed : return true;
1791 case ARM::VLD2d16wb_fixed : return true;
1792 case ARM::VLD2d32wb_fixed : return true;
1793 case ARM::VLD2q8PseudoWB_fixed : return true;
1794 case ARM::VLD2q16PseudoWB_fixed : return true;
1795 case ARM::VLD2q32PseudoWB_fixed : return true;
1796 case ARM::VLD2DUPd8wb_fixed : return true;
1797 case ARM::VLD2DUPd16wb_fixed : return true;
1798 case ARM::VLD2DUPd32wb_fixed : return true;
1799 }
1800 }
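// Likewise for the fixed-stride writeback VST instructions.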
1802 static bool isVSTfixed(unsigned Opc)
1803 {
1804 switch (Opc) {
1805 default: return false;
1806 case ARM::VST1d8wb_fixed : return true;
1807 case ARM::VST1d16wb_fixed : return true;
1808 case ARM::VST1d32wb_fixed : return true;
1809 case ARM::VST1d64wb_fixed : return true;
1810 case ARM::VST1q8wb_fixed : return true;
1811 case ARM::VST1q16wb_fixed : return true;
1812 case ARM::VST1q32wb_fixed : return true;
1813 case ARM::VST1q64wb_fixed : return true;
1814 case ARM::VST1d64TPseudoWB_fixed : return true;
1815 case ARM::VST1d64QPseudoWB_fixed : return true;
1816 case ARM::VST2d8wb_fixed : return true;
1817 case ARM::VST2d16wb_fixed : return true;
1818 case ARM::VST2d32wb_fixed : return true;
1819 case ARM::VST2q8PseudoWB_fixed : return true;
1820 case ARM::VST2q16PseudoWB_fixed : return true;
1821 case ARM::VST2q32PseudoWB_fixed : return true;
1822 }
1823 }
1825 // Get the register stride update opcode of a VLD/VST instruction that
1826 // is otherwise equivalent to the given fixed stride updating instruction.
1827 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1828 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1829 && "Incorrect fixed stride updating instruction.");
1830 switch (Opc) {
1831 default: break;
1832 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1833 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1834 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1835 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1836 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1837 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1838 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1839 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1840 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1841 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1842 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1843 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1844 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1845 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1846 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1847 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1848 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1849 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1851 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1852 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1853 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1854 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1855 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1856 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1857 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1858 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1859 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1860 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1862 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1863 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1864 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1865 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1866 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1867 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1869 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1870 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1871 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1872 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1873 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1874 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1876 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1877 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1878 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1879 }
1880 return Opc; // If not one we handle, return it unchanged.
1881 }
1883 /// Returns true if the given increment is a Constant known to be equal to the
1884 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1885 /// be used.
1886 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1887 auto C = dyn_cast<ConstantSDNode>(Inc);
1888 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1889 }
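/// Select a NEON VLD1/2/3/4, either as an intrinsic or an updating load.
/// DOpcodes, QOpcodes0 and QOpcodes1 are opcode tables indexed by the
/// element-size OpcodeIndex computed below; a VLD3/VLD4 of quad registers is
/// split into one instruction for the even D subregs and one for the odd.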
1891 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1892 const uint16_t *DOpcodes,
1893 const uint16_t *QOpcodes0,
1894 const uint16_t *QOpcodes1) {
1895 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1896 SDLoc dl(N);
1898 SDValue MemAddr, Align;
1899 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
1900 // nodes are not intrinsics.
1901 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1902 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1903 return;
1905 SDValue Chain = N->getOperand(0);
1906 EVT VT = N->getValueType(0);
1907 bool is64BitVector = VT.is64BitVector();
1908 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1910 unsigned OpcodeIndex;
1911 switch (VT.getSimpleVT().SimpleTy) {
1912 default: llvm_unreachable("unhandled vld type");
1913 // Double-register operations:
1914 case MVT::v8i8: OpcodeIndex = 0; break;
1915 case MVT::v4f16:
1916 case MVT::v4i16: OpcodeIndex = 1; break;
1917 case MVT::v2f32:
1918 case MVT::v2i32: OpcodeIndex = 2; break;
1919 case MVT::v1i64: OpcodeIndex = 3; break;
1920 // Quad-register operations:
1921 case MVT::v16i8: OpcodeIndex = 0; break;
1922 case MVT::v8f16:
1923 case MVT::v8i16: OpcodeIndex = 1; break;
1924 case MVT::v4f32:
1925 case MVT::v4i32: OpcodeIndex = 2; break;
1926 case MVT::v2f64:
1927 case MVT::v2i64: OpcodeIndex = 3; break;
1928 }
1930 EVT ResTy;
1931 if (NumVecs == 1)
1932 ResTy = VT;
1933 else {
1934 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1935 if (!is64BitVector)
1936 ResTyElts *= 2;
1937 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1938 }
1939 std::vector<EVT> ResTys;
1940 ResTys.push_back(ResTy);
1941 if (isUpdating)
1942 ResTys.push_back(MVT::i32);
1943 ResTys.push_back(MVT::Other);
1945 SDValue Pred = getAL(CurDAG, dl);
1946 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1947 SDNode *VLd;
1948 SmallVector<SDValue, 7> Ops;
1950 // Double registers and VLD1/VLD2 quad registers are directly supported.
1951 if (is64BitVector || NumVecs <= 2) {
1952 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1953 QOpcodes0[OpcodeIndex]);
1954 Ops.push_back(MemAddr);
1955 Ops.push_back(Align);
1956 if (isUpdating) {
1957 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1958 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1959 if (!IsImmUpdate) {
1960 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1961 // check for the opcode rather than the number of vector elements.
1962 if (isVLDfixed(Opc))
1963 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1964 Ops.push_back(Inc);
1965 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1966 // the operands if not such an opcode.
1967 } else if (!isVLDfixed(Opc))
1968 Ops.push_back(Reg0);
1969 }
1970 Ops.push_back(Pred);
1971 Ops.push_back(Reg0);
1972 Ops.push_back(Chain);
1973 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1975 } else {
1976 // Otherwise, quad registers are loaded with two separate instructions,
1977 // where one loads the even registers and the other loads the odd registers.
1978 EVT AddrTy = MemAddr.getValueType();
1980 // Load the even subregs. This is always an updating load, so that it
1981 // provides the address to the second load for the odd subregs.
1982 SDValue ImplDef =
1983 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1984 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1985 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1986 ResTy, AddrTy, MVT::Other, OpsA);
1987 Chain = SDValue(VLdA, 2);
1989 // Load the odd subregs.
1990 Ops.push_back(SDValue(VLdA, 1));
1991 Ops.push_back(Align);
1992 if (isUpdating) {
1993 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1994 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1995 "only constant post-increment update allowed for VLD3/4");
1996 (void)Inc;
1997 Ops.push_back(Reg0);
1998 }
1999 Ops.push_back(SDValue(VLdA, 0));
2000 Ops.push_back(Pred);
2001 Ops.push_back(Reg0);
2002 Ops.push_back(Chain);
2003 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2004 }
2006 // Transfer memoperands.
2007 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2008 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2010 if (NumVecs == 1) {
2011 ReplaceNode(N, VLd);
2012 return;
2013 }
2015 // Extract out the subregisters.
2016 SDValue SuperReg = SDValue(VLd, 0);
2017 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2018 ARM::qsub_3 == ARM::qsub_0 + 3,
2019 "Unexpected subreg numbering");
2020 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2021 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2022 ReplaceUses(SDValue(N, Vec),
2023 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2024 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2025 if (isUpdating)
2026 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2027 CurDAG->RemoveDeadNode(N);
2028 }
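/// Select a NEON VST1/2/3/4; mirrors SelectVLD, with VST3/VST4 of quad
/// registers likewise split into even- and odd-subreg stores.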
2030 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2031 const uint16_t *DOpcodes,
2032 const uint16_t *QOpcodes0,
2033 const uint16_t *QOpcodes1) {
2034 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2035 SDLoc dl(N);
2037 SDValue MemAddr, Align;
2038 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2039 // nodes are not intrinsics.
2040 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2041 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2042 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2043 return;
2045 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2047 SDValue Chain = N->getOperand(0);
2048 EVT VT = N->getOperand(Vec0Idx).getValueType();
2049 bool is64BitVector = VT.is64BitVector();
2050 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2052 unsigned OpcodeIndex;
2053 switch (VT.getSimpleVT().SimpleTy) {
2054 default: llvm_unreachable("unhandled vst type");
2055 // Double-register operations:
2056 case MVT::v8i8: OpcodeIndex = 0; break;
2057 case MVT::v4f16:
2058 case MVT::v4i16: OpcodeIndex = 1; break;
2059 case MVT::v2f32:
2060 case MVT::v2i32: OpcodeIndex = 2; break;
2061 case MVT::v1i64: OpcodeIndex = 3; break;
2062 // Quad-register operations:
2063 case MVT::v16i8: OpcodeIndex = 0; break;
2064 case MVT::v8f16:
2065 case MVT::v8i16: OpcodeIndex = 1; break;
2066 case MVT::v4f32:
2067 case MVT::v4i32: OpcodeIndex = 2; break;
2068 case MVT::v2f64:
2069 case MVT::v2i64: OpcodeIndex = 3; break;
2070 }
2072 std::vector<EVT> ResTys;
2073 if (isUpdating)
2074 ResTys.push_back(MVT::i32);
2075 ResTys.push_back(MVT::Other);
2077 SDValue Pred = getAL(CurDAG, dl);
2078 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2079 SmallVector<SDValue, 7> Ops;
2081 // Double registers and VST1/VST2 quad registers are directly supported.
2082 if (is64BitVector || NumVecs <= 2) {
2083 SDValue SrcReg;
2084 if (NumVecs == 1) {
2085 SrcReg = N->getOperand(Vec0Idx);
2086 } else if (is64BitVector) {
2087 // Form a REG_SEQUENCE to force register allocation.
2088 SDValue V0 = N->getOperand(Vec0Idx + 0);
2089 SDValue V1 = N->getOperand(Vec0Idx + 1);
2090 if (NumVecs == 2)
2091 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2092 else {
2093 SDValue V2 = N->getOperand(Vec0Idx + 2);
2094 // If it's a vst3, form a quad D-register and leave the last part as
2095 // an undef.
2096 SDValue V3 = (NumVecs == 3)
2097 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2098 : N->getOperand(Vec0Idx + 3);
2099 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2100 }
2101 } else {
2102 // Form a QQ register.
2103 SDValue Q0 = N->getOperand(Vec0Idx);
2104 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2105 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2106 }
2108 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2109 QOpcodes0[OpcodeIndex]);
2110 Ops.push_back(MemAddr);
2111 Ops.push_back(Align);
2112 if (isUpdating) {
2113 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2114 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2115 if (!IsImmUpdate) {
2116 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2117 // check for the opcode rather than the number of vector elements.
2118 if (isVSTfixed(Opc))
2119 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2120 Ops.push_back(Inc);
2121 }
2122 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2123 // the operands if not such an opcode.
2124 else if (!isVSTfixed(Opc))
2125 Ops.push_back(Reg0);
2126 }
2127 Ops.push_back(SrcReg);
2128 Ops.push_back(Pred);
2129 Ops.push_back(Reg0);
2130 Ops.push_back(Chain);
2131 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2133 // Transfer memoperands.
2134 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2136 ReplaceNode(N, VSt);
2137 return;
2138 }
2140 // Otherwise, quad registers are stored with two separate instructions,
2141 // where one stores the even registers and the other stores the odd registers.
2143 // Form the QQQQ REG_SEQUENCE.
2144 SDValue V0 = N->getOperand(Vec0Idx + 0);
2145 SDValue V1 = N->getOperand(Vec0Idx + 1);
2146 SDValue V2 = N->getOperand(Vec0Idx + 2);
2147 SDValue V3 = (NumVecs == 3)
2148 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2149 : N->getOperand(Vec0Idx + 3);
2150 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2152 // Store the even D registers. This is always an updating store, so that it
2153 // provides the address to the second store for the odd subregs.
2154 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2155 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2156 MemAddr.getValueType(),
2157 MVT::Other, OpsA);
2158 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2159 Chain = SDValue(VStA, 1);
2161 // Store the odd D registers.
2162 Ops.push_back(SDValue(VStA, 0));
2163 Ops.push_back(Align);
2164 if (isUpdating) {
2165 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2166 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2167 "only constant post-increment update allowed for VST3/4");
2168 (void)Inc;
2169 Ops.push_back(Reg0);
2170 }
2171 Ops.push_back(RegSeq);
2172 Ops.push_back(Pred);
2173 Ops.push_back(Reg0);
2174 Ops.push_back(Chain);
2175 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2176 Ops);
2177 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2178 ReplaceNode(N, VStB);
2179 }
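/// Select a NEON single-lane load or store (VLD2LN..VLD4LN, VST2LN..VST4LN);
/// the lane number is the operand following the vector operands.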
2181 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2182 unsigned NumVecs,
2183 const uint16_t *DOpcodes,
2184 const uint16_t *QOpcodes) {
2185 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2186 SDLoc dl(N);
2188 SDValue MemAddr, Align;
2189 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2190 // nodes are not intrinsics.
2191 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2192 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2193 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2194 return;
2196 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2198 SDValue Chain = N->getOperand(0);
2199 unsigned Lane =
2200 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2201 EVT VT = N->getOperand(Vec0Idx).getValueType();
2202 bool is64BitVector = VT.is64BitVector();
2204 unsigned Alignment = 0;
2205 if (NumVecs != 3) {
2206 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2207 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2208 if (Alignment > NumBytes)
2209 Alignment = NumBytes;
2210 if (Alignment < 8 && Alignment < NumBytes)
2211 Alignment = 0;
2212 // Alignment must be a power of two; make sure of that.
2213 Alignment = (Alignment & -Alignment);
2214 if (Alignment == 1)
2215 Alignment = 0;
2216 }
2217 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2219 unsigned OpcodeIndex;
2220 switch (VT.getSimpleVT().SimpleTy) {
2221 default: llvm_unreachable("unhandled vld/vst lane type");
2222 // Double-register operations:
2223 case MVT::v8i8: OpcodeIndex = 0; break;
2224 case MVT::v4f16:
2225 case MVT::v4i16: OpcodeIndex = 1; break;
2226 case MVT::v2f32:
2227 case MVT::v2i32: OpcodeIndex = 2; break;
2228 // Quad-register operations:
2229 case MVT::v8f16:
2230 case MVT::v8i16: OpcodeIndex = 0; break;
2231 case MVT::v4f32:
2232 case MVT::v4i32: OpcodeIndex = 1; break;
2233 }
2235 std::vector<EVT> ResTys;
2236 if (IsLoad) {
2237 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2238 if (!is64BitVector)
2239 ResTyElts *= 2;
2240 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2241 MVT::i64, ResTyElts));
2242 }
2243 if (isUpdating)
2244 ResTys.push_back(MVT::i32);
2245 ResTys.push_back(MVT::Other);
2247 SDValue Pred = getAL(CurDAG, dl);
2248 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2250 SmallVector<SDValue, 8> Ops;
2251 Ops.push_back(MemAddr);
2252 Ops.push_back(Align);
2253 if (isUpdating) {
2254 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2255 bool IsImmUpdate =
2256 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2257 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2258 }
2260 SDValue SuperReg;
2261 SDValue V0 = N->getOperand(Vec0Idx + 0);
2262 SDValue V1 = N->getOperand(Vec0Idx + 1);
2263 if (NumVecs == 2) {
2264 if (is64BitVector)
2265 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2266 else
2267 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2268 } else {
2269 SDValue V2 = N->getOperand(Vec0Idx + 2);
2270 SDValue V3 = (NumVecs == 3)
2271 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2272 : N->getOperand(Vec0Idx + 3);
2273 if (is64BitVector)
2274 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2275 else
2276 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2277 }
2278 Ops.push_back(SuperReg);
2279 Ops.push_back(getI32Imm(Lane, dl));
2280 Ops.push_back(Pred);
2281 Ops.push_back(Reg0);
2282 Ops.push_back(Chain);
2284 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2285 QOpcodes[OpcodeIndex]);
2286 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2287 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2288 if (!IsLoad) {
2289 ReplaceNode(N, VLdLn);
2290 return;
2291 }
2293 // Extract the subregisters.
2294 SuperReg = SDValue(VLdLn, 0);
2295 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2296 ARM::qsub_3 == ARM::qsub_0 + 3,
2297 "Unexpected subreg numbering");
2298 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2299 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2300 ReplaceUses(SDValue(N, Vec),
2301 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2302 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2303 if (isUpdating)
2304 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2305 CurDAG->RemoveDeadNode(N);
2306 }
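/// Select a NEON load-and-duplicate (VLD1DUP..VLD4DUP): loads one element
/// per register and replicates it into every lane of that register.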
2308 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2309 bool isUpdating, unsigned NumVecs,
2310 const uint16_t *DOpcodes,
2311 const uint16_t *QOpcodes0,
2312 const uint16_t *QOpcodes1) {
2313 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2314 SDLoc dl(N);
2316 SDValue MemAddr, Align;
2317 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2318 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2319 return;
2321 SDValue Chain = N->getOperand(0);
2322 EVT VT = N->getValueType(0);
2323 bool is64BitVector = VT.is64BitVector();
2325 unsigned Alignment = 0;
2326 if (NumVecs != 3) {
2327 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2328 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2329 if (Alignment > NumBytes)
2330 Alignment = NumBytes;
2331 if (Alignment < 8 && Alignment < NumBytes)
2332 Alignment = 0;
2333 // Alignment must be a power of two; make sure of that.
2334 Alignment = (Alignment & -Alignment);
2335 if (Alignment == 1)
2336 Alignment = 0;
2337 }
2338 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2340 unsigned OpcodeIndex;
2341 switch (VT.getSimpleVT().SimpleTy) {
2342 default: llvm_unreachable("unhandled vld-dup type");
2343 case MVT::v8i8:
2344 case MVT::v16i8: OpcodeIndex = 0; break;
2345 case MVT::v4i16:
2346 case MVT::v8i16:
2347 case MVT::v4f16:
2348 case MVT::v8f16:
2349 OpcodeIndex = 1; break;
2350 case MVT::v2f32:
2351 case MVT::v2i32:
2352 case MVT::v4f32:
2353 case MVT::v4i32: OpcodeIndex = 2; break;
2354 case MVT::v1f64:
2355 case MVT::v1i64: OpcodeIndex = 3; break;
2356 }
2358 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2359 if (!is64BitVector)
2360 ResTyElts *= 2;
2361 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2363 std::vector<EVT> ResTys;
2364 ResTys.push_back(ResTy);
2365 if (isUpdating)
2366 ResTys.push_back(MVT::i32);
2367 ResTys.push_back(MVT::Other);
2369 SDValue Pred = getAL(CurDAG, dl);
2370 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2372 SDNode *VLdDup;
2373 if (is64BitVector || NumVecs == 1) {
2374 SmallVector<SDValue, 6> Ops;
2375 Ops.push_back(MemAddr);
2376 Ops.push_back(Align);
2377 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2378 QOpcodes0[OpcodeIndex];
2379 if (isUpdating) {
2380 // Fixed-stride update instructions don't have an explicit writeback
2381 // operand; it's implicit in the opcode itself.
2382 SDValue Inc = N->getOperand(2);
2383 bool IsImmUpdate =
2384 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2385 if (NumVecs <= 2 && !IsImmUpdate)
2386 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2387 if (!IsImmUpdate)
2388 Ops.push_back(Inc);
2389 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2390 else if (NumVecs > 2)
2391 Ops.push_back(Reg0);
2392 }
2393 Ops.push_back(Pred);
2394 Ops.push_back(Reg0);
2395 Ops.push_back(Chain);
2396 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2397 } else if (NumVecs == 2) {
2398 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2399 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2400 dl, ResTys, OpsA);
2402 Chain = SDValue(VLdA, 1);
2403 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2404 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2405 } else {
2406 SDValue ImplDef =
2407 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2408 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2409 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2410 dl, ResTys, OpsA);
2412 SDValue SuperReg = SDValue(VLdA, 0);
2413 Chain = SDValue(VLdA, 1);
2414 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2415 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2416 }
2418 // Transfer memoperands.
2419 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2420 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2422 // Extract the subregisters.
2423 if (NumVecs == 1) {
2424 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2425 } else {
2426 SDValue SuperReg = SDValue(VLdDup, 0);
2427 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2428 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2429 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2430 ReplaceUses(SDValue(N, Vec),
2431 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2432 }
2433 }
2434 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2435 if (isUpdating)
2436 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2437 CurDAG->RemoveDeadNode(N);
2438 }
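/// Try to fold shift/and/sign-extend combinations into a single bitfield
/// extract, UBFX or SBFX (t2UBFX/t2SBFX on Thumb2), available from ARMv6T2.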
2440 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2441 if (!Subtarget->hasV6T2Ops())
2442 return false;
2444 unsigned Opc = isSigned
2445 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2446 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2447 SDLoc dl(N);
2449 // For unsigned extracts, check for a shift right and mask
2450 unsigned And_imm = 0;
2451 if (N->getOpcode() == ISD::AND) {
2452 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2454 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2455 if (And_imm & (And_imm + 1))
2456 return false;
2458 unsigned Srl_imm = 0;
2459 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2460 Srl_imm)) {
2461 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2463 // Mask off the unnecessary bits of the AND immediate; normally
2464 // DAGCombine will do this, but that might not happen if
2465 // targetShrinkDemandedConstant chooses a different immediate.
2466 And_imm &= -1U >> Srl_imm;
2468 // Note: The width operand is encoded as width-1.
2469 unsigned Width = countTrailingOnes(And_imm) - 1;
2470 unsigned LSB = Srl_imm;
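// e.g. for (x >> 8) & 0xff: And_imm == 0xff gives Width == 7 (eight bits)
// and LSB == 8, selecting UBFX x, #8, #8.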
2472 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2474 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2475 // It's cheaper to use a right shift to extract the top bits.
2476 if (Subtarget->isThumb()) {
2477 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2478 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2479 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2480 getAL(CurDAG, dl), Reg0, Reg0 };
2481 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2482 return true;
2483 }
2485 // ARM models shift instructions as MOVsi with shifter operand.
2486 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2487 SDValue ShOpc =
2488 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2489 MVT::i32);
2490 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2491 getAL(CurDAG, dl), Reg0, Reg0 };
2492 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2493 return true;
2494 }
2496 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2497 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2498 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2499 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2500 getAL(CurDAG, dl), Reg0 };
2501 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2502 return true;
2503 }
2505 return false;
2506 }
2508 // Otherwise, we're looking for a shift of a shift
2509 unsigned Shl_imm = 0;
2510 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2511 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2512 unsigned Srl_imm = 0;
2513 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2514 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2515 // Note: The width operand is encoded as width-1.
2516 unsigned Width = 32 - Srl_imm - 1;
2517 int LSB = Srl_imm - Shl_imm;
2518 if (LSB < 0)
2519 return false;
2520 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2521 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2522 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2523 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2524 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2525 getAL(CurDAG, dl), Reg0 };
2526 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2527 return true;
2528 }
2529 }
2531 // Or we are looking for a shift of an and, with a mask operand
2532 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2533 isShiftedMask_32(And_imm)) {
2534 unsigned Srl_imm = 0;
2535 unsigned LSB = countTrailingZeros(And_imm);
2536 // The shift amount must equal the LSB of the AND mask.
2537 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2538 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2539 unsigned MSB = 31 - countLeadingZeros(And_imm);
2540 // Note: The width operand is encoded as width-1.
2541 unsigned Width = MSB - LSB;
2542 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2543 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2544 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2545 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2546 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2547 getAL(CurDAG, dl), Reg0 };
2548 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2549 return true;
2550 }
2551 }
2553 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2554 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2555 unsigned LSB = 0;
2556 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2557 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2558 return false;
2560 if (LSB + Width > 32)
2561 return false;
2563 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2564 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2565 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2566 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2567 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2568 getAL(CurDAG, dl), Reg0 };
2569 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2570 return true;
2571 }
2573 return false;
2574 }
2576 /// Target-specific DAG combining for ISD::XOR.
2577 /// Target-independent combining lowers SELECT_CC nodes of the form
2578 /// select_cc setg[ge] X, 0, X, -X
2579 /// select_cc setgt X, -1, X, -X
2580 /// select_cc setl[te] X, 0, -X, X
2581 /// select_cc setlt X, 1, -X, X
2582 /// which represent Integer ABS into:
2583 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2584 /// ARM instruction selection detects the latter and matches it to
2585 /// ARM::ABS or ARM::t2ABS machine node.
2586 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2587 SDValue XORSrc0 = N->getOperand(0);
2588 SDValue XORSrc1 = N->getOperand(1);
2589 EVT VT = N->getValueType(0);
2591 if (Subtarget->isThumb1Only())
2592 return false;
2594 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2595 return false;
2597 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2598 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2599 SDValue SRASrc0 = XORSrc1.getOperand(0);
2600 SDValue SRASrc1 = XORSrc1.getOperand(1);
2601 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2602 EVT XType = SRASrc0.getValueType();
2603 unsigned Size = XType.getSizeInBits() - 1;
2605 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2606 XType.isInteger() && SRAConstant != nullptr &&
2607 Size == SRAConstant->getZExtValue()) {
2608 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2609 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2610 return true;
2611 }
2613 return false;
2614 }
2616 /// We've got special pseudo-instructions for these
2617 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2618 unsigned Opcode;
2619 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2620 if (MemTy == MVT::i8)
2621 Opcode = ARM::CMP_SWAP_8;
2622 else if (MemTy == MVT::i16)
2623 Opcode = ARM::CMP_SWAP_16;
2624 else if (MemTy == MVT::i32)
2625 Opcode = ARM::CMP_SWAP_32;
2626 else
2627 llvm_unreachable("Unknown AtomicCmpSwap type");
2629 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2630 N->getOperand(0)};
2631 SDNode *CmpSwap = CurDAG->getMachineNode(
2632 Opcode, SDLoc(N),
2633 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2635 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2636 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
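// The pseudo produces three results (i32, i32, Other); only the value (0)
// and the chain (2) are used to replace the original node's results.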
2638 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2639 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2640 CurDAG->RemoveDeadNode(N);
2641 }
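// Returns the (most significant, least significant) set-bit indices if A is
// a single contiguous run of ones, e.g. 0x0ff0 -> (11, 4); 0x0f0f -> None.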
2643 static Optional<std::pair<unsigned, unsigned>>
2644 getContiguousRangeOfSetBits(const APInt &A) {
2645 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2646 unsigned LastOne = A.countTrailingZeros();
2647 if (A.countPopulation() != (FirstOne - LastOne + 1))
2648 return Optional<std::pair<unsigned,unsigned>>();
2649 return std::make_pair(FirstOne, LastOne);
2650 }
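/// Lower (cmpz (and X, C), #0) with a contiguous mask C into flag-setting
/// shifts that avoid materializing the mask; when the interesting bit lands
/// in the sign bit, SwitchEQNEToPLMI tells the caller to rewrite EQ/NE into
/// PL/MI.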
2652 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2653 assert(N->getOpcode() == ARMISD::CMPZ);
2654 SwitchEQNEToPLMI = false;
2656 if (!Subtarget->isThumb())
2657 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2658 // LSR don't exist as standalone instructions - they need the barrel shifter.
2659 return;
2661 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2662 SDValue And = N->getOperand(0);
2663 if (!And->hasOneUse())
2664 return;
2666 SDValue Zero = N->getOperand(1);
2667 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2668 And->getOpcode() != ISD::AND)
2669 return;
2670 SDValue X = And.getOperand(0);
2671 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2673 if (!C)
2674 return;
2675 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2676 if (!Range)
2677 return;
2679 // There are several ways to lower this:
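// e.g. (x & 0xff) == 0 -> LSLS x, #24 (case 1 below);
// (x & 0xff000000) == 0 -> LSRS x, #24 (case 2);
// (x & 0x10) == 0 -> LSLS x, #27 with EQ/NE rewritten to PL/MI (case 3).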
2680 SDNode *NewN;
2681 SDLoc dl(N);
2683 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2684 if (Subtarget->isThumb2()) {
2685 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2686 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2687 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2688 CurDAG->getRegister(0, MVT::i32) };
2689 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2690 } else {
2691 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2692 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2693 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2694 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2695 }
2696 };
2698 if (Range->second == 0) {
2699 // 1. Mask includes the LSB -> Simply shift the top N bits off
2700 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2701 ReplaceNode(And.getNode(), NewN);
2702 } else if (Range->first == 31) {
2703 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2704 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2705 ReplaceNode(And.getNode(), NewN);
2706 } else if (Range->first == Range->second) {
2707 // 3. Only one bit is set. We can shift this into the sign bit and use a
2708 // PL/MI comparison.
2709 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2710 ReplaceNode(And.getNode(), NewN);
2712 SwitchEQNEToPLMI = true;
2713 } else if (!Subtarget->hasV6T2Ops()) {
2714 // 4. Do a double shift to clear bottom and top bits, but only in
2715 // thumb-1 mode as in thumb-2 we can use UBFX.
2716 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2717 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2718 Range->second + (31 - Range->first));
2719 ReplaceNode(And.getNode(), NewN);
2720 }
2721 }
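/// Main selection hook: handles the nodes that need custom C++ selection
/// before the remaining cases fall through to the autogenerated matcher.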
2724 void ARMDAGToDAGISel::Select(SDNode *N) {
2725 SDLoc dl(N);
2727 if (N->isMachineOpcode()) {
2728 N->setNodeId(-1);
2729 return; // Already selected.
2730 }
2732 switch (N->getOpcode()) {
2733 default: break;
2734 case ISD::STORE: {
2735 // For Thumb1, match an sp-relative store in C++. This is a little
2736 // unfortunate, but I don't think I can make the chain check work
2737 // otherwise. (The chain of the store has to be the same as the chain
2738 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2739 // a direct reference to "SP".)
2741 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2742 // a different addressing mode from other four-byte stores.
2744 // This pattern usually comes up with call arguments.
2745 StoreSDNode *ST = cast<StoreSDNode>(N);
2746 SDValue Ptr = ST->getBasePtr();
2747 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2748 int RHSC = 0;
2749 if (Ptr.getOpcode() == ISD::ADD &&
2750 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2751 Ptr = Ptr.getOperand(0);
2753 if (Ptr.getOpcode() == ISD::CopyFromReg &&
2754 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2755 Ptr.getOperand(0) == ST->getChain()) {
2756 SDValue Ops[] = {ST->getValue(),
2757 CurDAG->getRegister(ARM::SP, MVT::i32),
2758 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2759 getAL(CurDAG, dl),
2760 CurDAG->getRegister(0, MVT::i32),
2761 ST->getChain()};
2762 MachineSDNode *ResNode =
2763 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2764 MachineMemOperand *MemOp = ST->getMemOperand();
2765 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2766 ReplaceNode(N, ResNode);
2767 return;
2768 }
2769 }
2770 break;
2771 }
2772 case ISD::WRITE_REGISTER:
2773 if (tryWriteRegister(N))
2774 return;
2775 break;
2776 case ISD::READ_REGISTER:
2777 if (tryReadRegister(N))
2778 return;
2779 break;
2780 case ISD::INLINEASM:
2781 case ISD::INLINEASM_BR:
2782 if (tryInlineAsm(N))
2783 return;
2784 break;
2785 case ISD::XOR:
2786 // Select special operations if XOR node forms integer ABS pattern
2787 if (tryABSOp(N))
2788 return;
2789 // Other cases are autogenerated.
2790 break;
2791 case ISD::Constant: {
2792 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2793 // If we can't materialize the constant we need to use a literal pool
2794 if (ConstantMaterializationCost(Val) > 2) {
2795 SDValue CPIdx = CurDAG->getTargetConstantPool(
2796 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2797 TLI->getPointerTy(CurDAG->getDataLayout()));
2799 SDNode *ResNode;
2800 if (Subtarget->isThumb()) {
2801 SDValue Ops[] = {
2802 CPIdx,
2803 getAL(CurDAG, dl),
2804 CurDAG->getRegister(0, MVT::i32),
2805 CurDAG->getEntryNode()
2806 };
2807 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2808 Ops);
2809 } else {
2810 SDValue Ops[] = {
2811 CPIdx,
2812 CurDAG->getTargetConstant(0, dl, MVT::i32),
2813 getAL(CurDAG, dl),
2814 CurDAG->getRegister(0, MVT::i32),
2815 CurDAG->getEntryNode()
2816 };
2817 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2818 Ops);
2819 }
2820 // Annotate the Node with memory operand information so that MachineInstr
2821 // queries work properly. This e.g. gives the register allocation the
2822 // required information for rematerialization.
2823 MachineFunction& MF = CurDAG->getMachineFunction();
2824 MachineMemOperand *MemOp =
2825 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2826 MachineMemOperand::MOLoad, 4, 4);
2828 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2830 ReplaceNode(N, ResNode);
2831 return;
2832 }
2834 // Other cases are autogenerated.
2835 break;
2836 }
2837 case ISD::FrameIndex: {
2838 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2839 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2840 SDValue TFI = CurDAG->getTargetFrameIndex(
2841 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2842 if (Subtarget->isThumb1Only()) {
2843 // Set the alignment of the frame object to 4, to avoid having to generate
2844 // more than one ADD
2845 MachineFrameInfo &MFI = MF->getFrameInfo();
2846 if (MFI.getObjectAlignment(FI) < 4)
2847 MFI.setObjectAlignment(FI, 4);
2848 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2849 CurDAG->getTargetConstant(0, dl, MVT::i32));
2850 return;
2851 } else {
2852 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2853 ARM::t2ADDri : ARM::ADDri);
2854 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2855 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2856 CurDAG->getRegister(0, MVT::i32) };
2857 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2858 return;
2859 }
2860 }
2861 case ISD::SRL:
2862 if (tryV6T2BitfieldExtractOp(N, false))
2863 return;
2864 break;
2865 case ISD::SIGN_EXTEND_INREG:
2866 case ISD::SRA:
2867 if (tryV6T2BitfieldExtractOp(N, true))
2868 return;
2869 break;
2870 case ISD::MUL:
2871 if (Subtarget->isThumb1Only())
2872 break;
2873 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2874 unsigned RHSV = C->getZExtValue();
2875 if (!RHSV) break;
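// Strength-reduce x*(2^n+1) and x*(2^n-1) into a shift plus an add or a
// reverse-subtract, e.g. x*9 -> add x, x, x, lsl #3 and
// x*7 -> rsb x, x, x, lsl #3.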
2876 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2877 unsigned ShImm = Log2_32(RHSV-1);
2878 if (ShImm >= 32)
2879 break;
2880 SDValue V = N->getOperand(0);
2881 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2882 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2883 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2884 if (Subtarget->isThumb()) {
2885 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2886 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2887 return;
2888 } else {
2889 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2890 Reg0 };
2891 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2892 return;
2893 }
2894 }
2895 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2896 unsigned ShImm = Log2_32(RHSV+1);
2897 if (ShImm >= 32)
2898 break;
2899 SDValue V = N->getOperand(0);
2900 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2901 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2902 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2903 if (Subtarget->isThumb()) {
2904 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2905 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2906 return;
2907 } else {
2908 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2909 Reg0 };
2910 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2911 return;
2912 }
2913 }
2914 }
2915 break;
2916 case ISD::AND: {
2917 // Check for unsigned bitfield extract
2918 if (tryV6T2BitfieldExtractOp(N, false))
2919 return;
2921 // If an immediate is used in an AND node, it is possible that the immediate
2922 // can be more optimally materialized when negated. If this is the case we
2923 // can negate the immediate and use a BIC instead.
2924 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2925 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2926 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2928 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2929 // immediate can be negated and fit in the immediate operand of
2930 // a t2BIC, don't do any manual transform here as this can be
2931 // handled by the generic ISel machinery.
2932 bool PreferImmediateEncoding =
2933 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2934 if (!PreferImmediateEncoding &&
2935 ConstantMaterializationCost(Imm) >
2936 ConstantMaterializationCost(~Imm)) {
2937 // The current immediate costs more to materialize than a negated
2938 // immediate, so negate the immediate and use a BIC.
2939 SDValue NewImm =
2940 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2941 // If the new constant didn't exist before, reposition it in the topological
2942 // ordering so it is just before N. Otherwise, don't touch its location.
2943 if (NewImm->getNodeId() == -1)
2944 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2946 if (!Subtarget->hasThumb2()) {
2947 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2948 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2949 CurDAG->getRegister(0, MVT::i32)};
2950 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2951 return;
2952 } else {
2953 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2954 CurDAG->getRegister(0, MVT::i32),
2955 CurDAG->getRegister(0, MVT::i32)};
2956 ReplaceNode(N,
2957 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2958 return;
2959 }
2960 }
2961 }
2963 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2964 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2965 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2966 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2967 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)"
2968 EVT VT = N->getValueType(0);
2969 if (VT != MVT::i32)
2970 break;
2971 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2972 ? ARM::t2MOVTi16
2973 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2974 if (!Opc)
2975 break;
2976 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2977 N1C = dyn_cast<ConstantSDNode>(N1);
2978 if (!N1C)
2979 break;
2980 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2981 SDValue N2 = N0.getOperand(1);
2982 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2983 if (!N2C)
2984 break;
2985 unsigned N1CVal = N1C->getZExtValue();
2986 unsigned N2CVal = N2C->getZExtValue();
2987 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2988 (N1CVal & 0xffffU) == 0xffffU &&
2989 (N2CVal & 0xffffU) == 0x0U) {
2990 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2991 dl, MVT::i32);
2992 SDValue Ops[] = { N0.getOperand(0), Imm16,
2993 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2994 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2995 return;
2996 }
2997 }
2999 break;
3000 }
3001 case ARMISD::UMAAL: {
3002 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3003 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3004 N->getOperand(2), N->getOperand(3),
3005 getAL(CurDAG, dl),
3006 CurDAG->getRegister(0, MVT::i32) };
3007 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3008 return;
3009 }
3010 case ARMISD::UMLAL:{
3011 if (Subtarget->isThumb()) {
3012 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3013 N->getOperand(3), getAL(CurDAG, dl),
3014 CurDAG->getRegister(0, MVT::i32)};
3015 ReplaceNode(
3016 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3017 return;
3018 } else {
3019 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3020 N->getOperand(3), getAL(CurDAG, dl),
3021 CurDAG->getRegister(0, MVT::i32),
3022 CurDAG->getRegister(0, MVT::i32) };
3023 ReplaceNode(N, CurDAG->getMachineNode(
3024 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3025 MVT::i32, MVT::i32, Ops));
3026 return;
3027 }
3028 }
3029 case ARMISD::SMLAL:{
3030 if (Subtarget->isThumb()) {
3031 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3032 N->getOperand(3), getAL(CurDAG, dl),
3033 CurDAG->getRegister(0, MVT::i32)};
3034 ReplaceNode(
3035 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3036 return;
3037 } else {
3038 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3039 N->getOperand(3), getAL(CurDAG, dl),
3040 CurDAG->getRegister(0, MVT::i32),
3041 CurDAG->getRegister(0, MVT::i32) };
3042 ReplaceNode(N, CurDAG->getMachineNode(
3043 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3044 MVT::i32, MVT::i32, Ops));
3045 return;
3046 }
3047 }
3048 case ARMISD::SUBE: {
3049 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3050 break;
3051 // Look for a pattern to match SMMLS
3052 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3053 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3054 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3055 !SDValue(N, 1).use_empty())
3056 break;
3058 if (Subtarget->isThumb())
3059 assert(Subtarget->hasThumb2() &&
3060 "This pattern should not be generated for Thumb");
3062 SDValue SmulLoHi = N->getOperand(1);
3063 SDValue Subc = N->getOperand(2);
3064 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3066 if (!Zero || Zero->getZExtValue() != 0 ||
3067 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3068 N->getOperand(1) != SmulLoHi.getValue(1) ||
3069 N->getOperand(2) != Subc.getValue(1))
3070 break;
3072 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3073 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3074 N->getOperand(0), getAL(CurDAG, dl),
3075 CurDAG->getRegister(0, MVT::i32) };
3076 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3077 return;
3078 }
3079 case ISD::LOAD: {
3080 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3081 return;
3082 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3083 if (tryT2IndexedLoad(N))
3084 return;
3085 } else if (Subtarget->isThumb()) {
3086 if (tryT1IndexedLoad(N))
3087 return;
3088 } else if (tryARMIndexedLoad(N))
3089 return;
3090 // Other cases are autogenerated.
3091 break;
3092 }
3093 case ARMISD::WLS:
3094 case ARMISD::LE: {
3095 SDValue Ops[] = { N->getOperand(1),
3096 N->getOperand(2),
3097 N->getOperand(0) };
3098 unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3099 ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3100 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3101 ReplaceUses(N, New);
3102 CurDAG->RemoveDeadNode(N);
3103 return;
3104 }
3105 case ARMISD::LOOP_DEC: {
3106 SDValue Ops[] = { N->getOperand(1),
3107 N->getOperand(2),
3108 N->getOperand(0) };
3109 SDNode *Dec =
3110 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3111 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3112 ReplaceUses(N, Dec);
3113 CurDAG->RemoveDeadNode(N);
3114 return;
3115 }
3116 case ARMISD::BRCOND: {
3117 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3118 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3119 // Pattern complexity = 6 cost = 1 size = 0
3121 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3122 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3123 // Pattern complexity = 6 cost = 1 size = 0
3125 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3126 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3127 // Pattern complexity = 6 cost = 1 size = 0
3129 unsigned Opc = Subtarget->isThumb() ?
3130 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3131 SDValue Chain = N->getOperand(0);
3132 SDValue N1 = N->getOperand(1);
3133 SDValue N2 = N->getOperand(2);
3134 SDValue N3 = N->getOperand(3);
3135 SDValue InFlag = N->getOperand(4);
3136 assert(N1.getOpcode() == ISD::BasicBlock);
3137 assert(N2.getOpcode() == ISD::Constant);
3138 assert(N3.getOpcode() == ISD::Register);
3140 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3142 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3143 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3144 SDValue Int = InFlag.getOperand(0);
3145 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3147 // Handle low-overhead loops: fold the loop.decrement.reg intrinsic and this branch into a t2LoopDec / t2LoopEnd pair.
3148 if (ID == Intrinsic::loop_decrement_reg) {
3149 SDValue Elements = Int.getOperand(2);
3150 SDValue Size = CurDAG->getTargetConstant(
3151 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3152 MVT::i32);
3154 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3155 SDNode *LoopDec =
3156 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3157 CurDAG->getVTList(MVT::i32, MVT::Other),
3158 Args);
3159 ReplaceUses(Int.getNode(), LoopDec);
3161 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3162 SDNode *LoopEnd =
3163 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3165 ReplaceUses(N, LoopEnd);
3166 CurDAG->RemoveDeadNode(N);
3167 CurDAG->RemoveDeadNode(InFlag.getNode());
3168 CurDAG->RemoveDeadNode(Int.getNode());
3169 return;
3173 bool SwitchEQNEToPLMI;
3174 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
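// SelectCMPZ may morph the compare (for instance so that the tested bit
// ends up in the sign flag); refetch the glue operand and, if requested,
// remap EQ/NE to PL/MI below.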
3175 InFlag = N->getOperand(4);
3177 if (SwitchEQNEToPLMI) {
3178 switch ((ARMCC::CondCodes)CC) {
3179 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3180 case ARMCC::NE:
3181 CC = (unsigned)ARMCC::MI;
3182 break;
3183 case ARMCC::EQ:
3184 CC = (unsigned)ARMCC::PL;
3185 break;
3190 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3191 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3192 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3193 MVT::Glue, Ops);
3194 Chain = SDValue(ResNode, 0);
3195 if (N->getNumValues() == 2) {
3196 InFlag = SDValue(ResNode, 1);
3197 ReplaceUses(SDValue(N, 1), InFlag);
3199 ReplaceUses(SDValue(N, 0),
3200 SDValue(Chain.getNode(), Chain.getResNo()));
3201 CurDAG->RemoveDeadNode(N);
3202 return;
3205 case ARMISD::CMPZ: {
3206 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3207 // This allows us to avoid materializing the expensive negative constant.
3208 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3209 // for its glue output.
3210 SDValue X = N->getOperand(0);
3211 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3212 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3213 int64_t Addend = -C->getSExtValue();
3215 SDNode *Add = nullptr;
3216 // ADDS can be better than CMN if the immediate fits in a
3217 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3218 // Outside that range we can just use a CMN which is 32-bit but has a
3219 // 12-bit immediate range.
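// Illustrative example (assumed registers, Thumb1): "cmp r0, #-2" becomes
// "adds r1, r0, #2; cmp r1, #0", where the ADDS sets the flags and the
// trailing compare against zero survives only for its glue result.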
3220 if (Addend < 1<<8) {
3221 if (Subtarget->isThumb2()) {
3222 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3223 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3224 CurDAG->getRegister(0, MVT::i32) };
3225 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3226 } else {
3227 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3228 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3229 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3230 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3231 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3234 if (Add) {
3235 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3236 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3239 // Other cases are autogenerated.
3240 break;
3243 case ARMISD::CMOV: {
3244 SDValue InFlag = N->getOperand(4);
3246 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3247 bool SwitchEQNEToPLMI;
3248 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
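// As in the BRCOND case, SelectCMPZ may have moved the tested bit into
// the sign flag, in which case EQ/NE must be remapped to PL/MI.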
3250 if (SwitchEQNEToPLMI) {
3251 SDValue ARMcc = N->getOperand(2);
3252 ARMCC::CondCodes CC =
3253 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3255 switch (CC) {
3256 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3257 case ARMCC::NE:
3258 CC = ARMCC::MI;
3259 break;
3260 case ARMCC::EQ:
3261 CC = ARMCC::PL;
3262 break;
3264 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3265 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3266 N->getOperand(3), N->getOperand(4)};
3267 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3271 // Other cases are autogenerated.
3272 break;
3275 case ARMISD::VZIP: {
3276 unsigned Opc = 0;
3277 EVT VT = N->getValueType(0);
3278 switch (VT.getSimpleVT().SimpleTy) {
3279 default: return;
3280 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3281 case MVT::v4f16:
3282 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3283 case MVT::v2f32:
3284 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3285 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3286 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3287 case MVT::v8f16:
3288 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3289 case MVT::v4f32:
3290 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3292 SDValue Pred = getAL(CurDAG, dl);
3293 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3294 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3295 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3296 return;
3298 case ARMISD::VUZP: {
3299 unsigned Opc = 0;
3300 EVT VT = N->getValueType(0);
3301 switch (VT.getSimpleVT().SimpleTy) {
3302 default: return;
3303 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3304 case MVT::v4f16:
3305 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3306 case MVT::v2f32:
3307 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3308 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3309 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3310 case MVT::v8f16:
3311 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3312 case MVT::v4f32:
3313 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3315 SDValue Pred = getAL(CurDAG, dl);
3316 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3317 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3318 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3319 return;
3321 case ARMISD::VTRN: {
3322 unsigned Opc = 0;
3323 EVT VT = N->getValueType(0);
3324 switch (VT.getSimpleVT().SimpleTy) {
3325 default: return;
3326 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3327 case MVT::v4f16:
3328 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3329 case MVT::v2f32:
3330 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3331 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3332 case MVT::v8f16:
3333 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3334 case MVT::v4f32:
3335 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3337 SDValue Pred = getAL(CurDAG, dl);
3338 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3339 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3340 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3341 return;
3343 case ARMISD::BUILD_VECTOR: {
3344 EVT VecVT = N->getValueType(0);
3345 EVT EltVT = VecVT.getVectorElementType();
3346 unsigned NumElts = VecVT.getVectorNumElements();
3347 if (EltVT == MVT::f64) {
3348 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3349 ReplaceNode(
3350 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3351 return;
3353 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3354 if (NumElts == 2) {
3355 ReplaceNode(
3356 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3357 return;
3359 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3360 ReplaceNode(N,
3361 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3362 N->getOperand(2), N->getOperand(3)));
3363 return;
3366 case ARMISD::VLD1DUP: {
3367 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3368 ARM::VLD1DUPd32 };
3369 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3370 ARM::VLD1DUPq32 };
3371 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3372 return;
3375 case ARMISD::VLD2DUP: {
3376 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3377 ARM::VLD2DUPd32 };
3378 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3379 return;
3382 case ARMISD::VLD3DUP: {
3383 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3384 ARM::VLD3DUPd16Pseudo,
3385 ARM::VLD3DUPd32Pseudo };
3386 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3387 return;
3390 case ARMISD::VLD4DUP: {
3391 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3392 ARM::VLD4DUPd16Pseudo,
3393 ARM::VLD4DUPd32Pseudo };
3394 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3395 return;
3398 case ARMISD::VLD1DUP_UPD: {
3399 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3400 ARM::VLD1DUPd16wb_fixed,
3401 ARM::VLD1DUPd32wb_fixed };
3402 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3403 ARM::VLD1DUPq16wb_fixed,
3404 ARM::VLD1DUPq32wb_fixed };
3405 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3406 return;
3409 case ARMISD::VLD2DUP_UPD: {
3410 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3411 ARM::VLD2DUPd16wb_fixed,
3412 ARM::VLD2DUPd32wb_fixed };
3413 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3414 return;
3417 case ARMISD::VLD3DUP_UPD: {
3418 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3419 ARM::VLD3DUPd16Pseudo_UPD,
3420 ARM::VLD3DUPd32Pseudo_UPD };
3421 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3422 return;
3425 case ARMISD::VLD4DUP_UPD: {
3426 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3427 ARM::VLD4DUPd16Pseudo_UPD,
3428 ARM::VLD4DUPd32Pseudo_UPD };
3429 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3430 return;
3433 case ARMISD::VLD1_UPD: {
3434 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3435 ARM::VLD1d16wb_fixed,
3436 ARM::VLD1d32wb_fixed,
3437 ARM::VLD1d64wb_fixed };
3438 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3439 ARM::VLD1q16wb_fixed,
3440 ARM::VLD1q32wb_fixed,
3441 ARM::VLD1q64wb_fixed };
3442 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3443 return;
3446 case ARMISD::VLD2_UPD: {
3447 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3448 ARM::VLD2d16wb_fixed,
3449 ARM::VLD2d32wb_fixed,
3450 ARM::VLD1q64wb_fixed};
3451 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3452 ARM::VLD2q16PseudoWB_fixed,
3453 ARM::VLD2q32PseudoWB_fixed };
3454 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3455 return;
3458 case ARMISD::VLD3_UPD: {
3459 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3460 ARM::VLD3d16Pseudo_UPD,
3461 ARM::VLD3d32Pseudo_UPD,
3462 ARM::VLD1d64TPseudoWB_fixed};
3463 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3464 ARM::VLD3q16Pseudo_UPD,
3465 ARM::VLD3q32Pseudo_UPD };
3466 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3467 ARM::VLD3q16oddPseudo_UPD,
3468 ARM::VLD3q32oddPseudo_UPD };
3469 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3470 return;
3473 case ARMISD::VLD4_UPD: {
3474 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3475 ARM::VLD4d16Pseudo_UPD,
3476 ARM::VLD4d32Pseudo_UPD,
3477 ARM::VLD1d64QPseudoWB_fixed};
3478 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3479 ARM::VLD4q16Pseudo_UPD,
3480 ARM::VLD4q32Pseudo_UPD };
3481 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3482 ARM::VLD4q16oddPseudo_UPD,
3483 ARM::VLD4q32oddPseudo_UPD };
3484 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3485 return;
3488 case ARMISD::VLD2LN_UPD: {
3489 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3490 ARM::VLD2LNd16Pseudo_UPD,
3491 ARM::VLD2LNd32Pseudo_UPD };
3492 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3493 ARM::VLD2LNq32Pseudo_UPD };
3494 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3495 return;
3498 case ARMISD::VLD3LN_UPD: {
3499 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3500 ARM::VLD3LNd16Pseudo_UPD,
3501 ARM::VLD3LNd32Pseudo_UPD };
3502 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3503 ARM::VLD3LNq32Pseudo_UPD };
3504 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3505 return;
3508 case ARMISD::VLD4LN_UPD: {
3509 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3510 ARM::VLD4LNd16Pseudo_UPD,
3511 ARM::VLD4LNd32Pseudo_UPD };
3512 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3513 ARM::VLD4LNq32Pseudo_UPD };
3514 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3515 return;
3518 case ARMISD::VST1_UPD: {
3519 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3520 ARM::VST1d16wb_fixed,
3521 ARM::VST1d32wb_fixed,
3522 ARM::VST1d64wb_fixed };
3523 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3524 ARM::VST1q16wb_fixed,
3525 ARM::VST1q32wb_fixed,
3526 ARM::VST1q64wb_fixed };
3527 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3528 return;
3531 case ARMISD::VST2_UPD: {
3532 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3533 ARM::VST2d16wb_fixed,
3534 ARM::VST2d32wb_fixed,
3535 ARM::VST1q64wb_fixed};
3536 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3537 ARM::VST2q16PseudoWB_fixed,
3538 ARM::VST2q32PseudoWB_fixed };
3539 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3540 return;
3543 case ARMISD::VST3_UPD: {
3544 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3545 ARM::VST3d16Pseudo_UPD,
3546 ARM::VST3d32Pseudo_UPD,
3547 ARM::VST1d64TPseudoWB_fixed};
3548 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3549 ARM::VST3q16Pseudo_UPD,
3550 ARM::VST3q32Pseudo_UPD };
3551 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3552 ARM::VST3q16oddPseudo_UPD,
3553 ARM::VST3q32oddPseudo_UPD };
3554 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3555 return;
3558 case ARMISD::VST4_UPD: {
3559 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3560 ARM::VST4d16Pseudo_UPD,
3561 ARM::VST4d32Pseudo_UPD,
3562 ARM::VST1d64QPseudoWB_fixed};
3563 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3564 ARM::VST4q16Pseudo_UPD,
3565 ARM::VST4q32Pseudo_UPD };
3566 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3567 ARM::VST4q16oddPseudo_UPD,
3568 ARM::VST4q32oddPseudo_UPD };
3569 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3570 return;
3573 case ARMISD::VST2LN_UPD: {
3574 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3575 ARM::VST2LNd16Pseudo_UPD,
3576 ARM::VST2LNd32Pseudo_UPD };
3577 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3578 ARM::VST2LNq32Pseudo_UPD };
3579 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3580 return;
3583 case ARMISD::VST3LN_UPD: {
3584 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3585 ARM::VST3LNd16Pseudo_UPD,
3586 ARM::VST3LNd32Pseudo_UPD };
3587 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3588 ARM::VST3LNq32Pseudo_UPD };
3589 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3590 return;
3593 case ARMISD::VST4LN_UPD: {
3594 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3595 ARM::VST4LNd16Pseudo_UPD,
3596 ARM::VST4LNd32Pseudo_UPD };
3597 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3598 ARM::VST4LNq32Pseudo_UPD };
3599 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3600 return;
3603 case ISD::INTRINSIC_VOID:
3604 case ISD::INTRINSIC_W_CHAIN: {
3605 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3606 switch (IntNo) {
3607 default:
3608 break;
3610 case Intrinsic::arm_mrrc:
3611 case Intrinsic::arm_mrrc2: {
3612 SDLoc dl(N);
3613 SDValue Chain = N->getOperand(0);
3614 unsigned Opc;
3616 if (Subtarget->isThumb())
3617 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3618 else
3619 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3621 SmallVector<SDValue, 5> Ops;
3622 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3623 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3624 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3626 // The ARM mrrc2 instruction does not allow predicates; the top 4 bits of the
3627 // encoded instruction are always '1111'. Assembly may still specify AL as a
3628 // predicate on mrrc2, but it makes no difference to the encoding.
3629 if (Opc != ARM::MRRC2) {
3630 Ops.push_back(getAL(CurDAG, dl));
3631 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3634 Ops.push_back(Chain);
3636 // Writes to two registers.
3637 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3639 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3640 return;
3642 case Intrinsic::arm_ldaexd:
3643 case Intrinsic::arm_ldrexd: {
3644 SDLoc dl(N);
3645 SDValue Chain = N->getOperand(0);
3646 SDValue MemAddr = N->getOperand(2);
3647 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3649 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3650 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3651 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3653 // arm_ldrexd returns an i64 value in {i32, i32}
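// On Thumb the machine node yields the two i32 halves directly; on ARM it
// yields a GPRPair (Untyped) that is split with gsub_0/gsub_1 below.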
3654 std::vector<EVT> ResTys;
3655 if (isThumb) {
3656 ResTys.push_back(MVT::i32);
3657 ResTys.push_back(MVT::i32);
3658 } else
3659 ResTys.push_back(MVT::Untyped);
3660 ResTys.push_back(MVT::Other);
3662 // Place arguments in the right order.
3663 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3664 CurDAG->getRegister(0, MVT::i32), Chain};
3665 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3666 // Transfer memoperands.
3667 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3668 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3670 // Remap uses.
3671 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3672 if (!SDValue(N, 0).use_empty()) {
3673 SDValue Result;
3674 if (isThumb)
3675 Result = SDValue(Ld, 0);
3676 else {
3677 SDValue SubRegIdx =
3678 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3679 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3680 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3681 Result = SDValue(ResNode,0);
3683 ReplaceUses(SDValue(N, 0), Result);
3685 if (!SDValue(N, 1).use_empty()) {
3686 SDValue Result;
3687 if (isThumb)
3688 Result = SDValue(Ld, 1);
3689 else {
3690 SDValue SubRegIdx =
3691 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3692 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3693 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3694 Result = SDValue(ResNode,0);
3696 ReplaceUses(SDValue(N, 1), Result);
3698 ReplaceUses(SDValue(N, 2), OutChain);
3699 CurDAG->RemoveDeadNode(N);
3700 return;
3702 case Intrinsic::arm_stlexd:
3703 case Intrinsic::arm_strexd: {
3704 SDLoc dl(N);
3705 SDValue Chain = N->getOperand(0);
3706 SDValue Val0 = N->getOperand(2);
3707 SDValue Val1 = N->getOperand(3);
3708 SDValue MemAddr = N->getOperand(4);
3710 // Store-exclusive double returns an i32 value which is the status
3711 // of the issued store.
3712 const EVT ResTys[] = {MVT::i32, MVT::Other};
3714 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3715 // Place arguments in the right order.
3716 SmallVector<SDValue, 7> Ops;
3717 if (isThumb) {
3718 Ops.push_back(Val0);
3719 Ops.push_back(Val1);
3720 } else
3721 // arm_strexd uses GPRPair.
3722 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3723 Ops.push_back(MemAddr);
3724 Ops.push_back(getAL(CurDAG, dl));
3725 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3726 Ops.push_back(Chain);
3728 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3729 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3730 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3732 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3733 // Transfer memoperands.
3734 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3735 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3737 ReplaceNode(N, St);
3738 return;
3741 case Intrinsic::arm_neon_vld1: {
3742 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3743 ARM::VLD1d32, ARM::VLD1d64 };
3744 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3745 ARM::VLD1q32, ARM::VLD1q64};
3746 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3747 return;
3750 case Intrinsic::arm_neon_vld1x2: {
3751 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3752 ARM::VLD1q32, ARM::VLD1q64 };
3753 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3754 ARM::VLD1d16QPseudo,
3755 ARM::VLD1d32QPseudo,
3756 ARM::VLD1d64QPseudo };
3757 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3758 return;
3761 case Intrinsic::arm_neon_vld1x3: {
3762 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3763 ARM::VLD1d16TPseudo,
3764 ARM::VLD1d32TPseudo,
3765 ARM::VLD1d64TPseudo };
3766 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3767 ARM::VLD1q16LowTPseudo_UPD,
3768 ARM::VLD1q32LowTPseudo_UPD,
3769 ARM::VLD1q64LowTPseudo_UPD };
3770 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3771 ARM::VLD1q16HighTPseudo,
3772 ARM::VLD1q32HighTPseudo,
3773 ARM::VLD1q64HighTPseudo };
3774 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3775 return;
3778 case Intrinsic::arm_neon_vld1x4: {
3779 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3780 ARM::VLD1d16QPseudo,
3781 ARM::VLD1d32QPseudo,
3782 ARM::VLD1d64QPseudo };
3783 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3784 ARM::VLD1q16LowQPseudo_UPD,
3785 ARM::VLD1q32LowQPseudo_UPD,
3786 ARM::VLD1q64LowQPseudo_UPD };
3787 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3788 ARM::VLD1q16HighQPseudo,
3789 ARM::VLD1q32HighQPseudo,
3790 ARM::VLD1q64HighQPseudo };
3791 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3792 return;
3795 case Intrinsic::arm_neon_vld2: {
3796 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3797 ARM::VLD2d32, ARM::VLD1q64 };
3798 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3799 ARM::VLD2q32Pseudo };
3800 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3801 return;
3804 case Intrinsic::arm_neon_vld3: {
3805 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3806 ARM::VLD3d16Pseudo,
3807 ARM::VLD3d32Pseudo,
3808 ARM::VLD1d64TPseudo };
3809 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3810 ARM::VLD3q16Pseudo_UPD,
3811 ARM::VLD3q32Pseudo_UPD };
3812 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3813 ARM::VLD3q16oddPseudo,
3814 ARM::VLD3q32oddPseudo };
3815 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3816 return;
3819 case Intrinsic::arm_neon_vld4: {
3820 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3821 ARM::VLD4d16Pseudo,
3822 ARM::VLD4d32Pseudo,
3823 ARM::VLD1d64QPseudo };
3824 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3825 ARM::VLD4q16Pseudo_UPD,
3826 ARM::VLD4q32Pseudo_UPD };
3827 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3828 ARM::VLD4q16oddPseudo,
3829 ARM::VLD4q32oddPseudo };
3830 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3831 return;
3834 case Intrinsic::arm_neon_vld2dup: {
3835 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3836 ARM::VLD2DUPd32, ARM::VLD1q64 };
3837 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3838 ARM::VLD2DUPq16EvenPseudo,
3839 ARM::VLD2DUPq32EvenPseudo };
3840 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3841 ARM::VLD2DUPq16OddPseudo,
3842 ARM::VLD2DUPq32OddPseudo };
3843 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3844 DOpcodes, QOpcodes0, QOpcodes1);
3845 return;
3848 case Intrinsic::arm_neon_vld3dup: {
3849 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3850 ARM::VLD3DUPd16Pseudo,
3851 ARM::VLD3DUPd32Pseudo,
3852 ARM::VLD1d64TPseudo };
3853 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3854 ARM::VLD3DUPq16EvenPseudo,
3855 ARM::VLD3DUPq32EvenPseudo };
3856 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3857 ARM::VLD3DUPq16OddPseudo,
3858 ARM::VLD3DUPq32OddPseudo };
3859 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3860 DOpcodes, QOpcodes0, QOpcodes1);
3861 return;
3864 case Intrinsic::arm_neon_vld4dup: {
3865 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3866 ARM::VLD4DUPd16Pseudo,
3867 ARM::VLD4DUPd32Pseudo,
3868 ARM::VLD1d64QPseudo };
3869 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3870 ARM::VLD4DUPq16EvenPseudo,
3871 ARM::VLD4DUPq32EvenPseudo };
3872 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3873 ARM::VLD4DUPq16OddPseudo,
3874 ARM::VLD4DUPq32OddPseudo };
3875 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3876 DOpcodes, QOpcodes0, QOpcodes1);
3877 return;
3880 case Intrinsic::arm_neon_vld2lane: {
3881 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3882 ARM::VLD2LNd16Pseudo,
3883 ARM::VLD2LNd32Pseudo };
3884 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3885 ARM::VLD2LNq32Pseudo };
3886 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3887 return;
3890 case Intrinsic::arm_neon_vld3lane: {
3891 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3892 ARM::VLD3LNd16Pseudo,
3893 ARM::VLD3LNd32Pseudo };
3894 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3895 ARM::VLD3LNq32Pseudo };
3896 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3897 return;
3900 case Intrinsic::arm_neon_vld4lane: {
3901 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3902 ARM::VLD4LNd16Pseudo,
3903 ARM::VLD4LNd32Pseudo };
3904 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3905 ARM::VLD4LNq32Pseudo };
3906 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3907 return;
3910 case Intrinsic::arm_neon_vst1: {
3911 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3912 ARM::VST1d32, ARM::VST1d64 };
3913 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3914 ARM::VST1q32, ARM::VST1q64 };
3915 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3916 return;
3919 case Intrinsic::arm_neon_vst1x2: {
3920 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3921 ARM::VST1q32, ARM::VST1q64 };
3922 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3923 ARM::VST1d16QPseudo,
3924 ARM::VST1d32QPseudo,
3925 ARM::VST1d64QPseudo };
3926 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3927 return;
3930 case Intrinsic::arm_neon_vst1x3: {
3931 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3932 ARM::VST1d16TPseudo,
3933 ARM::VST1d32TPseudo,
3934 ARM::VST1d64TPseudo };
3935 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3936 ARM::VST1q16LowTPseudo_UPD,
3937 ARM::VST1q32LowTPseudo_UPD,
3938 ARM::VST1q64LowTPseudo_UPD };
3939 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3940 ARM::VST1q16HighTPseudo,
3941 ARM::VST1q32HighTPseudo,
3942 ARM::VST1q64HighTPseudo };
3943 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3944 return;
3947 case Intrinsic::arm_neon_vst1x4: {
3948 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3949 ARM::VST1d16QPseudo,
3950 ARM::VST1d32QPseudo,
3951 ARM::VST1d64QPseudo };
3952 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3953 ARM::VST1q16LowQPseudo_UPD,
3954 ARM::VST1q32LowQPseudo_UPD,
3955 ARM::VST1q64LowQPseudo_UPD };
3956 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3957 ARM::VST1q16HighQPseudo,
3958 ARM::VST1q32HighQPseudo,
3959 ARM::VST1q64HighQPseudo };
3960 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3961 return;
3964 case Intrinsic::arm_neon_vst2: {
3965 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3966 ARM::VST2d32, ARM::VST1q64 };
3967 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3968 ARM::VST2q32Pseudo };
3969 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3970 return;
3973 case Intrinsic::arm_neon_vst3: {
3974 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3975 ARM::VST3d16Pseudo,
3976 ARM::VST3d32Pseudo,
3977 ARM::VST1d64TPseudo };
3978 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3979 ARM::VST3q16Pseudo_UPD,
3980 ARM::VST3q32Pseudo_UPD };
3981 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3982 ARM::VST3q16oddPseudo,
3983 ARM::VST3q32oddPseudo };
3984 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3985 return;
3988 case Intrinsic::arm_neon_vst4: {
3989 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3990 ARM::VST4d16Pseudo,
3991 ARM::VST4d32Pseudo,
3992 ARM::VST1d64QPseudo };
3993 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3994 ARM::VST4q16Pseudo_UPD,
3995 ARM::VST4q32Pseudo_UPD };
3996 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3997 ARM::VST4q16oddPseudo,
3998 ARM::VST4q32oddPseudo };
3999 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4000 return;
4003 case Intrinsic::arm_neon_vst2lane: {
4004 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4005 ARM::VST2LNd16Pseudo,
4006 ARM::VST2LNd32Pseudo };
4007 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4008 ARM::VST2LNq32Pseudo };
4009 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4010 return;
4013 case Intrinsic::arm_neon_vst3lane: {
4014 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4015 ARM::VST3LNd16Pseudo,
4016 ARM::VST3LNd32Pseudo };
4017 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4018 ARM::VST3LNq32Pseudo };
4019 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4020 return;
4023 case Intrinsic::arm_neon_vst4lane: {
4024 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4025 ARM::VST4LNd16Pseudo,
4026 ARM::VST4LNd32Pseudo };
4027 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4028 ARM::VST4LNq32Pseudo };
4029 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4030 return;
4033 break;
4036 case ISD::ATOMIC_CMP_SWAP:
4037 SelectCMP_SWAP(N);
4038 return;
4041 SelectCode(N);
4044 // Inspect a register string of the form
4045 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32-bit) or
4046 // cp<coprocessor>:<opc1>:c<CRm> (64-bit), extract the integer operands
4047 // from its fields, and add those operands to the
4048 // provided vector.
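// For example (illustrative): "cp15:0:c13:c0:3" produces the five i32
// target constants {15, 0, 13, 0, 3}.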
4049 static void getIntOperandsFromRegisterString(StringRef RegString,
4050 SelectionDAG *CurDAG,
4051 const SDLoc &DL,
4052 std::vector<SDValue> &Ops) {
4053 SmallVector<StringRef, 5> Fields;
4054 RegString.split(Fields, ':');
4056 if (Fields.size() > 1) {
4057 bool AllIntFields = true;
4059 for (StringRef Field : Fields) {
4060 // Need to trim out leading 'cp' characters and get the integer field.
4061 unsigned IntField;
4062 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4063 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4066 assert(AllIntFields &&
4067 "Unexpected non-integer value in special register string.");
4071 // Maps a Banked Register string to its mask value. The mask value returned is
4072 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4073 // mask operand, which expresses which register is to be used, e.g. r8, and in
4074 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4075 // was invalid.
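// For example (illustrative): banked names such as "r8_usr" or "spsr_fiq"
// map to their encodings, while an unrecognised string yields -1.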
4076 static inline int getBankedRegisterMask(StringRef RegString) {
4077 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4078 if (!TheReg)
4079 return -1;
4080 return TheReg->Encoding;
4083 // The flags here are common to those allowed for apsr in the A class cores and
4084 // those allowed for the special registers in the M class cores. Returns a
4085 // value representing which flags were present, -1 if invalid.
4086 static inline int getMClassFlagsMask(StringRef Flags) {
4087 return StringSwitch<int>(Flags)
4088 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4089 // correct when flags are not permitted
4090 .Case("g", 0x1)
4091 .Case("nzcvq", 0x2)
4092 .Case("nzcvqg", 0x3)
4093 .Default(-1);
4096 // Maps MClass special registers string to its value for use in the
4097 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4098 // Returns -1 to signify that the string was invalid.
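// For example (illustrative): "basepri" maps to its SYSm encoding on cores
// that provide it; unknown or unsupported names yield -1.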
4099 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4100 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4101 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4102 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4103 return -1;
4104 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4107 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4108 // The mask operand contains the special register (R Bit) in bit 4, whether
4109 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4110 // bits 3-0 contain the fields to be accessed in the special register, set by
4111 // the flags provided with the register.
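// Worked example (illustrative): "spsr_fc" sets f (0x8) and c (0x1), and
// the R bit (0x10) for spsr, giving a final mask of 0x19.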
4112 int Mask = 0;
4113 if (Reg == "apsr") {
4114 // The flags permitted for apsr are the same flags that are allowed in
4115 // M class registers. We get the flag value and then shift the flags into
4116 // the correct place to combine with the mask.
4117 Mask = getMClassFlagsMask(Flags);
4118 if (Mask == -1)
4119 return -1;
4120 return Mask << 2;
4123 if (Reg != "cpsr" && Reg != "spsr") {
4124 return -1;
4127 // This is the same as if the flags were "fc"
4128 if (Flags.empty() || Flags == "all")
4129 return Mask | 0x9;
4131 // Inspect the supplied flags string and set the bits in the mask for
4132 // the relevant and valid flags allowed for cpsr and spsr.
4133 for (char Flag : Flags) {
4134 int FlagVal;
4135 switch (Flag) {
4136 case 'c':
4137 FlagVal = 0x1;
4138 break;
4139 case 'x':
4140 FlagVal = 0x2;
4141 break;
4142 case 's':
4143 FlagVal = 0x4;
4144 break;
4145 case 'f':
4146 FlagVal = 0x8;
4147 break;
4148 default:
4149 FlagVal = 0;
4152 // This avoids allowing strings where the same flag bit appears twice.
4153 if (!FlagVal || (Mask & FlagVal))
4154 return -1;
4155 Mask |= FlagVal;
4158 // If the register is spsr then we need to set the R bit.
4159 if (Reg == "spsr")
4160 Mask |= 0x10;
4162 return Mask;
4165 // Lower the read_register intrinsic to ARM-specific DAG nodes,
4166 // using the supplied metadata string to select the instruction node to use
4167 // and the registers/masks to construct as operands for the node.
4168 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
4169 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4170 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4171 bool IsThumb2 = Subtarget->isThumb2();
4172 SDLoc DL(N);
4174 std::vector<SDValue> Ops;
4175 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4177 if (!Ops.empty()) {
4178 // If the special register string was constructed of fields (as defined
4179 // in the ACLE), then we need to lower to an MRC node (32-bit) or an
4180 // MRRC node (64-bit); we can make the distinction based on the number of
4181 // operands we have.
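// For example (illustrative, per the ACLE): __arm_rsr("cp15:0:c13:c0:3")
// has five fields and lowers to MRC, while __arm_rsr64("cp15:1:c2") has
// three and lowers to MRRC.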
4182 unsigned Opcode;
4183 SmallVector<EVT, 3> ResTypes;
4184 if (Ops.size() == 5) {
4185 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4186 ResTypes.append({ MVT::i32, MVT::Other });
4187 } else {
4188 assert(Ops.size() == 3 &&
4189 "Invalid number of fields in special register string.");
4190 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4191 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4194 Ops.push_back(getAL(CurDAG, DL));
4195 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4196 Ops.push_back(N->getOperand(0));
4197 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4198 return true;
4201 std::string SpecialReg = RegString->getString().lower();
4203 int BankedReg = getBankedRegisterMask(SpecialReg);
4204 if (BankedReg != -1) {
4205 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4206 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4207 N->getOperand(0) };
4208 ReplaceNode(
4209 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4210 DL, MVT::i32, MVT::Other, Ops));
4211 return true;
4214 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4215 // corresponding to the register that is being read from. So we switch on the
4216 // string to find which opcode we need to use.
4217 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4218 .Case("fpscr", ARM::VMRS)
4219 .Case("fpexc", ARM::VMRS_FPEXC)
4220 .Case("fpsid", ARM::VMRS_FPSID)
4221 .Case("mvfr0", ARM::VMRS_MVFR0)
4222 .Case("mvfr1", ARM::VMRS_MVFR1)
4223 .Case("mvfr2", ARM::VMRS_MVFR2)
4224 .Case("fpinst", ARM::VMRS_FPINST)
4225 .Case("fpinst2", ARM::VMRS_FPINST2)
4226 .Default(0);
4228 // If an opcode was found then we can lower the read to a VFP instruction.
4229 if (Opcode) {
4230 if (!Subtarget->hasVFP2Base())
4231 return false;
4232 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4233 return false;
4235 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4236 N->getOperand(0) };
4237 ReplaceNode(N,
4238 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4239 return true;
4242 // If the target is M Class then we need to validate that the register string
4243 // is an acceptable value, so check that a mask can be constructed from the
4244 // string.
4245 if (Subtarget->isMClass()) {
4246 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4247 if (SYSmValue == -1)
4248 return false;
4250 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4251 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4252 N->getOperand(0) };
4253 ReplaceNode(
4254 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4255 return true;
4258 // Here we know the target is not M Class so we need to check if it is one
4259 // of the remaining possible values which are apsr, cpsr or spsr.
4260 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4261 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4262 N->getOperand(0) };
4263 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4264 DL, MVT::i32, MVT::Other, Ops));
4265 return true;
4268 if (SpecialReg == "spsr") {
4269 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4270 N->getOperand(0) };
4271 ReplaceNode(
4272 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4273 MVT::i32, MVT::Other, Ops));
4274 return true;
4277 return false;
4280 // Lower the write_register intrinsic to ARM-specific DAG nodes,
4281 // using the supplied metadata string to select the instruction node to use
4282 // and the registers/masks to use in the nodes.
4283 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
4284 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4285 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4286 bool IsThumb2 = Subtarget->isThumb2();
4287 SDLoc DL(N);
4289 std::vector<SDValue> Ops;
4290 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4292 if (!Ops.empty()) {
4293 // If the special register string was constructed of fields (as defined
4294 // in the ACLE), then we need to lower to an MCR node (32-bit) or an
4295 // MCRR node (64-bit); we can make the distinction based on the number of
4296 // operands we have.
4297 unsigned Opcode;
4298 if (Ops.size() == 5) {
4299 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4300 Ops.insert(Ops.begin()+2, N->getOperand(2));
4301 } else {
4302 assert(Ops.size() == 3 &&
4303 "Invalid number of fields in special register string.");
4304 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4305 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4306 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4309 Ops.push_back(getAL(CurDAG, DL));
4310 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4311 Ops.push_back(N->getOperand(0));
4313 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4314 return true;
4317 std::string SpecialReg = RegString->getString().lower();
4318 int BankedReg = getBankedRegisterMask(SpecialReg);
4319 if (BankedReg != -1) {
4320 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4321 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4322 N->getOperand(0) };
4323 ReplaceNode(
4324 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4325 DL, MVT::Other, Ops));
4326 return true;
4329 // The VFP registers are written to by creating SelectionDAG nodes with
4330 // opcodes corresponding to the register that is being written. So we switch
4331 // on the string to find which opcode we need to use.
4332 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4333 .Case("fpscr", ARM::VMSR)
4334 .Case("fpexc", ARM::VMSR_FPEXC)
4335 .Case("fpsid", ARM::VMSR_FPSID)
4336 .Case("fpinst", ARM::VMSR_FPINST)
4337 .Case("fpinst2", ARM::VMSR_FPINST2)
4338 .Default(0);
4340 if (Opcode) {
4341 if (!Subtarget->hasVFP2Base())
4342 return false;
4343 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4344 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4345 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4346 return true;
4349 std::pair<StringRef, StringRef> Fields;
4350 Fields = StringRef(SpecialReg).rsplit('_');
4351 std::string Reg = Fields.first.str();
4352 StringRef Flags = Fields.second;
4354 // If the target is M Class then we need to validate the special register value
4355 // and retrieve the mask for use in the instruction node.
4356 if (Subtarget->isMClass()) {
4357 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4358 if (SYSmValue == -1)
4359 return false;
4361 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4362 N->getOperand(2), getAL(CurDAG, DL),
4363 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4364 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4365 return true;
4368 // We then check to see if a valid mask can be constructed for one of the
4369 // register string values permitted for the A and R class cores. These values
4370 // are apsr, spsr and cpsr; these are also valid on older cores.
4371 int Mask = getARClassRegisterMask(Reg, Flags);
4372 if (Mask != -1) {
4373 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4374 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4375 N->getOperand(0) };
4376 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4377 DL, MVT::Other, Ops));
4378 return true;
4381 return false;
4384 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
4385 std::vector<SDValue> AsmNodeOperands;
4386 unsigned Flag, Kind;
4387 bool Changed = false;
4388 unsigned NumOps = N->getNumOperands();
4390 // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
4391 // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
4392 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4393 // respectively. Since there is no constraint to explicitly specify a
4394 // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For Thumb,
4395 // the 64-bit data may be referred to by H, Q, R modifiers, so we still pack
4396 // them into a GPRPair.
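// Illustrative example (assumed source): in ARM mode
//   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val) : "r"(Ptr));
// needs Val in an even/odd register pair, which the GPRPair rewrite
// below provides.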
4398 SDLoc dl(N);
4399 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4400 : SDValue(nullptr,0);
4402 SmallVector<bool, 8> OpChanged;
4403 // The glue node, if present, will be appended last.
4404 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4405 SDValue op = N->getOperand(i);
4406 AsmNodeOperands.push_back(op);
4408 if (i < InlineAsm::Op_FirstOperand)
4409 continue;
4411 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4412 Flag = C->getZExtValue();
4413 Kind = InlineAsm::getKind(Flag);
4415 else
4416 continue;
4418 // Immediate operands to inline asm in the SelectionDAG are modeled with
4419 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4420 // the second is a constant with the value of the immediate. If we get here
4421 // and we have a Kind_Imm, skip the next operand, and continue.
4422 if (Kind == InlineAsm::Kind_Imm) {
4423 SDValue op = N->getOperand(++i);
4424 AsmNodeOperands.push_back(op);
4425 continue;
4428 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4429 if (NumRegs)
4430 OpChanged.push_back(false);
4432 unsigned DefIdx = 0;
4433 bool IsTiedToChangedOp = false;
4434 // If it's a use that is tied to a previous def, it has no
4435 // reg class constraint.
4436 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4437 IsTiedToChangedOp = OpChanged[DefIdx];
4439 // Memory operands to inline asm in the SelectionDAG are modeled with two
4440 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4441 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4442 // it doesn't get misinterpreted), and continue. We do this here because
4443 // it's important to update the OpChanged array correctly before moving on.
4444 if (Kind == InlineAsm::Kind_Mem) {
4445 SDValue op = N->getOperand(++i);
4446 AsmNodeOperands.push_back(op);
4447 continue;
4450 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4451 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4452 continue;
4454 unsigned RC;
4455 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4456 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4457 || NumRegs != 2)
4458 continue;
4460 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4461 SDValue V0 = N->getOperand(i+1);
4462 SDValue V1 = N->getOperand(i+2);
4463 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4464 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4465 SDValue PairedReg;
4466 MachineRegisterInfo &MRI = MF->getRegInfo();
4468 if (Kind == InlineAsm::Kind_RegDef ||
4469 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4470 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4471 // the original GPRs.
4473 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4474 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4475 SDValue Chain = SDValue(N,0);
4477 SDNode *GU = N->getGluedUser();
4478 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4479 Chain.getValue(1));
4481 // Extract values from a GPRPair reg and copy to the original GPR reg.
4482 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4483 RegCopy);
4484 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4485 RegCopy);
4486 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4487 RegCopy.getValue(1));
4488 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4490 // Update the original glue user.
4491 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4492 Ops.push_back(T1.getValue(1));
4493 CurDAG->UpdateNodeOperands(GU, Ops);
4495 else {
4496 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4497 // GPRPair and then pass the GPRPair to the inline asm.
4498 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4500 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4501 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4502 Chain.getValue(1));
4503 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4504 T0.getValue(1));
4505 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4507 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4508 // i32 VRs of inline asm with it.
4509 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4510 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4511 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4513 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4514 Glue = Chain.getValue(1);
4517 Changed = true;
4519 if (PairedReg.getNode()) {
4520 OpChanged[OpChanged.size() - 1] = true;
4521 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4522 if (IsTiedToChangedOp)
4523 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4524 else
4525 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4526 // Replace the current flag.
4527 AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
4528 Flag, dl, MVT::i32);
4529 // Add the new register node in place of the original two GPRs.
4530 AsmNodeOperands.push_back(PairedReg);
4531 // Skip the next two GPRs.
4532 i += 2;
4536 if (Glue.getNode())
4537 AsmNodeOperands.push_back(Glue);
4538 if (!Changed)
4539 return false;
4541 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4542 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4543 New->setNodeId(-1);
4544 ReplaceNode(N, New.getNode());
4545 return true;
4549 bool ARMDAGToDAGISel::
4550 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4551 std::vector<SDValue> &OutOps) {
4552 switch (ConstraintID) {
4553 default:
4554 llvm_unreachable("Unexpected asm memory constraint");
4555 case InlineAsm::Constraint_i:
4556 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4557 // be an immediate and not a memory constraint.
4558 LLVM_FALLTHROUGH;
4559 case InlineAsm::Constraint_m:
4560 case InlineAsm::Constraint_o:
4561 case InlineAsm::Constraint_Q:
4562 case InlineAsm::Constraint_Um:
4563 case InlineAsm::Constraint_Un:
4564 case InlineAsm::Constraint_Uq:
4565 case InlineAsm::Constraint_Us:
4566 case InlineAsm::Constraint_Ut:
4567 case InlineAsm::Constraint_Uv:
4568 case InlineAsm::Constraint_Uy:
4569 // Require the address to be in a register. That is safe for all ARM
4570 // variants and it is hard to do anything much smarter without knowing
4571 // how the operand is used.
4572 OutOps.push_back(Op);
4573 return false;
4575 return true;
4578 /// createARMISelDag - This pass converts a legalized DAG into an
4579 /// ARM-specific DAG, ready for instruction scheduling.
4581 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4582 CodeGenOpt::Level OptLevel) {
4583 return new ARMDAGToDAGISel(TM, OptLevel);