[Alignment][NFC] Use Align with TargetLowering::setPrefLoopAlignment
[llvm-complete.git] / lib / Target / ARM / ARMISelDAGToDAG.cpp
bloba59a57327d1c9cb4e1ce25c1f1f26ffa5926a93e
1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
11 //===----------------------------------------------------------------------===//
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
37 using namespace llvm;
39 #define DEBUG_TYPE "arm-isel"
41 static cl::opt<bool>
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
44 cl::init(false));
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
52 class ARMDAGToDAGISel : public SelectionDAGISel {
53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54 /// make the right decision when generating code for different targets.
55 const ARMSubtarget *Subtarget;
57 public:
58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
59 : SelectionDAGISel(tm, OptLevel) {}
61 bool runOnMachineFunction(MachineFunction &MF) override {
62 // Reset the subtarget each time through.
63 Subtarget = &MF.getSubtarget<ARMSubtarget>();
64 SelectionDAGISel::runOnMachineFunction(MF);
65 return true;
68 StringRef getPassName() const override { return "ARM Instruction Selection"; }
70 void PreprocessISelDAG() override;
72 /// getI32Imm - Return a target constant of type i32 with the specified
73 /// value.
74 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
75 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 void Select(SDNode *N) override;
80 bool hasNoVMLxHazardUse(SDNode *N) const;
81 bool isShifterOpProfitable(const SDValue &Shift,
82 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
83 bool SelectRegShifterOperand(SDValue N, SDValue &A,
84 SDValue &B, SDValue &C,
85 bool CheckProfitability = true);
86 bool SelectImmShifterOperand(SDValue N, SDValue &A,
87 SDValue &B, bool CheckProfitability = true);
88 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C) {
90 // Don't apply the profitability check
91 return SelectRegShifterOperand(N, A, B, C, false);
93 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
94 SDValue &B) {
95 // Don't apply the profitability check
96 return SelectImmShifterOperand(N, A, B, false);
99 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
101 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
102 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
104 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
105 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
106 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
107 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
108 return true;
111 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
112 SDValue &Offset, SDValue &Opc);
113 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
114 SDValue &Offset, SDValue &Opc);
115 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
116 SDValue &Offset, SDValue &Opc);
117 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
118 bool SelectAddrMode3(SDValue N, SDValue &Base,
119 SDValue &Offset, SDValue &Opc);
120 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
121 SDValue &Offset, SDValue &Opc);
122 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
123 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
124 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
125 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
126 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
128 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
130 // Thumb Addressing Modes:
131 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
132 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
133 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
134 SDValue &OffImm);
135 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
136 SDValue &OffImm);
137 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
138 SDValue &OffImm);
139 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
140 SDValue &OffImm);
141 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
143 // Thumb 2 Addressing Modes:
144 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
145 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
146 SDValue &OffImm);
147 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
148 SDValue &OffImm);
149 template <unsigned Shift>
150 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
151 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
152 unsigned Shift);
153 template <unsigned Shift>
154 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
155 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
156 SDValue &OffReg, SDValue &ShImm);
157 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
159 inline bool is_so_imm(unsigned Imm) const {
160 return ARM_AM::getSOImmVal(Imm) != -1;
163 inline bool is_so_imm_not(unsigned Imm) const {
164 return ARM_AM::getSOImmVal(~Imm) != -1;
167 inline bool is_t2_so_imm(unsigned Imm) const {
168 return ARM_AM::getT2SOImmVal(Imm) != -1;
171 inline bool is_t2_so_imm_not(unsigned Imm) const {
172 return ARM_AM::getT2SOImmVal(~Imm) != -1;
175 // Include the pieces autogenerated from the target description.
176 #include "ARMGenDAGISel.inc"
178 private:
179 void transferMemOperands(SDNode *Src, SDNode *Dst);
181 /// Indexed (pre/post inc/dec) load matching code for ARM.
182 bool tryARMIndexedLoad(SDNode *N);
183 bool tryT1IndexedLoad(SDNode *N);
184 bool tryT2IndexedLoad(SDNode *N);
185 bool tryMVEIndexedLoad(SDNode *N);
187 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
188 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
189 /// loads of D registers and even subregs and odd subregs of Q registers.
190 /// For NumVecs <= 2, QOpcodes1 is not used.
191 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
192 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
193 const uint16_t *QOpcodes1);
195 /// SelectVST - Select NEON store intrinsics. NumVecs should
196 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
197 /// stores of D registers and even subregs and odd subregs of Q registers.
198 /// For NumVecs <= 2, QOpcodes1 is not used.
199 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
200 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
201 const uint16_t *QOpcodes1);
203 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
204 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
205 /// load/store of D registers and Q registers.
206 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
207 unsigned NumVecs, const uint16_t *DOpcodes,
208 const uint16_t *QOpcodes);
210 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
211 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
212 /// for loading D registers.
213 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
214 unsigned NumVecs, const uint16_t *DOpcodes,
215 const uint16_t *QOpcodes0 = nullptr,
216 const uint16_t *QOpcodes1 = nullptr);
218 /// Try to select SBFX/UBFX instructions for ARM.
219 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
221 // Select special operations if node forms integer ABS pattern
222 bool tryABSOp(SDNode *N);
224 bool tryReadRegister(SDNode *N);
225 bool tryWriteRegister(SDNode *N);
227 bool tryInlineAsm(SDNode *N);
229 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
231 void SelectCMP_SWAP(SDNode *N);
233 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
234 /// inline asm expressions.
235 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
236 std::vector<SDValue> &OutOps) override;
238 // Form pairs of consecutive R, S, D, or Q registers.
239 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
240 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
241 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
242 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
244 // Form sequences of 4 consecutive S, D, or Q registers.
245 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
246 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
247 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
249 // Get the alignment operand for a NEON VLD or VST instruction.
250 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
251 bool is64BitVector);
253 /// Checks if N is a multiplication by a constant where we can extract out a
254 /// power of two from the constant so that it can be used in a shift, but only
255 /// if it simplifies the materialization of the constant. Returns true if it
256 /// is, and assigns to PowerOfTwo the power of two that should be extracted
257 /// out and to NewMulConst the new constant to be multiplied by.
258 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
259 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
261 /// Replace N with M in CurDAG, in a way that also ensures that M gets
262 /// selected when N would have been selected.
263 void replaceDAGValue(const SDValue &N, SDValue M);
267 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
268 /// operand. If so Imm will receive the 32-bit value.
269 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
270 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
271 Imm = cast<ConstantSDNode>(N)->getZExtValue();
272 return true;
274 return false;
277 // isInt32Immediate - This method tests to see if a constant operand.
278 // If so Imm will receive the 32 bit value.
279 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
280 return isInt32Immediate(N.getNode(), Imm);
283 // isOpcWithIntImmediate - This method tests to see if the node is a specific
284 // opcode and that it has a immediate integer right operand.
285 // If so Imm will receive the 32 bit value.
286 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
287 return N->getOpcode() == Opc &&
288 isInt32Immediate(N->getOperand(1).getNode(), Imm);
291 /// Check whether a particular node is a constant value representable as
292 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
294 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
295 static bool isScaledConstantInRange(SDValue Node, int Scale,
296 int RangeMin, int RangeMax,
297 int &ScaledConstant) {
298 assert(Scale > 0 && "Invalid scale!");
300 // Check that this is a constant.
301 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
302 if (!C)
303 return false;
305 ScaledConstant = (int) C->getZExtValue();
306 if ((ScaledConstant % Scale) != 0)
307 return false;
309 ScaledConstant /= Scale;
310 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
313 void ARMDAGToDAGISel::PreprocessISelDAG() {
314 if (!Subtarget->hasV6T2Ops())
315 return;
317 bool isThumb2 = Subtarget->isThumb();
318 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
319 E = CurDAG->allnodes_end(); I != E; ) {
320 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
322 if (N->getOpcode() != ISD::ADD)
323 continue;
325 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
326 // leading zeros, followed by consecutive set bits, followed by 1 or 2
327 // trailing zeros, e.g. 1020.
328 // Transform the expression to
329 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
330 // of trailing zeros of c2. The left shift would be folded as an shifter
331 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
332 // node (UBFX).
334 SDValue N0 = N->getOperand(0);
335 SDValue N1 = N->getOperand(1);
336 unsigned And_imm = 0;
337 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
338 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
339 std::swap(N0, N1);
341 if (!And_imm)
342 continue;
344 // Check if the AND mask is an immediate of the form: 000.....1111111100
345 unsigned TZ = countTrailingZeros(And_imm);
346 if (TZ != 1 && TZ != 2)
347 // Be conservative here. Shifter operands aren't always free. e.g. On
348 // Swift, left shifter operand of 1 / 2 for free but others are not.
349 // e.g.
350 // ubfx r3, r1, #16, #8
351 // ldr.w r3, [r0, r3, lsl #2]
352 // vs.
353 // mov.w r9, #1020
354 // and.w r2, r9, r1, lsr #14
355 // ldr r2, [r0, r2]
356 continue;
357 And_imm >>= TZ;
358 if (And_imm & (And_imm + 1))
359 continue;
361 // Look for (and (srl X, c1), c2).
362 SDValue Srl = N1.getOperand(0);
363 unsigned Srl_imm = 0;
364 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
365 (Srl_imm <= 2))
366 continue;
368 // Make sure first operand is not a shifter operand which would prevent
369 // folding of the left shift.
370 SDValue CPTmp0;
371 SDValue CPTmp1;
372 SDValue CPTmp2;
373 if (isThumb2) {
374 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
375 continue;
376 } else {
377 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
378 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
379 continue;
382 // Now make the transformation.
383 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
384 Srl.getOperand(0),
385 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
386 MVT::i32));
387 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
388 Srl,
389 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
390 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
391 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
392 CurDAG->UpdateNodeOperands(N, N0, N1);
396 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
397 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
398 /// least on current ARM implementations) which should be avoidded.
399 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
400 if (OptLevel == CodeGenOpt::None)
401 return true;
403 if (!Subtarget->hasVMLxHazards())
404 return true;
406 if (!N->hasOneUse())
407 return false;
409 SDNode *Use = *N->use_begin();
410 if (Use->getOpcode() == ISD::CopyToReg)
411 return true;
412 if (Use->isMachineOpcode()) {
413 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
414 CurDAG->getSubtarget().getInstrInfo());
416 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
417 if (MCID.mayStore())
418 return true;
419 unsigned Opcode = MCID.getOpcode();
420 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
421 return true;
422 // vmlx feeding into another vmlx. We actually want to unfold
423 // the use later in the MLxExpansion pass. e.g.
424 // vmla
425 // vmla (stall 8 cycles)
427 // vmul (5 cycles)
428 // vadd (5 cycles)
429 // vmla
430 // This adds up to about 18 - 19 cycles.
432 // vmla
433 // vmul (stall 4 cycles)
434 // vadd adds up to about 14 cycles.
435 return TII->isFpMLxInstruction(Opcode);
438 return false;
441 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
442 ARM_AM::ShiftOpc ShOpcVal,
443 unsigned ShAmt) {
444 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
445 return true;
446 if (Shift.hasOneUse())
447 return true;
448 // R << 2 is free.
449 return ShOpcVal == ARM_AM::lsl &&
450 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
453 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
454 unsigned MaxShift,
455 unsigned &PowerOfTwo,
456 SDValue &NewMulConst) const {
457 assert(N.getOpcode() == ISD::MUL);
458 assert(MaxShift > 0);
460 // If the multiply is used in more than one place then changing the constant
461 // will make other uses incorrect, so don't.
462 if (!N.hasOneUse()) return false;
463 // Check if the multiply is by a constant
464 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
465 if (!MulConst) return false;
466 // If the constant is used in more than one place then modifying it will mean
467 // we need to materialize two constants instead of one, which is a bad idea.
468 if (!MulConst->hasOneUse()) return false;
469 unsigned MulConstVal = MulConst->getZExtValue();
470 if (MulConstVal == 0) return false;
472 // Find the largest power of 2 that MulConstVal is a multiple of
473 PowerOfTwo = MaxShift;
474 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
475 --PowerOfTwo;
476 if (PowerOfTwo == 0) return false;
479 // Only optimise if the new cost is better
480 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
481 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
482 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
483 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
484 return NewCost < OldCost;
487 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
488 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
489 ReplaceUses(N, M);
492 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
493 SDValue &BaseReg,
494 SDValue &Opc,
495 bool CheckProfitability) {
496 if (DisableShifterOp)
497 return false;
499 // If N is a multiply-by-constant and it's profitable to extract a shift and
500 // use it in a shifted operand do so.
501 if (N.getOpcode() == ISD::MUL) {
502 unsigned PowerOfTwo = 0;
503 SDValue NewMulConst;
504 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
505 HandleSDNode Handle(N);
506 SDLoc Loc(N);
507 replaceDAGValue(N.getOperand(1), NewMulConst);
508 BaseReg = Handle.getValue();
509 Opc = CurDAG->getTargetConstant(
510 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
511 return true;
515 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
517 // Don't match base register only case. That is matched to a separate
518 // lower complexity pattern with explicit register operand.
519 if (ShOpcVal == ARM_AM::no_shift) return false;
521 BaseReg = N.getOperand(0);
522 unsigned ShImmVal = 0;
523 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
524 if (!RHS) return false;
525 ShImmVal = RHS->getZExtValue() & 31;
526 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
527 SDLoc(N), MVT::i32);
528 return true;
531 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
532 SDValue &BaseReg,
533 SDValue &ShReg,
534 SDValue &Opc,
535 bool CheckProfitability) {
536 if (DisableShifterOp)
537 return false;
539 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
541 // Don't match base register only case. That is matched to a separate
542 // lower complexity pattern with explicit register operand.
543 if (ShOpcVal == ARM_AM::no_shift) return false;
545 BaseReg = N.getOperand(0);
546 unsigned ShImmVal = 0;
547 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
548 if (RHS) return false;
550 ShReg = N.getOperand(1);
551 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
552 return false;
553 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
554 SDLoc(N), MVT::i32);
555 return true;
558 // Determine whether an ISD::OR's operands are suitable to turn the operation
559 // into an addition, which often has more compact encodings.
560 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
561 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
562 Out = N;
563 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
567 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
568 SDValue &Base,
569 SDValue &OffImm) {
570 // Match simple R + imm12 operands.
572 // Base only.
573 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
574 !CurDAG->isBaseWithConstantOffset(N)) {
575 if (N.getOpcode() == ISD::FrameIndex) {
576 // Match frame index.
577 int FI = cast<FrameIndexSDNode>(N)->getIndex();
578 Base = CurDAG->getTargetFrameIndex(
579 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
580 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
581 return true;
584 if (N.getOpcode() == ARMISD::Wrapper &&
585 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
586 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
587 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
588 Base = N.getOperand(0);
589 } else
590 Base = N;
591 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
592 return true;
595 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
596 int RHSC = (int)RHS->getSExtValue();
597 if (N.getOpcode() == ISD::SUB)
598 RHSC = -RHSC;
600 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
601 Base = N.getOperand(0);
602 if (Base.getOpcode() == ISD::FrameIndex) {
603 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
604 Base = CurDAG->getTargetFrameIndex(
605 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
607 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
608 return true;
612 // Base only.
613 Base = N;
614 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
615 return true;
620 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
621 SDValue &Opc) {
622 if (N.getOpcode() == ISD::MUL &&
623 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
624 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
625 // X * [3,5,9] -> X + X * [2,4,8] etc.
626 int RHSC = (int)RHS->getZExtValue();
627 if (RHSC & 1) {
628 RHSC = RHSC & ~1;
629 ARM_AM::AddrOpc AddSub = ARM_AM::add;
630 if (RHSC < 0) {
631 AddSub = ARM_AM::sub;
632 RHSC = - RHSC;
634 if (isPowerOf2_32(RHSC)) {
635 unsigned ShAmt = Log2_32(RHSC);
636 Base = Offset = N.getOperand(0);
637 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
638 ARM_AM::lsl),
639 SDLoc(N), MVT::i32);
640 return true;
646 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
647 // ISD::OR that is equivalent to an ISD::ADD.
648 !CurDAG->isBaseWithConstantOffset(N))
649 return false;
651 // Leave simple R +/- imm12 operands for LDRi12
652 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
653 int RHSC;
654 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
655 -0x1000+1, 0x1000, RHSC)) // 12 bits.
656 return false;
659 // Otherwise this is R +/- [possibly shifted] R.
660 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
661 ARM_AM::ShiftOpc ShOpcVal =
662 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
663 unsigned ShAmt = 0;
665 Base = N.getOperand(0);
666 Offset = N.getOperand(1);
668 if (ShOpcVal != ARM_AM::no_shift) {
669 // Check to see if the RHS of the shift is a constant, if not, we can't fold
670 // it.
671 if (ConstantSDNode *Sh =
672 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
673 ShAmt = Sh->getZExtValue();
674 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
675 Offset = N.getOperand(1).getOperand(0);
676 else {
677 ShAmt = 0;
678 ShOpcVal = ARM_AM::no_shift;
680 } else {
681 ShOpcVal = ARM_AM::no_shift;
685 // Try matching (R shl C) + (R).
686 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
687 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
688 N.getOperand(0).hasOneUse())) {
689 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
690 if (ShOpcVal != ARM_AM::no_shift) {
691 // Check to see if the RHS of the shift is a constant, if not, we can't
692 // fold it.
693 if (ConstantSDNode *Sh =
694 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
695 ShAmt = Sh->getZExtValue();
696 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
697 Offset = N.getOperand(0).getOperand(0);
698 Base = N.getOperand(1);
699 } else {
700 ShAmt = 0;
701 ShOpcVal = ARM_AM::no_shift;
703 } else {
704 ShOpcVal = ARM_AM::no_shift;
709 // If Offset is a multiply-by-constant and it's profitable to extract a shift
710 // and use it in a shifted operand do so.
711 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
712 unsigned PowerOfTwo = 0;
713 SDValue NewMulConst;
714 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
715 HandleSDNode Handle(Offset);
716 replaceDAGValue(Offset.getOperand(1), NewMulConst);
717 Offset = Handle.getValue();
718 ShAmt = PowerOfTwo;
719 ShOpcVal = ARM_AM::lsl;
723 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
724 SDLoc(N), MVT::i32);
725 return true;
728 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
729 SDValue &Offset, SDValue &Opc) {
730 unsigned Opcode = Op->getOpcode();
731 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
732 ? cast<LoadSDNode>(Op)->getAddressingMode()
733 : cast<StoreSDNode>(Op)->getAddressingMode();
734 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
735 ? ARM_AM::add : ARM_AM::sub;
736 int Val;
737 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
738 return false;
740 Offset = N;
741 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
742 unsigned ShAmt = 0;
743 if (ShOpcVal != ARM_AM::no_shift) {
744 // Check to see if the RHS of the shift is a constant, if not, we can't fold
745 // it.
746 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
747 ShAmt = Sh->getZExtValue();
748 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
749 Offset = N.getOperand(0);
750 else {
751 ShAmt = 0;
752 ShOpcVal = ARM_AM::no_shift;
754 } else {
755 ShOpcVal = ARM_AM::no_shift;
759 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
760 SDLoc(N), MVT::i32);
761 return true;
764 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
765 SDValue &Offset, SDValue &Opc) {
766 unsigned Opcode = Op->getOpcode();
767 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
768 ? cast<LoadSDNode>(Op)->getAddressingMode()
769 : cast<StoreSDNode>(Op)->getAddressingMode();
770 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
771 ? ARM_AM::add : ARM_AM::sub;
772 int Val;
773 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
774 if (AddSub == ARM_AM::sub) Val *= -1;
775 Offset = CurDAG->getRegister(0, MVT::i32);
776 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
777 return true;
780 return false;
784 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
785 SDValue &Offset, SDValue &Opc) {
786 unsigned Opcode = Op->getOpcode();
787 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
788 ? cast<LoadSDNode>(Op)->getAddressingMode()
789 : cast<StoreSDNode>(Op)->getAddressingMode();
790 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
791 ? ARM_AM::add : ARM_AM::sub;
792 int Val;
793 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
794 Offset = CurDAG->getRegister(0, MVT::i32);
795 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
796 ARM_AM::no_shift),
797 SDLoc(Op), MVT::i32);
798 return true;
801 return false;
804 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
805 Base = N;
806 return true;
809 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
810 SDValue &Base, SDValue &Offset,
811 SDValue &Opc) {
812 if (N.getOpcode() == ISD::SUB) {
813 // X - C is canonicalize to X + -C, no need to handle it here.
814 Base = N.getOperand(0);
815 Offset = N.getOperand(1);
816 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
817 MVT::i32);
818 return true;
821 if (!CurDAG->isBaseWithConstantOffset(N)) {
822 Base = N;
823 if (N.getOpcode() == ISD::FrameIndex) {
824 int FI = cast<FrameIndexSDNode>(N)->getIndex();
825 Base = CurDAG->getTargetFrameIndex(
826 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
828 Offset = CurDAG->getRegister(0, MVT::i32);
829 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
830 MVT::i32);
831 return true;
834 // If the RHS is +/- imm8, fold into addr mode.
835 int RHSC;
836 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
837 -256 + 1, 256, RHSC)) { // 8 bits.
838 Base = N.getOperand(0);
839 if (Base.getOpcode() == ISD::FrameIndex) {
840 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
841 Base = CurDAG->getTargetFrameIndex(
842 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
844 Offset = CurDAG->getRegister(0, MVT::i32);
846 ARM_AM::AddrOpc AddSub = ARM_AM::add;
847 if (RHSC < 0) {
848 AddSub = ARM_AM::sub;
849 RHSC = -RHSC;
851 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
852 MVT::i32);
853 return true;
856 Base = N.getOperand(0);
857 Offset = N.getOperand(1);
858 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
859 MVT::i32);
860 return true;
863 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
864 SDValue &Offset, SDValue &Opc) {
865 unsigned Opcode = Op->getOpcode();
866 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
867 ? cast<LoadSDNode>(Op)->getAddressingMode()
868 : cast<StoreSDNode>(Op)->getAddressingMode();
869 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
870 ? ARM_AM::add : ARM_AM::sub;
871 int Val;
872 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
873 Offset = CurDAG->getRegister(0, MVT::i32);
874 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
875 MVT::i32);
876 return true;
879 Offset = N;
880 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
881 MVT::i32);
882 return true;
885 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
886 bool FP16) {
887 if (!CurDAG->isBaseWithConstantOffset(N)) {
888 Base = N;
889 if (N.getOpcode() == ISD::FrameIndex) {
890 int FI = cast<FrameIndexSDNode>(N)->getIndex();
891 Base = CurDAG->getTargetFrameIndex(
892 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
893 } else if (N.getOpcode() == ARMISD::Wrapper &&
894 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
895 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
896 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
897 Base = N.getOperand(0);
899 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
900 SDLoc(N), MVT::i32);
901 return true;
904 // If the RHS is +/- imm8, fold into addr mode.
905 int RHSC;
906 const int Scale = FP16 ? 2 : 4;
908 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
909 Base = N.getOperand(0);
910 if (Base.getOpcode() == ISD::FrameIndex) {
911 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
912 Base = CurDAG->getTargetFrameIndex(
913 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
916 ARM_AM::AddrOpc AddSub = ARM_AM::add;
917 if (RHSC < 0) {
918 AddSub = ARM_AM::sub;
919 RHSC = -RHSC;
922 if (FP16)
923 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
924 SDLoc(N), MVT::i32);
925 else
926 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
927 SDLoc(N), MVT::i32);
929 return true;
932 Base = N;
934 if (FP16)
935 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
936 SDLoc(N), MVT::i32);
937 else
938 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
939 SDLoc(N), MVT::i32);
941 return true;
944 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
945 SDValue &Base, SDValue &Offset) {
946 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
949 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
950 SDValue &Base, SDValue &Offset) {
951 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
954 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
955 SDValue &Align) {
956 Addr = N;
958 unsigned Alignment = 0;
960 MemSDNode *MemN = cast<MemSDNode>(Parent);
962 if (isa<LSBaseSDNode>(MemN) ||
963 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
964 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
965 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
966 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
967 // The maximum alignment is equal to the memory size being referenced.
968 unsigned MMOAlign = MemN->getAlignment();
969 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
970 if (MMOAlign >= MemSize && MemSize > 1)
971 Alignment = MemSize;
972 } else {
973 // All other uses of addrmode6 are for intrinsics. For now just record
974 // the raw alignment value; it will be refined later based on the legal
975 // alignment operands for the intrinsic.
976 Alignment = MemN->getAlignment();
979 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
980 return true;
983 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
984 SDValue &Offset) {
985 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
986 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
987 if (AM != ISD::POST_INC)
988 return false;
989 Offset = N;
990 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
991 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
992 Offset = CurDAG->getRegister(0, MVT::i32);
994 return true;
997 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
998 SDValue &Offset, SDValue &Label) {
999 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1000 Offset = N.getOperand(0);
1001 SDValue N1 = N.getOperand(1);
1002 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1003 SDLoc(N), MVT::i32);
1004 return true;
1007 return false;
1011 //===----------------------------------------------------------------------===//
1012 // Thumb Addressing Modes
1013 //===----------------------------------------------------------------------===//
1015 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1016 // Negative numbers are difficult to materialise in thumb1. If we are
1017 // selecting the add of a negative, instead try to select ri with a zero
1018 // offset, so create the add node directly which will become a sub.
1019 if (N.getOpcode() != ISD::ADD)
1020 return false;
1022 // Look for an imm which is not legal for ld/st, but is legal for sub.
1023 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1024 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1026 return false;
1029 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1030 SDValue &Offset) {
1031 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1032 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1033 if (!NC || !NC->isNullValue())
1034 return false;
1036 Base = Offset = N;
1037 return true;
1040 Base = N.getOperand(0);
1041 Offset = N.getOperand(1);
1042 return true;
1045 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1046 SDValue &Offset) {
1047 if (shouldUseZeroOffsetLdSt(N))
1048 return false; // Select ri instead
1049 return SelectThumbAddrModeRRSext(N, Base, Offset);
1052 bool
1053 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1054 SDValue &Base, SDValue &OffImm) {
1055 if (shouldUseZeroOffsetLdSt(N)) {
1056 Base = N;
1057 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1058 return true;
1061 if (!CurDAG->isBaseWithConstantOffset(N)) {
1062 if (N.getOpcode() == ISD::ADD) {
1063 return false; // We want to select register offset instead
1064 } else if (N.getOpcode() == ARMISD::Wrapper &&
1065 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1066 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1067 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1068 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1069 Base = N.getOperand(0);
1070 } else {
1071 Base = N;
1074 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1075 return true;
1078 // If the RHS is + imm5 * scale, fold into addr mode.
1079 int RHSC;
1080 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1081 Base = N.getOperand(0);
1082 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1083 return true;
1086 // Offset is too large, so use register offset instead.
1087 return false;
1090 bool
1091 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1092 SDValue &OffImm) {
1093 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1096 bool
1097 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1098 SDValue &OffImm) {
1099 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1102 bool
1103 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1104 SDValue &OffImm) {
1105 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1108 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1109 SDValue &Base, SDValue &OffImm) {
1110 if (N.getOpcode() == ISD::FrameIndex) {
1111 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1112 // Only multiples of 4 are allowed for the offset, so the frame object
1113 // alignment must be at least 4.
1114 MachineFrameInfo &MFI = MF->getFrameInfo();
1115 if (MFI.getObjectAlignment(FI) < 4)
1116 MFI.setObjectAlignment(FI, 4);
1117 Base = CurDAG->getTargetFrameIndex(
1118 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1119 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1120 return true;
1123 if (!CurDAG->isBaseWithConstantOffset(N))
1124 return false;
1126 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1127 // If the RHS is + imm8 * scale, fold into addr mode.
1128 int RHSC;
1129 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1130 Base = N.getOperand(0);
1131 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1132 // Make sure the offset is inside the object, or we might fail to
1133 // allocate an emergency spill slot. (An out-of-range access is UB, but
1134 // it could show up anyway.)
1135 MachineFrameInfo &MFI = MF->getFrameInfo();
1136 if (RHSC * 4 < MFI.getObjectSize(FI)) {
1137 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1138 // indexed by the LHS must be 4-byte aligned.
1139 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1140 MFI.setObjectAlignment(FI, 4);
1141 if (MFI.getObjectAlignment(FI) >= 4) {
1142 Base = CurDAG->getTargetFrameIndex(
1143 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1144 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1145 return true;
1151 return false;
1155 //===----------------------------------------------------------------------===//
1156 // Thumb 2 Addressing Modes
1157 //===----------------------------------------------------------------------===//
1160 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1161 SDValue &Base, SDValue &OffImm) {
1162 // Match simple R + imm12 operands.
1164 // Base only.
1165 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1166 !CurDAG->isBaseWithConstantOffset(N)) {
1167 if (N.getOpcode() == ISD::FrameIndex) {
1168 // Match frame index.
1169 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1170 Base = CurDAG->getTargetFrameIndex(
1171 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1172 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1173 return true;
1176 if (N.getOpcode() == ARMISD::Wrapper &&
1177 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1178 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1179 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1180 Base = N.getOperand(0);
1181 if (Base.getOpcode() == ISD::TargetConstantPool)
1182 return false; // We want to select t2LDRpci instead.
1183 } else
1184 Base = N;
1185 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1186 return true;
1189 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1190 if (SelectT2AddrModeImm8(N, Base, OffImm))
1191 // Let t2LDRi8 handle (R - imm8).
1192 return false;
1194 int RHSC = (int)RHS->getZExtValue();
1195 if (N.getOpcode() == ISD::SUB)
1196 RHSC = -RHSC;
1198 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1199 Base = N.getOperand(0);
1200 if (Base.getOpcode() == ISD::FrameIndex) {
1201 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1202 Base = CurDAG->getTargetFrameIndex(
1203 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1205 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1206 return true;
1210 // Base only.
1211 Base = N;
1212 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1213 return true;
1216 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1217 SDValue &Base, SDValue &OffImm) {
1218 // Match simple R - imm8 operands.
1219 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1220 !CurDAG->isBaseWithConstantOffset(N))
1221 return false;
1223 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1224 int RHSC = (int)RHS->getSExtValue();
1225 if (N.getOpcode() == ISD::SUB)
1226 RHSC = -RHSC;
1228 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1229 Base = N.getOperand(0);
1230 if (Base.getOpcode() == ISD::FrameIndex) {
1231 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1232 Base = CurDAG->getTargetFrameIndex(
1233 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1235 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1236 return true;
1240 return false;
1243 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1244 SDValue &OffImm){
1245 unsigned Opcode = Op->getOpcode();
1246 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1247 ? cast<LoadSDNode>(Op)->getAddressingMode()
1248 : cast<StoreSDNode>(Op)->getAddressingMode();
1249 int RHSC;
1250 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1251 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1252 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1253 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1254 return true;
1257 return false;
1260 template <unsigned Shift>
1261 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1262 SDValue &OffImm) {
1263 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1264 int RHSC;
1265 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1266 RHSC)) {
1267 Base = N.getOperand(0);
1268 if (Base.getOpcode() == ISD::FrameIndex) {
1269 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1270 Base = CurDAG->getTargetFrameIndex(
1271 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1274 if (N.getOpcode() == ISD::SUB)
1275 RHSC = -RHSC;
1276 OffImm =
1277 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1278 return true;
1282 // Base only.
1283 Base = N;
1284 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1285 return true;
1288 template <unsigned Shift>
1289 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1290 SDValue &OffImm) {
1291 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1294 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1295 SDValue &OffImm,
1296 unsigned Shift) {
1297 unsigned Opcode = Op->getOpcode();
1298 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1299 ? cast<LoadSDNode>(Op)->getAddressingMode()
1300 : cast<StoreSDNode>(Op)->getAddressingMode();
1301 int RHSC;
1302 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
1303 OffImm =
1304 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1305 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1306 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1307 MVT::i32);
1308 return true;
1310 return false;
// Match a Thumb2 base + (optionally left-shifted) register-offset address.
//
// Outputs: Base and OffReg are the two address registers, ShImm is an i32
// target constant holding the shift amount applied to OffReg (0 when no shift
// was folded).
//
// Returns false when N is neither an ISD::ADD nor a base-with-constant-offset
// node, or when operand 1 is a constant whose value lies in [0, 0x1000) or in
// [-255, -1]; those shapes are left to the immediate-offset selectors.
// Note that this function can rewrite the DAG (see replaceDAGValue below).
1313 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1314 SDValue &Base,
1315 SDValue &OffReg, SDValue &ShImm) {
1316 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1317 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1318 return false;
1320 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1321 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1322 int RHSC = (int)RHS->getZExtValue();
1323 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1324 return false;
1325 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1326 return false;
1329 // Look for (R + R) or (R + (R << [1,2,3])).
1330 unsigned ShAmt = 0;
1331 Base = N.getOperand(0);
1332 OffReg = N.getOperand(1);
1334 // Swap if it is ((R << c) + R).
// Operand 1 is checked first; operand 0 is only consulted (and swapped in as
// the offset register) when operand 1 is not an lsl.
1335 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1336 if (ShOpcVal != ARM_AM::lsl) {
1337 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1338 if (ShOpcVal == ARM_AM::lsl)
1339 std::swap(Base, OffReg);
1342 if (ShOpcVal == ARM_AM::lsl) {
1343 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1344 // it.
1345 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1346 ShAmt = Sh->getZExtValue();
// Only shift amounts below 4 that isShifterOpProfitable accepts are folded;
// otherwise OffReg stays the shift node itself and ShAmt is reset to 0.
1347 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1348 OffReg = OffReg.getOperand(0);
1349 else {
1350 ShAmt = 0;
1355 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1356 // and use it in a shifted operand do so.
1357 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1358 unsigned PowerOfTwo = 0;
1359 SDValue NewMulConst;
1360 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
// NOTE(review): the handle presumably keeps OffReg valid while
// replaceDAGValue rewrites the multiply's constant operand; OffReg is
// re-read from the handle afterwards. Confirm against replaceDAGValue.
1361 HandleSDNode Handle(OffReg);
1362 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1363 OffReg = Handle.getValue();
1364 ShAmt = PowerOfTwo;
1368 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1370 return true;
1373 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1374 SDValue &OffImm) {
1375 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1376 // instructions.
1377 Base = N;
1378 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1380 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1381 return true;
1383 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1384 if (!RHS)
1385 return true;
1387 uint32_t RHSC = (int)RHS->getZExtValue();
1388 if (RHSC > 1020 || RHSC % 4 != 0)
1389 return true;
1391 Base = N.getOperand(0);
1392 if (Base.getOpcode() == ISD::FrameIndex) {
1393 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1394 Base = CurDAG->getTargetFrameIndex(
1395 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1398 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1399 return true;
1402 //===--------------------------------------------------------------------===//
1404 /// getAL - Returns a ARMCC::AL immediate node.
1405 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1406 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1409 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1410 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1411 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1414 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1415 LoadSDNode *LD = cast<LoadSDNode>(N);
1416 ISD::MemIndexedMode AM = LD->getAddressingMode();
1417 if (AM == ISD::UNINDEXED)
1418 return false;
1420 EVT LoadedVT = LD->getMemoryVT();
1421 SDValue Offset, AMOpc;
1422 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1423 unsigned Opcode = 0;
1424 bool Match = false;
1425 if (LoadedVT == MVT::i32 && isPre &&
1426 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1427 Opcode = ARM::LDR_PRE_IMM;
1428 Match = true;
1429 } else if (LoadedVT == MVT::i32 && !isPre &&
1430 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1431 Opcode = ARM::LDR_POST_IMM;
1432 Match = true;
1433 } else if (LoadedVT == MVT::i32 &&
1434 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1435 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1436 Match = true;
1438 } else if (LoadedVT == MVT::i16 &&
1439 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1440 Match = true;
1441 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1442 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1443 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1444 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1445 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1446 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1447 Match = true;
1448 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1450 } else {
1451 if (isPre &&
1452 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1453 Match = true;
1454 Opcode = ARM::LDRB_PRE_IMM;
1455 } else if (!isPre &&
1456 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1457 Match = true;
1458 Opcode = ARM::LDRB_POST_IMM;
1459 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1460 Match = true;
1461 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1466 if (Match) {
1467 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1468 SDValue Chain = LD->getChain();
1469 SDValue Base = LD->getBasePtr();
1470 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1471 CurDAG->getRegister(0, MVT::i32), Chain };
1472 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1473 MVT::Other, Ops);
1474 transferMemOperands(N, New);
1475 ReplaceNode(N, New);
1476 return true;
1477 } else {
1478 SDValue Chain = LD->getChain();
1479 SDValue Base = LD->getBasePtr();
1480 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1481 CurDAG->getRegister(0, MVT::i32), Chain };
1482 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1483 MVT::Other, Ops);
1484 transferMemOperands(N, New);
1485 ReplaceNode(N, New);
1486 return true;
1490 return false;
1493 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1494 LoadSDNode *LD = cast<LoadSDNode>(N);
1495 EVT LoadedVT = LD->getMemoryVT();
1496 ISD::MemIndexedMode AM = LD->getAddressingMode();
1497 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1498 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1499 return false;
1501 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1502 if (!COffs || COffs->getZExtValue() != 4)
1503 return false;
1505 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1506 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1507 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1508 // ISel.
1509 SDValue Chain = LD->getChain();
1510 SDValue Base = LD->getBasePtr();
1511 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1512 CurDAG->getRegister(0, MVT::i32), Chain };
1513 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1514 MVT::i32, MVT::Other, Ops);
1515 transferMemOperands(N, New);
1516 ReplaceNode(N, New);
1517 return true;
1520 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1521 LoadSDNode *LD = cast<LoadSDNode>(N);
1522 ISD::MemIndexedMode AM = LD->getAddressingMode();
1523 if (AM == ISD::UNINDEXED)
1524 return false;
1526 EVT LoadedVT = LD->getMemoryVT();
1527 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1528 SDValue Offset;
1529 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1530 unsigned Opcode = 0;
1531 bool Match = false;
1532 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1533 switch (LoadedVT.getSimpleVT().SimpleTy) {
1534 case MVT::i32:
1535 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1536 break;
1537 case MVT::i16:
1538 if (isSExtLd)
1539 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1540 else
1541 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1542 break;
1543 case MVT::i8:
1544 case MVT::i1:
1545 if (isSExtLd)
1546 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1547 else
1548 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1549 break;
1550 default:
1551 return false;
1553 Match = true;
1556 if (Match) {
1557 SDValue Chain = LD->getChain();
1558 SDValue Base = LD->getBasePtr();
1559 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1560 CurDAG->getRegister(0, MVT::i32), Chain };
1561 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1562 MVT::Other, Ops);
1563 transferMemOperands(N, New);
1564 ReplaceNode(N, New);
1565 return true;
1568 return false;
1571 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1572 LoadSDNode *LD = cast<LoadSDNode>(N);
1573 ISD::MemIndexedMode AM = LD->getAddressingMode();
1574 if (AM == ISD::UNINDEXED)
1575 return false;
1576 EVT LoadedVT = LD->getMemoryVT();
1577 if (!LoadedVT.isVector())
1578 return false;
1579 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1580 SDValue Offset;
1581 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1582 unsigned Opcode = 0;
1583 unsigned Align = LD->getAlignment();
1584 bool IsLE = Subtarget->isLittle();
1586 if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1587 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1588 if (isSExtLd)
1589 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1590 else
1591 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1592 } else if (LoadedVT == MVT::v8i8 &&
1593 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1594 if (isSExtLd)
1595 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1596 else
1597 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1598 } else if (LoadedVT == MVT::v4i8 &&
1599 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1600 if (isSExtLd)
1601 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1602 else
1603 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1604 } else if (Align >= 4 &&
1605 (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1606 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1607 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1608 else if (Align >= 2 &&
1609 (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1610 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1611 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1612 else if ((IsLE || LoadedVT == MVT::v16i8) &&
1613 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1614 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1615 else
1616 return false;
1618 SDValue Chain = LD->getChain();
1619 SDValue Base = LD->getBasePtr();
1620 SDValue Ops[] = {Base, Offset,
1621 CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1622 CurDAG->getRegister(0, MVT::i32), Chain};
1623 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
1624 MVT::i32, MVT::Other, Ops);
1625 transferMemOperands(N, New);
1626 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1627 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1628 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1629 CurDAG->RemoveDeadNode(N);
1630 return true;
1633 /// Form a GPRPair pseudo register from a pair of GPR regs.
1634 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1635 SDLoc dl(V0.getNode());
1636 SDValue RegClass =
1637 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1638 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1639 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1640 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1641 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1644 /// Form a D register from a pair of S registers.
1645 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1646 SDLoc dl(V0.getNode());
1647 SDValue RegClass =
1648 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1649 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1650 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1651 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1652 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1655 /// Form a quad register from a pair of D registers.
1656 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1657 SDLoc dl(V0.getNode());
1658 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1659 MVT::i32);
1660 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1661 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1662 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1663 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1666 /// Form 4 consecutive D registers from a pair of Q registers.
1667 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1668 SDLoc dl(V0.getNode());
1669 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1670 MVT::i32);
1671 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1672 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1673 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1674 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1677 /// Form 4 consecutive S registers.
1678 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1679 SDValue V2, SDValue V3) {
1680 SDLoc dl(V0.getNode());
1681 SDValue RegClass =
1682 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1683 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1684 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1685 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1686 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1687 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1688 V2, SubReg2, V3, SubReg3 };
1689 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1692 /// Form 4 consecutive D registers.
1693 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1694 SDValue V2, SDValue V3) {
1695 SDLoc dl(V0.getNode());
1696 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1697 MVT::i32);
1698 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1699 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1700 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1701 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1702 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1703 V2, SubReg2, V3, SubReg3 };
1704 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1707 /// Form 4 consecutive Q registers.
1708 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1709 SDValue V2, SDValue V3) {
1710 SDLoc dl(V0.getNode());
1711 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1712 MVT::i32);
1713 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1714 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1715 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1716 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1717 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1718 V2, SubReg2, V3, SubReg3 };
1719 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1722 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1723 /// of a NEON VLD or VST instruction. The supported values depend on the
1724 /// number of registers being loaded.
1725 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1726 unsigned NumVecs, bool is64BitVector) {
1727 unsigned NumRegs = NumVecs;
1728 if (!is64BitVector && NumVecs < 3)
1729 NumRegs *= 2;
1731 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1732 if (Alignment >= 32 && NumRegs == 4)
1733 Alignment = 32;
1734 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1735 Alignment = 16;
1736 else if (Alignment >= 8)
1737 Alignment = 8;
1738 else
1739 Alignment = 0;
1741 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1744 static bool isVLDfixed(unsigned Opc)
1746 switch (Opc) {
1747 default: return false;
1748 case ARM::VLD1d8wb_fixed : return true;
1749 case ARM::VLD1d16wb_fixed : return true;
1750 case ARM::VLD1d64Qwb_fixed : return true;
1751 case ARM::VLD1d32wb_fixed : return true;
1752 case ARM::VLD1d64wb_fixed : return true;
1753 case ARM::VLD1d64TPseudoWB_fixed : return true;
1754 case ARM::VLD1d64QPseudoWB_fixed : return true;
1755 case ARM::VLD1q8wb_fixed : return true;
1756 case ARM::VLD1q16wb_fixed : return true;
1757 case ARM::VLD1q32wb_fixed : return true;
1758 case ARM::VLD1q64wb_fixed : return true;
1759 case ARM::VLD1DUPd8wb_fixed : return true;
1760 case ARM::VLD1DUPd16wb_fixed : return true;
1761 case ARM::VLD1DUPd32wb_fixed : return true;
1762 case ARM::VLD1DUPq8wb_fixed : return true;
1763 case ARM::VLD1DUPq16wb_fixed : return true;
1764 case ARM::VLD1DUPq32wb_fixed : return true;
1765 case ARM::VLD2d8wb_fixed : return true;
1766 case ARM::VLD2d16wb_fixed : return true;
1767 case ARM::VLD2d32wb_fixed : return true;
1768 case ARM::VLD2q8PseudoWB_fixed : return true;
1769 case ARM::VLD2q16PseudoWB_fixed : return true;
1770 case ARM::VLD2q32PseudoWB_fixed : return true;
1771 case ARM::VLD2DUPd8wb_fixed : return true;
1772 case ARM::VLD2DUPd16wb_fixed : return true;
1773 case ARM::VLD2DUPd32wb_fixed : return true;
1777 static bool isVSTfixed(unsigned Opc)
1779 switch (Opc) {
1780 default: return false;
1781 case ARM::VST1d8wb_fixed : return true;
1782 case ARM::VST1d16wb_fixed : return true;
1783 case ARM::VST1d32wb_fixed : return true;
1784 case ARM::VST1d64wb_fixed : return true;
1785 case ARM::VST1q8wb_fixed : return true;
1786 case ARM::VST1q16wb_fixed : return true;
1787 case ARM::VST1q32wb_fixed : return true;
1788 case ARM::VST1q64wb_fixed : return true;
1789 case ARM::VST1d64TPseudoWB_fixed : return true;
1790 case ARM::VST1d64QPseudoWB_fixed : return true;
1791 case ARM::VST2d8wb_fixed : return true;
1792 case ARM::VST2d16wb_fixed : return true;
1793 case ARM::VST2d32wb_fixed : return true;
1794 case ARM::VST2q8PseudoWB_fixed : return true;
1795 case ARM::VST2q16PseudoWB_fixed : return true;
1796 case ARM::VST2q32PseudoWB_fixed : return true;
1800 // Get the register stride update opcode of a VLD/VST instruction that
1801 // is otherwise equivalent to the given fixed stride updating instruction.
1802 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1803 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1804 && "Incorrect fixed stride updating instruction.");
1805 switch (Opc) {
1806 default: break;
1807 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1808 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1809 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1810 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1811 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1812 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1813 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1814 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1815 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1816 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1817 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1818 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1819 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1820 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1821 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1822 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1823 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1824 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1826 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1827 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1828 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1829 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1830 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1831 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1832 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1833 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1834 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1835 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1837 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1838 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1839 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1840 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1841 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1842 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1844 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1845 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1846 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1847 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1848 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1849 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1851 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1852 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1853 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1855 return Opc; // If not one we handle, return it unchanged.
1858 /// Returns true if the given increment is a Constant known to be equal to the
1859 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1860 /// be used.
1861 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1862 auto C = dyn_cast<ConstantSDNode>(Inc);
1863 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
/// Select machine node(s) for the load node \p N, which produces \p NumVecs
/// vector results (1 to 4, asserted below).
///
/// \param isUpdating  N carries an address-increment operand and the selected
///                    node gets an extra i32 result.
/// \param DOpcodes    Opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes0   Opcode table used for quad-register loads that need a
///                    single instruction, and for the first (even subregs)
///                    instruction of a two-instruction quad-register load.
/// \param QOpcodes1   Opcode table for the second (odd subregs) instruction.
///
/// All three tables are indexed by the OpcodeIndex computed from the vector
/// type. Returns without touching N if SelectAddrMode6 fails.
1866 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1867 const uint16_t *DOpcodes,
1868 const uint16_t *QOpcodes0,
1869 const uint16_t *QOpcodes1) {
1870 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1871 SDLoc dl(N);
// Operand 0 of N is the chain (read below). The address operand is at index 2
// when N is treated as an intrinsic node and at index 1 otherwise; presumably
// the extra slot holds the intrinsic ID - TODO confirm.
1873 SDValue MemAddr, Align;
1874 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
1875 // nodes are not intrinsics.
1876 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1877 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1878 return;
1880 SDValue Chain = N->getOperand(0);
1881 EVT VT = N->getValueType(0);
1882 bool is64BitVector = VT.is64BitVector();
// Replace the alignment produced by SelectAddrMode6 with the value
// GetVLDSTAlign derives for this vector count and register width.
1883 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
// Index into the opcode tables, chosen by element size. D- and Q-register
// types with the same element size share an index; is64BitVector decides
// later which table is consulted.
1885 unsigned OpcodeIndex;
1886 switch (VT.getSimpleVT().SimpleTy) {
1887 default: llvm_unreachable("unhandled vld type");
1888 // Double-register operations:
1889 case MVT::v8i8: OpcodeIndex = 0; break;
1890 case MVT::v4f16:
1891 case MVT::v4i16: OpcodeIndex = 1; break;
1892 case MVT::v2f32:
1893 case MVT::v2i32: OpcodeIndex = 2; break;
1894 case MVT::v1i64: OpcodeIndex = 3; break;
1895 // Quad-register operations:
1896 case MVT::v16i8: OpcodeIndex = 0; break;
1897 case MVT::v8f16:
1898 case MVT::v8i16: OpcodeIndex = 1; break;
1899 case MVT::v4f32:
1900 case MVT::v4i32: OpcodeIndex = 2; break;
1901 case MVT::v2f64:
1902 case MVT::v2i64: OpcodeIndex = 3; break;
// Type of result 0 of the machine node: VT itself for a single vector,
// otherwise a vector of i64 with NumVecs elements (3 is rounded up to 4),
// doubled when VT is not a 64-bit vector.
1905 EVT ResTy;
1906 if (NumVecs == 1)
1907 ResTy = VT;
1908 else {
1909 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1910 if (!is64BitVector)
1911 ResTyElts *= 2;
1912 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
// Result list of the final node: the loaded value, then an i32 for updating
// loads, then MVT::Other.
1914 std::vector<EVT> ResTys;
1915 ResTys.push_back(ResTy);
1916 if (isUpdating)
1917 ResTys.push_back(MVT::i32);
1918 ResTys.push_back(MVT::Other);
1920 SDValue Pred = getAL(CurDAG, dl);
1921 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1922 SDNode *VLd;
1923 SmallVector<SDValue, 7> Ops;
1925 // Double registers and VLD1/VLD2 quad registers are directly supported.
1926 if (is64BitVector || NumVecs <= 2) {
1927 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1928 QOpcodes0[OpcodeIndex]);
1929 Ops.push_back(MemAddr);
1930 Ops.push_back(Align);
1931 if (isUpdating) {
// The increment is the operand right after the address. It is passed as an
// explicit operand only when it is not a constant equal to the access size
// (see isPerfectIncrement).
1932 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1933 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1934 if (!IsImmUpdate) {
1935 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1936 // check for the opcode rather than the number of vector elements.
1937 if (isVLDfixed(Opc))
1938 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1939 Ops.push_back(Inc);
1940 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1941 // the operands if not such an opcode.
1942 } else if (!isVLDfixed(Opc))
1943 Ops.push_back(Reg0);
1945 Ops.push_back(Pred);
1946 Ops.push_back(Reg0);
1947 Ops.push_back(Chain);
1948 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1950 } else {
1951 // Otherwise, quad registers are loaded with two separate instructions,
1952 // where one loads the even registers and the other loads the odd registers.
1953 EVT AddrTy = MemAddr.getValueType();
1955 // Load the even subregs. This is always an updating load, so that it
1956 // provides the address to the second load for the odd subregs.
1957 SDValue ImplDef =
1958 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1959 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1960 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1961 ResTy, AddrTy, MVT::Other, OpsA);
// Results of VLdA: 0 = loaded value, 1 = address, 2 = MVT::Other, which
// becomes the chain for the second load.
1962 Chain = SDValue(VLdA, 2);
1964 // Load the odd subregs.
// The second load addresses memory through result 1 of the first load and
// takes the first load's value (result 0) as an input operand.
1965 Ops.push_back(SDValue(VLdA, 1));
1966 Ops.push_back(Align);
1967 if (isUpdating) {
1968 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1969 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1970 "only constant post-increment update allowed for VLD3/4");
1971 (void)Inc;
1972 Ops.push_back(Reg0);
1974 Ops.push_back(SDValue(VLdA, 0));
1975 Ops.push_back(Pred);
1976 Ops.push_back(Reg0);
1977 Ops.push_back(Chain);
1978 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1981 // Transfer memoperands.
1982 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1983 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
// A single vector needs no subregister extraction: N is replaced by the new
// node directly.
1985 if (NumVecs == 1) {
1986 ReplaceNode(N, VLd);
1987 return;
1990 // Extract out the subregisters.
1991 SDValue SuperReg = SDValue(VLd, 0);
1992 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1993 ARM::qsub_3 == ARM::qsub_0 + 3,
1994 "Unexpected subreg numbering");
1995 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1996 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1997 ReplaceUses(SDValue(N, Vec),
1998 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// The results of N that follow the vectors map onto result 1 of the new node
// and, for updating loads, result 2.
1999 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2000 if (isUpdating)
2001 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2002 CurDAG->RemoveDeadNode(N);
/// Select machine node(s) for the store node \p N, which stores \p NumVecs
/// vectors (1 to 4, asserted below).
///
/// \param isUpdating  N carries an address-increment operand and the selected
///                    node gets an extra i32 result.
/// \param DOpcodes    Opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes0   Opcode table used for quad-register stores that need a
///                    single instruction, and for the first (even D registers)
///                    instruction of a two-instruction quad-register store.
/// \param QOpcodes1   Opcode table for the second (odd D registers)
///                    instruction.
///
/// All three tables are indexed by the OpcodeIndex computed from the type of
/// the first stored vector. Returns without touching N if SelectAddrMode6
/// fails.
2005 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2006 const uint16_t *DOpcodes,
2007 const uint16_t *QOpcodes0,
2008 const uint16_t *QOpcodes1) {
2009 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2010 SDLoc dl(N);
2012 SDValue MemAddr, Align;
2013 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2014 // nodes are not intrinsics.
2015 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
// Index of the first vector operand to be stored.
2016 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2017 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2018 return;
// Fetched up front because both the single-instruction path and the
// two-instruction path attach it to the nodes they create.
2020 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2022 SDValue Chain = N->getOperand(0);
2023 EVT VT = N->getOperand(Vec0Idx).getValueType();
2024 bool is64BitVector = VT.is64BitVector();
2025 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
// Index into the opcode tables, chosen by element size. D- and Q-register
// types with the same element size share an index.
2027 unsigned OpcodeIndex;
2028 switch (VT.getSimpleVT().SimpleTy) {
2029 default: llvm_unreachable("unhandled vst type");
2030 // Double-register operations:
2031 case MVT::v8i8: OpcodeIndex = 0; break;
2032 case MVT::v4f16:
2033 case MVT::v4i16: OpcodeIndex = 1; break;
2034 case MVT::v2f32:
2035 case MVT::v2i32: OpcodeIndex = 2; break;
2036 case MVT::v1i64: OpcodeIndex = 3; break;
2037 // Quad-register operations:
2038 case MVT::v16i8: OpcodeIndex = 0; break;
2039 case MVT::v8f16:
2040 case MVT::v8i16: OpcodeIndex = 1; break;
2041 case MVT::v4f32:
2042 case MVT::v4i32: OpcodeIndex = 2; break;
2043 case MVT::v2f64:
2044 case MVT::v2i64: OpcodeIndex = 3; break;
// Result list of the final node: an i32 for updating stores, then MVT::Other.
2047 std::vector<EVT> ResTys;
2048 if (isUpdating)
2049 ResTys.push_back(MVT::i32);
2050 ResTys.push_back(MVT::Other);
2052 SDValue Pred = getAL(CurDAG, dl);
2053 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2054 SmallVector<SDValue, 7> Ops;
2056 // Double registers and VST1/VST2 quad registers are directly supported.
2057 if (is64BitVector || NumVecs <= 2) {
// Build the single source operand: the vector itself for one vector,
// otherwise the vectors combined into one wide register value.
2058 SDValue SrcReg;
2059 if (NumVecs == 1) {
2060 SrcReg = N->getOperand(Vec0Idx);
2061 } else if (is64BitVector) {
2062 // Form a REG_SEQUENCE to force register allocation.
2063 SDValue V0 = N->getOperand(Vec0Idx + 0);
2064 SDValue V1 = N->getOperand(Vec0Idx + 1);
2065 if (NumVecs == 2)
2066 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2067 else {
2068 SDValue V2 = N->getOperand(Vec0Idx + 2);
2069 // If it's a vst3, form a quad D-register and leave the last part as
2070 // an undef.
2071 SDValue V3 = (NumVecs == 3)
2072 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2073 : N->getOperand(Vec0Idx + 3);
2074 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2076 } else {
2077 // Form a QQ register.
2078 SDValue Q0 = N->getOperand(Vec0Idx);
2079 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2080 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2083 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2084 QOpcodes0[OpcodeIndex]);
2085 Ops.push_back(MemAddr);
2086 Ops.push_back(Align);
2087 if (isUpdating) {
// The increment is the operand right after the address. It is passed as an
// explicit operand only when it is not a constant equal to the access size
// (see isPerfectIncrement).
2088 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2089 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2090 if (!IsImmUpdate) {
2091 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2092 // check for the opcode rather than the number of vector elements.
2093 if (isVSTfixed(Opc))
2094 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2095 Ops.push_back(Inc);
2097 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2098 // the operands if not such an opcode.
2099 else if (!isVSTfixed(Opc))
2100 Ops.push_back(Reg0);
2102 Ops.push_back(SrcReg);
2103 Ops.push_back(Pred);
2104 Ops.push_back(Reg0);
2105 Ops.push_back(Chain);
2106 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2108 // Transfer memoperands.
2109 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2111 ReplaceNode(N, VSt);
2112 return;
2115 // Otherwise, quad registers are stored with two separate instructions,
2116 // where one stores the even registers and the other stores the odd registers.
2118 // Form the QQQQ REG_SEQUENCE.
// For three vectors the fourth slot is filled with an IMPLICIT_DEF.
2119 SDValue V0 = N->getOperand(Vec0Idx + 0);
2120 SDValue V1 = N->getOperand(Vec0Idx + 1);
2121 SDValue V2 = N->getOperand(Vec0Idx + 2);
2122 SDValue V3 = (NumVecs == 3)
2123 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2124 : N->getOperand(Vec0Idx + 3);
2125 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2127 // Store the even D registers. This is always an updating store, so that it
2128 // provides the address to the second store for the odd subregs.
2129 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2130 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2131 MemAddr.getValueType(),
2132 MVT::Other, OpsA);
// Both stores are given the same memory operand.
2133 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
// Results of VStA: 0 = address, 1 = MVT::Other, which becomes the chain for
// the second store.
2134 Chain = SDValue(VStA, 1);
2136 // Store the odd D registers.
2137 Ops.push_back(SDValue(VStA, 0));
2138 Ops.push_back(Align);
2139 if (isUpdating) {
2140 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2141 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2142 "only constant post-increment update allowed for VST3/4");
2143 (void)Inc;
2144 Ops.push_back(Reg0);
2146 Ops.push_back(RegSeq);
2147 Ops.push_back(Pred);
2148 Ops.push_back(Reg0);
2149 Ops.push_back(Chain);
2150 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2151 Ops);
2152 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2153 ReplaceNode(N, VStB);
/// Select a machine node for the single-lane load or store node \p N that
/// covers \p NumVecs vectors (2 to 4, asserted below).
///
/// \param IsLoad      When false, the new node simply replaces N. When true,
///                    the new node has a wide value result from which the
///                    per-vector results of N are extracted.
/// \param isUpdating  N carries an address-increment operand and the selected
///                    node gets an extra i32 result.
/// \param DOpcodes    Opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes    Opcode table used otherwise.
///
/// Returns without touching N if SelectAddrMode6 fails.
2156 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2157 unsigned NumVecs,
2158 const uint16_t *DOpcodes,
2159 const uint16_t *QOpcodes) {
2160 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2161 SDLoc dl(N);
2163 SDValue MemAddr, Align;
2164 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2165 // nodes are not intrinsics.
2166 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
// Index of the first vector operand.
2167 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2168 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2169 return;
2171 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2173 SDValue Chain = N->getOperand(0);
// The lane index is the constant operand that follows the NumVecs vector
// operands.
2174 unsigned Lane =
2175 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2176 EVT VT = N->getOperand(Vec0Idx).getValueType();
2177 bool is64BitVector = VT.is64BitVector();
// Compute the alignment operand. It stays 0 for three vectors. Otherwise the
// alignment from SelectAddrMode6 is clamped to the number of bytes accessed
// (NumVecs elements), dropped to 0 when it is below both 8 and that byte
// count, reduced to its lowest set bit, and finally dropped to 0 if it is 1.
2179 unsigned Alignment = 0;
2180 if (NumVecs != 3) {
2181 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2182 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2183 if (Alignment > NumBytes)
2184 Alignment = NumBytes;
2185 if (Alignment < 8 && Alignment < NumBytes)
2186 Alignment = 0;
2187 // Alignment must be a power of two; make sure of that.
2188 Alignment = (Alignment & -Alignment);
2189 if (Alignment == 1)
2190 Alignment = 0;
2192 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
// Index into the opcode tables. The quad-register table starts at 16-bit
// elements (v8i16/v8f16 map to index 0); v16i8 and 64-bit element types have
// no case here and reach the unreachable default.
2194 unsigned OpcodeIndex;
2195 switch (VT.getSimpleVT().SimpleTy) {
2196 default: llvm_unreachable("unhandled vld/vst lane type");
2197 // Double-register operations:
2198 case MVT::v8i8: OpcodeIndex = 0; break;
2199 case MVT::v4f16:
2200 case MVT::v4i16: OpcodeIndex = 1; break;
2201 case MVT::v2f32:
2202 case MVT::v2i32: OpcodeIndex = 2; break;
2203 // Quad-register operations:
2204 case MVT::v8f16:
2205 case MVT::v8i16: OpcodeIndex = 0; break;
2206 case MVT::v4f32:
2207 case MVT::v4i32: OpcodeIndex = 1; break;
// Result list: for loads a wide vector of i64 (NumVecs elements, 3 rounded up
// to 4, doubled for non-64-bit vectors), then an i32 for updating nodes, then
// MVT::Other.
2210 std::vector<EVT> ResTys;
2211 if (IsLoad) {
2212 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2213 if (!is64BitVector)
2214 ResTyElts *= 2;
2215 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2216 MVT::i64, ResTyElts));
2218 if (isUpdating)
2219 ResTys.push_back(MVT::i32);
2220 ResTys.push_back(MVT::Other);
2222 SDValue Pred = getAL(CurDAG, dl);
2223 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2225 SmallVector<SDValue, 8> Ops;
2226 Ops.push_back(MemAddr);
2227 Ops.push_back(Align);
2228 if (isUpdating) {
// Reg0 is passed in place of the increment when the increment is a constant
// equal to NumVecs elements' worth of bytes (see isPerfectIncrement).
2229 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2230 bool IsImmUpdate =
2231 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2232 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
// Combine the input vectors into one wide register value. For three vectors
// the fourth slot is filled with an IMPLICIT_DEF.
2235 SDValue SuperReg;
2236 SDValue V0 = N->getOperand(Vec0Idx + 0);
2237 SDValue V1 = N->getOperand(Vec0Idx + 1);
2238 if (NumVecs == 2) {
2239 if (is64BitVector)
2240 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2241 else
2242 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2243 } else {
2244 SDValue V2 = N->getOperand(Vec0Idx + 2);
2245 SDValue V3 = (NumVecs == 3)
2246 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2247 : N->getOperand(Vec0Idx + 3);
2248 if (is64BitVector)
2249 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2250 else
2251 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2253 Ops.push_back(SuperReg);
2254 Ops.push_back(getI32Imm(Lane, dl));
2255 Ops.push_back(Pred);
2256 Ops.push_back(Reg0);
2257 Ops.push_back(Chain);
2259 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2260 QOpcodes[OpcodeIndex]);
// Despite its name, VLdLn holds the new node for both loads and stores.
2261 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2262 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
// A store has no vector results to extract, so N is replaced directly.
2263 if (!IsLoad) {
2264 ReplaceNode(N, VLdLn);
2265 return;
2268 // Extract the subregisters.
2269 SuperReg = SDValue(VLdLn, 0);
2270 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2271 ARM::qsub_3 == ARM::qsub_0 + 3,
2272 "Unexpected subreg numbering");
2273 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2274 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2275 ReplaceUses(SDValue(N, Vec),
2276 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// The results of N that follow the vectors map onto result 1 of the new node
// and, for updating loads, result 2.
2277 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2278 if (isUpdating)
2279 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2280 CurDAG->RemoveDeadNode(N);
/// Select machine node(s) for the load-and-duplicate node \p N that produces
/// \p NumVecs vectors (1 to 4, asserted below).
///
/// \param IsIntrinsic  Selects the address operand index: 2 when true, 1
///                     otherwise.
/// \param isUpdating   The selected node gets an extra i32 result and, on the
///                     single-instruction path, an increment is read from N.
/// \param DOpcodes     Opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes0    Opcode table for a single-vector quad-register load,
///                     and for the first instruction when two or more
///                     quad-register vectors are loaded.
/// \param QOpcodes1    Opcode table for the second instruction in that case.
///
/// Returns without touching N if SelectAddrMode6 fails.
2283 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2284 bool isUpdating, unsigned NumVecs,
2285 const uint16_t *DOpcodes,
2286 const uint16_t *QOpcodes0,
2287 const uint16_t *QOpcodes1) {
2288 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2289 SDLoc dl(N);
2291 SDValue MemAddr, Align;
2292 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2293 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2294 return;
2296 SDValue Chain = N->getOperand(0);
2297 EVT VT = N->getValueType(0);
2298 bool is64BitVector = VT.is64BitVector();
// Compute the alignment operand. It stays 0 for three vectors. Otherwise the
// alignment from SelectAddrMode6 is clamped to the number of bytes accessed
// (NumVecs elements), dropped to 0 when it is below both 8 and that byte
// count, reduced to its lowest set bit, and finally dropped to 0 if it is 1.
2300 unsigned Alignment = 0;
2301 if (NumVecs != 3) {
2302 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2303 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2304 if (Alignment > NumBytes)
2305 Alignment = NumBytes;
2306 if (Alignment < 8 && Alignment < NumBytes)
2307 Alignment = 0;
2308 // Alignment must be a power of two; make sure of that.
2309 Alignment = (Alignment & -Alignment);
2310 if (Alignment == 1)
2311 Alignment = 0;
2313 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
// Index into the opcode tables, chosen purely by element size; D- and
// Q-register types share each index.
2315 unsigned OpcodeIndex;
2316 switch (VT.getSimpleVT().SimpleTy) {
2317 default: llvm_unreachable("unhandled vld-dup type");
2318 case MVT::v8i8:
2319 case MVT::v16i8: OpcodeIndex = 0; break;
2320 case MVT::v4i16:
2321 case MVT::v8i16:
2322 case MVT::v4f16:
2323 case MVT::v8f16:
2324 OpcodeIndex = 1; break;
2325 case MVT::v2f32:
2326 case MVT::v2i32:
2327 case MVT::v4f32:
2328 case MVT::v4i32: OpcodeIndex = 2; break;
2329 case MVT::v1f64:
2330 case MVT::v1i64: OpcodeIndex = 3; break;
// Result 0 is always typed as a vector of i64, even for a single vector:
// NumVecs elements (3 rounded up to 4), doubled for non-64-bit vectors.
2333 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2334 if (!is64BitVector)
2335 ResTyElts *= 2;
2336 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2338 std::vector<EVT> ResTys;
2339 ResTys.push_back(ResTy);
2340 if (isUpdating)
2341 ResTys.push_back(MVT::i32);
2342 ResTys.push_back(MVT::Other);
2344 SDValue Pred = getAL(CurDAG, dl);
2345 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2347 SDNode *VLdDup;
// 64-bit vectors and single quad-register vectors need one instruction.
2348 if (is64BitVector || NumVecs == 1) {
2349 SmallVector<SDValue, 6> Ops;
2350 Ops.push_back(MemAddr);
2351 Ops.push_back(Align);
2352 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2353 QOpcodes0[OpcodeIndex];
2354 if (isUpdating) {
2355 // fixed-stride update instructions don't have an explicit writeback
2356 // operand. It's implicit in the opcode itself.
// NOTE(review): the increment is read from literal operand index 2 rather
// than AddrOpIdx + 1; the two agree only when IsIntrinsic is false - confirm
// that updating nodes are never intrinsics here.
2357 SDValue Inc = N->getOperand(2);
2358 bool IsImmUpdate =
2359 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2360 if (NumVecs <= 2 && !IsImmUpdate)
2361 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2362 if (!IsImmUpdate)
2363 Ops.push_back(Inc);
2364 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2365 else if (NumVecs > 2)
2366 Ops.push_back(Reg0);
2368 Ops.push_back(Pred);
2369 Ops.push_back(Reg0);
2370 Ops.push_back(Chain);
2371 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2372 } else if (NumVecs == 2) {
// Two quad-register vectors: two instructions with the same address and
// alignment operands, the second chained after the first.
2373 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2374 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2375 dl, ResTys, OpsA);
2377 Chain = SDValue(VLdA, 1);
2378 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2379 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2380 } else {
// Three or four quad-register vectors: the first instruction takes an
// IMPLICIT_DEF as its input value, the second takes the first one's result.
2381 SDValue ImplDef =
2382 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2383 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2384 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2385 dl, ResTys, OpsA);
2387 SDValue SuperReg = SDValue(VLdA, 0);
2388 Chain = SDValue(VLdA, 1);
2389 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2390 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2393 // Transfer memoperands.
2394 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2395 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2397 // Extract the subregisters.
2398 if (NumVecs == 1) {
2399 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2400 } else {
2401 SDValue SuperReg = SDValue(VLdDup, 0);
// NOTE(review): only the dsub numbering is asserted although qsub_0 + Vec is
// computed below as well; SelectVLD asserts both.
2402 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2403 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2404 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2405 ReplaceUses(SDValue(N, Vec),
2406 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
// The results of N that follow the vectors map onto result 1 of the new node
// and, for updating loads, result 2.
2409 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2410 if (isUpdating)
2411 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2412 CurDAG->RemoveDeadNode(N);
/// Try to select \p N as a bitfield extract, or as a plain right shift when
/// the extracted field reaches the top bit.
///
/// The extract opcode is SBFX/UBFX (t2SBFX/t2UBFX in Thumb mode), chosen by
/// \p isSigned. Patterns recognised below:
///   - AND of an SRL-by-constant with a low-bit mask,
///   - a shift by constant of an SHL-by-constant,
///   - a shift by constant of an AND with a shifted mask whose lowest set bit
///     equals the shift amount,
///   - SIGN_EXTEND_INREG of an SRL/SRA-by-constant.
///
/// \returns true if N was selected, false otherwise. Always false when the
/// subtarget lacks V6T2 ops.
2415 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2416 if (!Subtarget->hasV6T2Ops())
2417 return false;
2419 unsigned Opc = isSigned
2420 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2421 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2422 SDLoc dl(N);
2424 // For unsigned extracts, check for a shift right and mask
2425 unsigned And_imm = 0;
2426 if (N->getOpcode() == ISD::AND) {
2427 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2429 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2430 if (And_imm & (And_imm + 1))
2431 return false;
2433 unsigned Srl_imm = 0;
2434 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2435 Srl_imm)) {
2436 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2438 // Mask off the unnecessary bits of the AND immediate; normally
2439 // DAGCombine will do this, but that might not happen if
2440 // targetShrinkDemandedConstant chooses a different immediate.
2441 And_imm &= -1U >> Srl_imm;
2443 // Note: The width operand is encoded as width-1.
2444 unsigned Width = countTrailingOnes(And_imm) - 1;
2445 unsigned LSB = Srl_imm;
2447 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
// The field ends exactly at the top bit of the value when LSB + width equals
// the value's bit size.
2449 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2450 // It's cheaper to use a right shift to extract the top bits.
2451 if (Subtarget->isThumb()) {
2452 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2453 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2454 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2455 getAL(CurDAG, dl), Reg0, Reg0 };
2456 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2457 return true;
2460 // ARM models shift instructions as MOVsi with shifter operand.
// NOTE(review): this path always encodes a logical shift (ISD::SRL), whereas
// the Thumb path above picks ASR when isSigned - presumably isSigned is never
// true for AND nodes; confirm against callers.
2461 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2462 SDValue ShOpc =
2463 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2464 MVT::i32);
2465 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2466 getAL(CurDAG, dl), Reg0, Reg0 };
2467 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2468 return true;
// General case: emit the extract on the SRL's input with operands
// (source, LSB, width-1).
2471 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2472 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2473 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2474 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2475 getAL(CurDAG, dl), Reg0 };
2476 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2477 return true;
2480 return false;
2483 // Otherwise, we're looking for a shift of a shift
2484 unsigned Shl_imm = 0;
2485 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2486 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2487 unsigned Srl_imm = 0;
2488 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2489 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2490 // Note: The width operand is encoded as width-1.
2491 unsigned Width = 32 - Srl_imm - 1;
// The field starts at (right shift - left shift); give up when the left
// shift is the larger of the two.
2492 int LSB = Srl_imm - Shl_imm;
2493 if (LSB < 0)
2494 return false;
2495 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2496 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2497 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2498 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2499 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2500 getAL(CurDAG, dl), Reg0 };
2501 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2502 return true;
2506 // Or we are looking for a shift of an and, with a mask operand
2507 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2508 isShiftedMask_32(And_imm)) {
2509 unsigned Srl_imm = 0;
2510 unsigned LSB = countTrailingZeros(And_imm);
2511 // Shift must be the same as the ands lsb
2512 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2513 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2514 unsigned MSB = 31 - countLeadingZeros(And_imm);
2515 // Note: The width operand is encoded as width-1.
2516 unsigned Width = MSB - LSB;
2517 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2518 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2519 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2520 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2521 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2522 getAL(CurDAG, dl), Reg0 };
2523 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2524 return true;
// Sign extension of a right-shifted value: the field width is the bit size of
// the type being extended from, and the field starts at the shift amount.
2528 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2529 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2530 unsigned LSB = 0;
2531 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2532 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2533 return false;
// Reject fields that would extend past bit 31.
2535 if (LSB + Width > 32)
2536 return false;
2538 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2539 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
// Width here is the real field width, so width-1 is passed explicitly.
2540 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2541 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2542 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2543 getAL(CurDAG, dl), Reg0 };
2544 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2545 return true;
2548 return false;
2551 /// Target-specific DAG combining for ISD::XOR.
2552 /// Target-independent combining lowers SELECT_CC nodes of the form
2553 /// select_cc setg[ge] X, 0, X, -X
2554 /// select_cc setgt X, -1, X, -X
2555 /// select_cc setl[te] X, 0, -X, X
2556 /// select_cc setlt X, 1, -X, X
2557 /// which represent Integer ABS into:
2558 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2559 /// ARM instruction selection detects the latter and matches it to
2560 /// ARM::ABS or ARM::t2ABS machine node.
2561 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2562 SDValue XORSrc0 = N->getOperand(0);
2563 SDValue XORSrc1 = N->getOperand(1);
2564 EVT VT = N->getValueType(0);
2566 if (Subtarget->isThumb1Only())
2567 return false;
2569 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2570 return false;
2572 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2573 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2574 SDValue SRASrc0 = XORSrc1.getOperand(0);
2575 SDValue SRASrc1 = XORSrc1.getOperand(1);
2576 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2577 EVT XType = SRASrc0.getValueType();
2578 unsigned Size = XType.getSizeInBits() - 1;
2580 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2581 XType.isInteger() && SRAConstant != nullptr &&
2582 Size == SRAConstant->getZExtValue()) {
2583 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2584 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2585 return true;
2588 return false;
2591 /// We've got special pseudo-instructions for these
2592 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2593 unsigned Opcode;
2594 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2595 if (MemTy == MVT::i8)
2596 Opcode = ARM::CMP_SWAP_8;
2597 else if (MemTy == MVT::i16)
2598 Opcode = ARM::CMP_SWAP_16;
2599 else if (MemTy == MVT::i32)
2600 Opcode = ARM::CMP_SWAP_32;
2601 else
2602 llvm_unreachable("Unknown AtomicCmpSwap type");
2604 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2605 N->getOperand(0)};
2606 SDNode *CmpSwap = CurDAG->getMachineNode(
2607 Opcode, SDLoc(N),
2608 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2610 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2611 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2613 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2614 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2615 CurDAG->RemoveDeadNode(N);
2618 static Optional<std::pair<unsigned, unsigned>>
2619 getContiguousRangeOfSetBits(const APInt &A) {
2620 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2621 unsigned LastOne = A.countTrailingZeros();
2622 if (A.countPopulation() != (FirstOne - LastOne + 1))
2623 return Optional<std::pair<unsigned,unsigned>>();
2624 return std::make_pair(FirstOne, LastOne);
2627 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2628 assert(N->getOpcode() == ARMISD::CMPZ);
2629 SwitchEQNEToPLMI = false;
2631 if (!Subtarget->isThumb())
2632 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2633 // LSR don't exist as standalone instructions - they need the barrel shifter.
2634 return;
2636 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2637 SDValue And = N->getOperand(0);
2638 if (!And->hasOneUse())
2639 return;
2641 SDValue Zero = N->getOperand(1);
2642 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2643 And->getOpcode() != ISD::AND)
2644 return;
2645 SDValue X = And.getOperand(0);
2646 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2648 if (!C)
2649 return;
2650 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2651 if (!Range)
2652 return;
2654 // There are several ways to lower this:
2655 SDNode *NewN;
2656 SDLoc dl(N);
2658 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2659 if (Subtarget->isThumb2()) {
2660 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2661 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2662 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2663 CurDAG->getRegister(0, MVT::i32) };
2664 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2665 } else {
2666 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2667 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2668 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2669 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2673 if (Range->second == 0) {
2674 // 1. Mask includes the LSB -> Simply shift the top N bits off
2675 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2676 ReplaceNode(And.getNode(), NewN);
2677 } else if (Range->first == 31) {
2678 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2679 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2680 ReplaceNode(And.getNode(), NewN);
2681 } else if (Range->first == Range->second) {
2682 // 3. Only one bit is set. We can shift this into the sign bit and use a
2683 // PL/MI comparison.
2684 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2685 ReplaceNode(And.getNode(), NewN);
2687 SwitchEQNEToPLMI = true;
2688 } else if (!Subtarget->hasV6T2Ops()) {
2689 // 4. Do a double shift to clear bottom and top bits, but only in
2690 // thumb-1 mode as in thumb-2 we can use UBFX.
2691 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2692 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2693 Range->second + (31 - Range->first));
2694 ReplaceNode(And.getNode(), NewN);
2699 void ARMDAGToDAGISel::Select(SDNode *N) {
2700 SDLoc dl(N);
2702 if (N->isMachineOpcode()) {
2703 N->setNodeId(-1);
2704 return; // Already selected.
2707 switch (N->getOpcode()) {
2708 default: break;
2709 case ISD::STORE: {
2710 // For Thumb1, match an sp-relative store in C++. This is a little
2711 // unfortunate, but I don't think I can make the chain check work
2712 // otherwise. (The chain of the store has to be the same as the chain
2713 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2714 // a direct reference to "SP".)
2716 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2717 // a different addressing mode from other four-byte stores.
2719 // This pattern usually comes up with call arguments.
2720 StoreSDNode *ST = cast<StoreSDNode>(N);
2721 SDValue Ptr = ST->getBasePtr();
2722 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2723 int RHSC = 0;
2724 if (Ptr.getOpcode() == ISD::ADD &&
2725 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2726 Ptr = Ptr.getOperand(0);
2728 if (Ptr.getOpcode() == ISD::CopyFromReg &&
2729 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2730 Ptr.getOperand(0) == ST->getChain()) {
2731 SDValue Ops[] = {ST->getValue(),
2732 CurDAG->getRegister(ARM::SP, MVT::i32),
2733 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2734 getAL(CurDAG, dl),
2735 CurDAG->getRegister(0, MVT::i32),
2736 ST->getChain()};
2737 MachineSDNode *ResNode =
2738 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2739 MachineMemOperand *MemOp = ST->getMemOperand();
2740 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2741 ReplaceNode(N, ResNode);
2742 return;
2745 break;
2747 case ISD::WRITE_REGISTER:
2748 if (tryWriteRegister(N))
2749 return;
2750 break;
2751 case ISD::READ_REGISTER:
2752 if (tryReadRegister(N))
2753 return;
2754 break;
2755 case ISD::INLINEASM:
2756 case ISD::INLINEASM_BR:
2757 if (tryInlineAsm(N))
2758 return;
2759 break;
2760 case ISD::XOR:
2761 // Select special operations if XOR node forms integer ABS pattern
2762 if (tryABSOp(N))
2763 return;
2764 // Other cases are autogenerated.
2765 break;
2766 case ISD::Constant: {
2767 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2768 // If we can't materialize the constant we need to use a literal pool
2769 if (ConstantMaterializationCost(Val, Subtarget) > 2) {
2770 SDValue CPIdx = CurDAG->getTargetConstantPool(
2771 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2772 TLI->getPointerTy(CurDAG->getDataLayout()));
2774 SDNode *ResNode;
2775 if (Subtarget->isThumb()) {
2776 SDValue Ops[] = {
2777 CPIdx,
2778 getAL(CurDAG, dl),
2779 CurDAG->getRegister(0, MVT::i32),
2780 CurDAG->getEntryNode()
2782 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2783 Ops);
2784 } else {
2785 SDValue Ops[] = {
2786 CPIdx,
2787 CurDAG->getTargetConstant(0, dl, MVT::i32),
2788 getAL(CurDAG, dl),
2789 CurDAG->getRegister(0, MVT::i32),
2790 CurDAG->getEntryNode()
2792 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2793 Ops);
2795 // Annotate the Node with memory operand information so that MachineInstr
2796 // queries work properly. This e.g. gives the register allocation the
2797 // required information for rematerialization.
2798 MachineFunction& MF = CurDAG->getMachineFunction();
2799 MachineMemOperand *MemOp =
2800 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2801 MachineMemOperand::MOLoad, 4, 4);
2803 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2805 ReplaceNode(N, ResNode);
2806 return;
2809 // Other cases are autogenerated.
2810 break;
2812 case ISD::FrameIndex: {
2813 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2814 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2815 SDValue TFI = CurDAG->getTargetFrameIndex(
2816 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2817 if (Subtarget->isThumb1Only()) {
2818 // Set the alignment of the frame object to 4, to avoid having to generate
2819 // more than one ADD
2820 MachineFrameInfo &MFI = MF->getFrameInfo();
2821 if (MFI.getObjectAlignment(FI) < 4)
2822 MFI.setObjectAlignment(FI, 4);
2823 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2824 CurDAG->getTargetConstant(0, dl, MVT::i32));
2825 return;
2826 } else {
2827 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2828 ARM::t2ADDri : ARM::ADDri);
2829 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2830 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2831 CurDAG->getRegister(0, MVT::i32) };
2832 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2833 return;
2836 case ISD::SRL:
2837 if (tryV6T2BitfieldExtractOp(N, false))
2838 return;
2839 break;
2840 case ISD::SIGN_EXTEND_INREG:
2841 case ISD::SRA:
2842 if (tryV6T2BitfieldExtractOp(N, true))
2843 return;
2844 break;
2845 case ISD::MUL:
2846 if (Subtarget->isThumb1Only())
2847 break;
2848 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2849 unsigned RHSV = C->getZExtValue();
2850 if (!RHSV) break;
2851 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2852 unsigned ShImm = Log2_32(RHSV-1);
2853 if (ShImm >= 32)
2854 break;
2855 SDValue V = N->getOperand(0);
2856 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2857 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2858 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2859 if (Subtarget->isThumb()) {
2860 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2861 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2862 return;
2863 } else {
2864 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2865 Reg0 };
2866 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2867 return;
2870 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2871 unsigned ShImm = Log2_32(RHSV+1);
2872 if (ShImm >= 32)
2873 break;
2874 SDValue V = N->getOperand(0);
2875 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2876 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2877 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2878 if (Subtarget->isThumb()) {
2879 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2880 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2881 return;
2882 } else {
2883 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2884 Reg0 };
2885 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2886 return;
2890 break;
2891 case ISD::AND: {
2892 // Check for unsigned bitfield extract
2893 if (tryV6T2BitfieldExtractOp(N, false))
2894 return;
2896 // If an immediate is used in an AND node, it is possible that the immediate
2897 // can be more optimally materialized when negated. If this is the case we
2898 // can negate the immediate and use a BIC instead.
2899 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2900 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2901 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2903 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2904 // immediate can be negated and fit in the immediate operand of
2905 // a t2BIC, don't do any manual transform here as this can be
2906 // handled by the generic ISel machinery.
2907 bool PreferImmediateEncoding =
2908 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2909 if (!PreferImmediateEncoding &&
2910 ConstantMaterializationCost(Imm, Subtarget) >
2911 ConstantMaterializationCost(~Imm, Subtarget)) {
2912 // The current immediate costs more to materialize than a negated
2913 // immediate, so negate the immediate and use a BIC.
2914 SDValue NewImm =
2915 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2916 // If the new constant didn't exist before, reposition it in the topological
2917 // ordering so it is just before N. Otherwise, don't touch its location.
2918 if (NewImm->getNodeId() == -1)
2919 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2921 if (!Subtarget->hasThumb2()) {
2922 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2923 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2924 CurDAG->getRegister(0, MVT::i32)};
2925 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2926 return;
2927 } else {
2928 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2929 CurDAG->getRegister(0, MVT::i32),
2930 CurDAG->getRegister(0, MVT::i32)};
2931 ReplaceNode(N,
2932 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2933 return;
2938 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2939 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2940 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2941 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2942 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)"
2943 EVT VT = N->getValueType(0);
2944 if (VT != MVT::i32)
2945 break;
2946 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2947 ? ARM::t2MOVTi16
2948 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2949 if (!Opc)
2950 break;
2951 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2952 N1C = dyn_cast<ConstantSDNode>(N1);
2953 if (!N1C)
2954 break;
2955 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2956 SDValue N2 = N0.getOperand(1);
2957 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2958 if (!N2C)
2959 break;
2960 unsigned N1CVal = N1C->getZExtValue();
2961 unsigned N2CVal = N2C->getZExtValue();
2962 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2963 (N1CVal & 0xffffU) == 0xffffU &&
2964 (N2CVal & 0xffffU) == 0x0U) {
2965 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2966 dl, MVT::i32);
2967 SDValue Ops[] = { N0.getOperand(0), Imm16,
2968 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2969 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2970 return;
2974 break;
2976 case ARMISD::UMAAL: {
2977 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2978 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2979 N->getOperand(2), N->getOperand(3),
2980 getAL(CurDAG, dl),
2981 CurDAG->getRegister(0, MVT::i32) };
2982 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2983 return;
2985 case ARMISD::UMLAL:{
2986 if (Subtarget->isThumb()) {
2987 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2988 N->getOperand(3), getAL(CurDAG, dl),
2989 CurDAG->getRegister(0, MVT::i32)};
2990 ReplaceNode(
2991 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2992 return;
2993 }else{
2994 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2995 N->getOperand(3), getAL(CurDAG, dl),
2996 CurDAG->getRegister(0, MVT::i32),
2997 CurDAG->getRegister(0, MVT::i32) };
2998 ReplaceNode(N, CurDAG->getMachineNode(
2999 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3000 MVT::i32, MVT::i32, Ops));
3001 return;
3004 case ARMISD::SMLAL:{
3005 if (Subtarget->isThumb()) {
3006 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3007 N->getOperand(3), getAL(CurDAG, dl),
3008 CurDAG->getRegister(0, MVT::i32)};
3009 ReplaceNode(
3010 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3011 return;
3012 }else{
3013 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3014 N->getOperand(3), getAL(CurDAG, dl),
3015 CurDAG->getRegister(0, MVT::i32),
3016 CurDAG->getRegister(0, MVT::i32) };
3017 ReplaceNode(N, CurDAG->getMachineNode(
3018 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3019 MVT::i32, MVT::i32, Ops));
3020 return;
3023 case ARMISD::SUBE: {
3024 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3025 break;
3026 // Look for a pattern to match SMMLS
3027 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3028 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3029 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3030 !SDValue(N, 1).use_empty())
3031 break;
3033 if (Subtarget->isThumb())
3034 assert(Subtarget->hasThumb2() &&
3035 "This pattern should not be generated for Thumb");
3037 SDValue SmulLoHi = N->getOperand(1);
3038 SDValue Subc = N->getOperand(2);
3039 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3041 if (!Zero || Zero->getZExtValue() != 0 ||
3042 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3043 N->getOperand(1) != SmulLoHi.getValue(1) ||
3044 N->getOperand(2) != Subc.getValue(1))
3045 break;
3047 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3048 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3049 N->getOperand(0), getAL(CurDAG, dl),
3050 CurDAG->getRegister(0, MVT::i32) };
3051 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3052 return;
3054 case ISD::LOAD: {
3055 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3056 return;
3057 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3058 if (tryT2IndexedLoad(N))
3059 return;
3060 } else if (Subtarget->isThumb()) {
3061 if (tryT1IndexedLoad(N))
3062 return;
3063 } else if (tryARMIndexedLoad(N))
3064 return;
3065 // Other cases are autogenerated.
3066 break;
3068 case ARMISD::WLS:
3069 case ARMISD::LE: {
3070 SDValue Ops[] = { N->getOperand(1),
3071 N->getOperand(2),
3072 N->getOperand(0) };
3073 unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3074 ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3075 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3076 ReplaceUses(N, New);
3077 CurDAG->RemoveDeadNode(N);
3078 return;
3080 case ARMISD::LOOP_DEC: {
3081 SDValue Ops[] = { N->getOperand(1),
3082 N->getOperand(2),
3083 N->getOperand(0) };
3084 SDNode *Dec =
3085 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3086 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3087 ReplaceUses(N, Dec);
3088 CurDAG->RemoveDeadNode(N);
3089 return;
3091 case ARMISD::BRCOND: {
3092 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3093 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3094 // Pattern complexity = 6 cost = 1 size = 0
3096 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3097 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3098 // Pattern complexity = 6 cost = 1 size = 0
3100 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3101 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3102 // Pattern complexity = 6 cost = 1 size = 0
3104 unsigned Opc = Subtarget->isThumb() ?
3105 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3106 SDValue Chain = N->getOperand(0);
3107 SDValue N1 = N->getOperand(1);
3108 SDValue N2 = N->getOperand(2);
3109 SDValue N3 = N->getOperand(3);
3110 SDValue InFlag = N->getOperand(4);
3111 assert(N1.getOpcode() == ISD::BasicBlock);
3112 assert(N2.getOpcode() == ISD::Constant);
3113 assert(N3.getOpcode() == ISD::Register);
3115 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3117 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3118 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3119 SDValue Int = InFlag.getOperand(0);
3120 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3122 // Handle low-overhead loops.
3123 if (ID == Intrinsic::loop_decrement_reg) {
3124 SDValue Elements = Int.getOperand(2);
3125 SDValue Size = CurDAG->getTargetConstant(
3126 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3127 MVT::i32);
3129 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3130 SDNode *LoopDec =
3131 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3132 CurDAG->getVTList(MVT::i32, MVT::Other),
3133 Args);
3134 ReplaceUses(Int.getNode(), LoopDec);
3136 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3137 SDNode *LoopEnd =
3138 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3140 ReplaceUses(N, LoopEnd);
3141 CurDAG->RemoveDeadNode(N);
3142 CurDAG->RemoveDeadNode(InFlag.getNode());
3143 CurDAG->RemoveDeadNode(Int.getNode());
3144 return;
3148 bool SwitchEQNEToPLMI;
3149 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3150 InFlag = N->getOperand(4);
3152 if (SwitchEQNEToPLMI) {
3153 switch ((ARMCC::CondCodes)CC) {
3154 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3155 case ARMCC::NE:
3156 CC = (unsigned)ARMCC::MI;
3157 break;
3158 case ARMCC::EQ:
3159 CC = (unsigned)ARMCC::PL;
3160 break;
3165 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3166 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3167 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3168 MVT::Glue, Ops);
3169 Chain = SDValue(ResNode, 0);
3170 if (N->getNumValues() == 2) {
3171 InFlag = SDValue(ResNode, 1);
3172 ReplaceUses(SDValue(N, 1), InFlag);
3174 ReplaceUses(SDValue(N, 0),
3175 SDValue(Chain.getNode(), Chain.getResNo()));
3176 CurDAG->RemoveDeadNode(N);
3177 return;
3180 case ARMISD::CMPZ: {
3181 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3182 // This allows us to avoid materializing the expensive negative constant.
3183 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3184 // for its glue output.
3185 SDValue X = N->getOperand(0);
3186 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3187 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3188 int64_t Addend = -C->getSExtValue();
3190 SDNode *Add = nullptr;
3191 // ADDS can be better than CMN if the immediate fits in a
3192 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3193 // Outside that range we can just use a CMN which is 32-bit but has a
3194 // 12-bit immediate range.
3195 if (Addend < 1<<8) {
3196 if (Subtarget->isThumb2()) {
3197 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3198 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3199 CurDAG->getRegister(0, MVT::i32) };
3200 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3201 } else {
3202 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3203 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3204 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3205 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3206 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3209 if (Add) {
3210 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3211 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3214 // Other cases are autogenerated.
3215 break;
3218 case ARMISD::CMOV: {
3219 SDValue InFlag = N->getOperand(4);
3221 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3222 bool SwitchEQNEToPLMI;
3223 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3225 if (SwitchEQNEToPLMI) {
3226 SDValue ARMcc = N->getOperand(2);
3227 ARMCC::CondCodes CC =
3228 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3230 switch (CC) {
3231 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3232 case ARMCC::NE:
3233 CC = ARMCC::MI;
3234 break;
3235 case ARMCC::EQ:
3236 CC = ARMCC::PL;
3237 break;
3239 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3240 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3241 N->getOperand(3), N->getOperand(4)};
3242 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3246 // Other cases are autogenerated.
3247 break;
3250 case ARMISD::VZIP: {
3251 unsigned Opc = 0;
3252 EVT VT = N->getValueType(0);
3253 switch (VT.getSimpleVT().SimpleTy) {
3254 default: return;
3255 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3256 case MVT::v4f16:
3257 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3258 case MVT::v2f32:
3259 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3260 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3261 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3262 case MVT::v8f16:
3263 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3264 case MVT::v4f32:
3265 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3267 SDValue Pred = getAL(CurDAG, dl);
3268 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3269 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3270 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3271 return;
3273 case ARMISD::VUZP: {
3274 unsigned Opc = 0;
3275 EVT VT = N->getValueType(0);
3276 switch (VT.getSimpleVT().SimpleTy) {
3277 default: return;
3278 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3279 case MVT::v4f16:
3280 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3281 case MVT::v2f32:
3282 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3283 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3284 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3285 case MVT::v8f16:
3286 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3287 case MVT::v4f32:
3288 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3290 SDValue Pred = getAL(CurDAG, dl);
3291 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3292 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3293 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3294 return;
3296 case ARMISD::VTRN: {
3297 unsigned Opc = 0;
3298 EVT VT = N->getValueType(0);
3299 switch (VT.getSimpleVT().SimpleTy) {
3300 default: return;
3301 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3302 case MVT::v4f16:
3303 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3304 case MVT::v2f32:
3305 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3306 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3307 case MVT::v8f16:
3308 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3309 case MVT::v4f32:
3310 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3312 SDValue Pred = getAL(CurDAG, dl);
3313 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3314 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3315 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3316 return;
3318 case ARMISD::BUILD_VECTOR: {
3319 EVT VecVT = N->getValueType(0);
3320 EVT EltVT = VecVT.getVectorElementType();
3321 unsigned NumElts = VecVT.getVectorNumElements();
3322 if (EltVT == MVT::f64) {
3323 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3324 ReplaceNode(
3325 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3326 return;
3328 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3329 if (NumElts == 2) {
3330 ReplaceNode(
3331 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3332 return;
3334 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3335 ReplaceNode(N,
3336 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3337 N->getOperand(2), N->getOperand(3)));
3338 return;
3341 case ARMISD::VLD1DUP: {
3342 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3343 ARM::VLD1DUPd32 };
3344 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3345 ARM::VLD1DUPq32 };
3346 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3347 return;
3350 case ARMISD::VLD2DUP: {
3351 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3352 ARM::VLD2DUPd32 };
3353 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3354 return;
3357 case ARMISD::VLD3DUP: {
3358 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3359 ARM::VLD3DUPd16Pseudo,
3360 ARM::VLD3DUPd32Pseudo };
3361 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3362 return;
3365 case ARMISD::VLD4DUP: {
3366 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3367 ARM::VLD4DUPd16Pseudo,
3368 ARM::VLD4DUPd32Pseudo };
3369 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3370 return;
3373 case ARMISD::VLD1DUP_UPD: {
3374 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3375 ARM::VLD1DUPd16wb_fixed,
3376 ARM::VLD1DUPd32wb_fixed };
3377 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3378 ARM::VLD1DUPq16wb_fixed,
3379 ARM::VLD1DUPq32wb_fixed };
3380 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3381 return;
3384 case ARMISD::VLD2DUP_UPD: {
3385 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3386 ARM::VLD2DUPd16wb_fixed,
3387 ARM::VLD2DUPd32wb_fixed };
3388 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3389 return;
3392 case ARMISD::VLD3DUP_UPD: {
3393 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3394 ARM::VLD3DUPd16Pseudo_UPD,
3395 ARM::VLD3DUPd32Pseudo_UPD };
3396 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3397 return;
3400 case ARMISD::VLD4DUP_UPD: {
3401 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3402 ARM::VLD4DUPd16Pseudo_UPD,
3403 ARM::VLD4DUPd32Pseudo_UPD };
3404 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3405 return;
3408 case ARMISD::VLD1_UPD: {
3409 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3410 ARM::VLD1d16wb_fixed,
3411 ARM::VLD1d32wb_fixed,
3412 ARM::VLD1d64wb_fixed };
3413 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3414 ARM::VLD1q16wb_fixed,
3415 ARM::VLD1q32wb_fixed,
3416 ARM::VLD1q64wb_fixed };
3417 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3418 return;
3421 case ARMISD::VLD2_UPD: {
3422 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3423 ARM::VLD2d16wb_fixed,
3424 ARM::VLD2d32wb_fixed,
3425 ARM::VLD1q64wb_fixed};
3426 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3427 ARM::VLD2q16PseudoWB_fixed,
3428 ARM::VLD2q32PseudoWB_fixed };
3429 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3430 return;
3433 case ARMISD::VLD3_UPD: {
3434 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3435 ARM::VLD3d16Pseudo_UPD,
3436 ARM::VLD3d32Pseudo_UPD,
3437 ARM::VLD1d64TPseudoWB_fixed};
3438 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3439 ARM::VLD3q16Pseudo_UPD,
3440 ARM::VLD3q32Pseudo_UPD };
3441 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3442 ARM::VLD3q16oddPseudo_UPD,
3443 ARM::VLD3q32oddPseudo_UPD };
3444 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3445 return;
3448 case ARMISD::VLD4_UPD: {
3449 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3450 ARM::VLD4d16Pseudo_UPD,
3451 ARM::VLD4d32Pseudo_UPD,
3452 ARM::VLD1d64QPseudoWB_fixed};
3453 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3454 ARM::VLD4q16Pseudo_UPD,
3455 ARM::VLD4q32Pseudo_UPD };
3456 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3457 ARM::VLD4q16oddPseudo_UPD,
3458 ARM::VLD4q32oddPseudo_UPD };
3459 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3460 return;
3463 case ARMISD::VLD2LN_UPD: {
3464 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3465 ARM::VLD2LNd16Pseudo_UPD,
3466 ARM::VLD2LNd32Pseudo_UPD };
3467 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3468 ARM::VLD2LNq32Pseudo_UPD };
3469 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3470 return;
3473 case ARMISD::VLD3LN_UPD: {
3474 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3475 ARM::VLD3LNd16Pseudo_UPD,
3476 ARM::VLD3LNd32Pseudo_UPD };
3477 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3478 ARM::VLD3LNq32Pseudo_UPD };
3479 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3480 return;
3483 case ARMISD::VLD4LN_UPD: {
3484 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3485 ARM::VLD4LNd16Pseudo_UPD,
3486 ARM::VLD4LNd32Pseudo_UPD };
3487 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3488 ARM::VLD4LNq32Pseudo_UPD };
3489 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3490 return;
3493 case ARMISD::VST1_UPD: {
3494 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3495 ARM::VST1d16wb_fixed,
3496 ARM::VST1d32wb_fixed,
3497 ARM::VST1d64wb_fixed };
3498 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3499 ARM::VST1q16wb_fixed,
3500 ARM::VST1q32wb_fixed,
3501 ARM::VST1q64wb_fixed };
3502 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3503 return;
3506 case ARMISD::VST2_UPD: {
3507 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3508 ARM::VST2d16wb_fixed,
3509 ARM::VST2d32wb_fixed,
3510 ARM::VST1q64wb_fixed};
3511 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3512 ARM::VST2q16PseudoWB_fixed,
3513 ARM::VST2q32PseudoWB_fixed };
3514 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3515 return;
3518 case ARMISD::VST3_UPD: {
3519 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3520 ARM::VST3d16Pseudo_UPD,
3521 ARM::VST3d32Pseudo_UPD,
3522 ARM::VST1d64TPseudoWB_fixed};
3523 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3524 ARM::VST3q16Pseudo_UPD,
3525 ARM::VST3q32Pseudo_UPD };
3526 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3527 ARM::VST3q16oddPseudo_UPD,
3528 ARM::VST3q32oddPseudo_UPD };
3529 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3530 return;
3533 case ARMISD::VST4_UPD: {
3534 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3535 ARM::VST4d16Pseudo_UPD,
3536 ARM::VST4d32Pseudo_UPD,
3537 ARM::VST1d64QPseudoWB_fixed};
3538 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3539 ARM::VST4q16Pseudo_UPD,
3540 ARM::VST4q32Pseudo_UPD };
3541 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3542 ARM::VST4q16oddPseudo_UPD,
3543 ARM::VST4q32oddPseudo_UPD };
3544 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3545 return;
3548 case ARMISD::VST2LN_UPD: {
3549 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3550 ARM::VST2LNd16Pseudo_UPD,
3551 ARM::VST2LNd32Pseudo_UPD };
3552 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3553 ARM::VST2LNq32Pseudo_UPD };
3554 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3555 return;
3558 case ARMISD::VST3LN_UPD: {
3559 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3560 ARM::VST3LNd16Pseudo_UPD,
3561 ARM::VST3LNd32Pseudo_UPD };
3562 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3563 ARM::VST3LNq32Pseudo_UPD };
3564 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3565 return;
3568 case ARMISD::VST4LN_UPD: {
3569 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3570 ARM::VST4LNd16Pseudo_UPD,
3571 ARM::VST4LNd32Pseudo_UPD };
3572 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3573 ARM::VST4LNq32Pseudo_UPD };
3574 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3575 return;
3578 case ISD::INTRINSIC_VOID:
3579 case ISD::INTRINSIC_W_CHAIN: {
3580 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3581 switch (IntNo) {
3582 default:
3583 break;
3585 case Intrinsic::arm_mrrc:
3586 case Intrinsic::arm_mrrc2: {
3587 SDLoc dl(N);
3588 SDValue Chain = N->getOperand(0);
3589 unsigned Opc;
3591 if (Subtarget->isThumb())
3592 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3593 else
3594 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3596 SmallVector<SDValue, 5> Ops;
3597 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3598 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3599 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3601 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3602 // instruction will always be '1111' but it is possible in assembly language to specify
3603 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3604 if (Opc != ARM::MRRC2) {
3605 Ops.push_back(getAL(CurDAG, dl));
3606 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3609 Ops.push_back(Chain);
3611 // Writes to two registers.
3612 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3614 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3615 return;
3617 case Intrinsic::arm_ldaexd:
3618 case Intrinsic::arm_ldrexd: {
3619 SDLoc dl(N);
3620 SDValue Chain = N->getOperand(0);
3621 SDValue MemAddr = N->getOperand(2);
3622 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3624 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3625 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3626 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3628 // arm_ldrexd returns a i64 value in {i32, i32}
3629 std::vector<EVT> ResTys;
3630 if (isThumb) {
3631 ResTys.push_back(MVT::i32);
3632 ResTys.push_back(MVT::i32);
3633 } else
3634 ResTys.push_back(MVT::Untyped);
3635 ResTys.push_back(MVT::Other);
3637 // Place arguments in the right order.
3638 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3639 CurDAG->getRegister(0, MVT::i32), Chain};
3640 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3641 // Transfer memoperands.
3642 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3643 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3645 // Remap uses.
3646 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3647 if (!SDValue(N, 0).use_empty()) {
3648 SDValue Result;
3649 if (isThumb)
3650 Result = SDValue(Ld, 0);
3651 else {
3652 SDValue SubRegIdx =
3653 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3654 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3655 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3656 Result = SDValue(ResNode,0);
3658 ReplaceUses(SDValue(N, 0), Result);
3660 if (!SDValue(N, 1).use_empty()) {
3661 SDValue Result;
3662 if (isThumb)
3663 Result = SDValue(Ld, 1);
3664 else {
3665 SDValue SubRegIdx =
3666 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3667 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3668 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3669 Result = SDValue(ResNode,0);
3671 ReplaceUses(SDValue(N, 1), Result);
3673 ReplaceUses(SDValue(N, 2), OutChain);
3674 CurDAG->RemoveDeadNode(N);
3675 return;
3677 case Intrinsic::arm_stlexd:
3678 case Intrinsic::arm_strexd: {
3679 SDLoc dl(N);
3680 SDValue Chain = N->getOperand(0);
3681 SDValue Val0 = N->getOperand(2);
3682 SDValue Val1 = N->getOperand(3);
3683 SDValue MemAddr = N->getOperand(4);
3685 // Store exclusive double return a i32 value which is the return status
3686 // of the issued store.
3687 const EVT ResTys[] = {MVT::i32, MVT::Other};
3689 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3690 // Place arguments in the right order.
3691 SmallVector<SDValue, 7> Ops;
3692 if (isThumb) {
3693 Ops.push_back(Val0);
3694 Ops.push_back(Val1);
3695 } else
3696 // arm_strexd uses GPRPair.
3697 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3698 Ops.push_back(MemAddr);
3699 Ops.push_back(getAL(CurDAG, dl));
3700 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3701 Ops.push_back(Chain);
3703 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3704 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3705 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3707 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3708 // Transfer memoperands.
3709 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3710 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3712 ReplaceNode(N, St);
3713 return;
3716 case Intrinsic::arm_neon_vld1: {
3717 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3718 ARM::VLD1d32, ARM::VLD1d64 };
3719 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3720 ARM::VLD1q32, ARM::VLD1q64};
3721 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3722 return;
3725 case Intrinsic::arm_neon_vld1x2: {
3726 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3727 ARM::VLD1q32, ARM::VLD1q64 };
3728 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3729 ARM::VLD1d16QPseudo,
3730 ARM::VLD1d32QPseudo,
3731 ARM::VLD1d64QPseudo };
3732 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3733 return;
3736 case Intrinsic::arm_neon_vld1x3: {
3737 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3738 ARM::VLD1d16TPseudo,
3739 ARM::VLD1d32TPseudo,
3740 ARM::VLD1d64TPseudo };
3741 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3742 ARM::VLD1q16LowTPseudo_UPD,
3743 ARM::VLD1q32LowTPseudo_UPD,
3744 ARM::VLD1q64LowTPseudo_UPD };
3745 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3746 ARM::VLD1q16HighTPseudo,
3747 ARM::VLD1q32HighTPseudo,
3748 ARM::VLD1q64HighTPseudo };
3749 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3750 return;
3753 case Intrinsic::arm_neon_vld1x4: {
3754 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3755 ARM::VLD1d16QPseudo,
3756 ARM::VLD1d32QPseudo,
3757 ARM::VLD1d64QPseudo };
3758 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3759 ARM::VLD1q16LowQPseudo_UPD,
3760 ARM::VLD1q32LowQPseudo_UPD,
3761 ARM::VLD1q64LowQPseudo_UPD };
3762 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3763 ARM::VLD1q16HighQPseudo,
3764 ARM::VLD1q32HighQPseudo,
3765 ARM::VLD1q64HighQPseudo };
3766 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3767 return;
3770 case Intrinsic::arm_neon_vld2: {
3771 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3772 ARM::VLD2d32, ARM::VLD1q64 };
3773 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3774 ARM::VLD2q32Pseudo };
3775 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3776 return;
3779 case Intrinsic::arm_neon_vld3: {
3780 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3781 ARM::VLD3d16Pseudo,
3782 ARM::VLD3d32Pseudo,
3783 ARM::VLD1d64TPseudo };
3784 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3785 ARM::VLD3q16Pseudo_UPD,
3786 ARM::VLD3q32Pseudo_UPD };
3787 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3788 ARM::VLD3q16oddPseudo,
3789 ARM::VLD3q32oddPseudo };
3790 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3791 return;
3794 case Intrinsic::arm_neon_vld4: {
3795 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3796 ARM::VLD4d16Pseudo,
3797 ARM::VLD4d32Pseudo,
3798 ARM::VLD1d64QPseudo };
3799 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3800 ARM::VLD4q16Pseudo_UPD,
3801 ARM::VLD4q32Pseudo_UPD };
3802 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3803 ARM::VLD4q16oddPseudo,
3804 ARM::VLD4q32oddPseudo };
3805 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3806 return;
3809 case Intrinsic::arm_neon_vld2dup: {
3810 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3811 ARM::VLD2DUPd32, ARM::VLD1q64 };
3812 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3813 ARM::VLD2DUPq16EvenPseudo,
3814 ARM::VLD2DUPq32EvenPseudo };
3815 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3816 ARM::VLD2DUPq16OddPseudo,
3817 ARM::VLD2DUPq32OddPseudo };
3818 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3819 DOpcodes, QOpcodes0, QOpcodes1);
3820 return;
3823 case Intrinsic::arm_neon_vld3dup: {
3824 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3825 ARM::VLD3DUPd16Pseudo,
3826 ARM::VLD3DUPd32Pseudo,
3827 ARM::VLD1d64TPseudo };
3828 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3829 ARM::VLD3DUPq16EvenPseudo,
3830 ARM::VLD3DUPq32EvenPseudo };
3831 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3832 ARM::VLD3DUPq16OddPseudo,
3833 ARM::VLD3DUPq32OddPseudo };
3834 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3835 DOpcodes, QOpcodes0, QOpcodes1);
3836 return;
3839 case Intrinsic::arm_neon_vld4dup: {
3840 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3841 ARM::VLD4DUPd16Pseudo,
3842 ARM::VLD4DUPd32Pseudo,
3843 ARM::VLD1d64QPseudo };
3844 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3845 ARM::VLD4DUPq16EvenPseudo,
3846 ARM::VLD4DUPq32EvenPseudo };
3847 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3848 ARM::VLD4DUPq16OddPseudo,
3849 ARM::VLD4DUPq32OddPseudo };
3850 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3851 DOpcodes, QOpcodes0, QOpcodes1);
3852 return;
3855 case Intrinsic::arm_neon_vld2lane: {
3856 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3857 ARM::VLD2LNd16Pseudo,
3858 ARM::VLD2LNd32Pseudo };
3859 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3860 ARM::VLD2LNq32Pseudo };
3861 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3862 return;
3865 case Intrinsic::arm_neon_vld3lane: {
3866 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3867 ARM::VLD3LNd16Pseudo,
3868 ARM::VLD3LNd32Pseudo };
3869 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3870 ARM::VLD3LNq32Pseudo };
3871 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3872 return;
3875 case Intrinsic::arm_neon_vld4lane: {
3876 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3877 ARM::VLD4LNd16Pseudo,
3878 ARM::VLD4LNd32Pseudo };
3879 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3880 ARM::VLD4LNq32Pseudo };
3881 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3882 return;
3885 case Intrinsic::arm_neon_vst1: {
3886 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3887 ARM::VST1d32, ARM::VST1d64 };
3888 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3889 ARM::VST1q32, ARM::VST1q64 };
3890 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3891 return;
3894 case Intrinsic::arm_neon_vst1x2: {
3895 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3896 ARM::VST1q32, ARM::VST1q64 };
3897 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3898 ARM::VST1d16QPseudo,
3899 ARM::VST1d32QPseudo,
3900 ARM::VST1d64QPseudo };
3901 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3902 return;
3905 case Intrinsic::arm_neon_vst1x3: {
3906 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3907 ARM::VST1d16TPseudo,
3908 ARM::VST1d32TPseudo,
3909 ARM::VST1d64TPseudo };
3910 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3911 ARM::VST1q16LowTPseudo_UPD,
3912 ARM::VST1q32LowTPseudo_UPD,
3913 ARM::VST1q64LowTPseudo_UPD };
3914 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3915 ARM::VST1q16HighTPseudo,
3916 ARM::VST1q32HighTPseudo,
3917 ARM::VST1q64HighTPseudo };
3918 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3919 return;
3922 case Intrinsic::arm_neon_vst1x4: {
3923 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3924 ARM::VST1d16QPseudo,
3925 ARM::VST1d32QPseudo,
3926 ARM::VST1d64QPseudo };
3927 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3928 ARM::VST1q16LowQPseudo_UPD,
3929 ARM::VST1q32LowQPseudo_UPD,
3930 ARM::VST1q64LowQPseudo_UPD };
3931 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3932 ARM::VST1q16HighQPseudo,
3933 ARM::VST1q32HighQPseudo,
3934 ARM::VST1q64HighQPseudo };
3935 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3936 return;
3939 case Intrinsic::arm_neon_vst2: {
3940 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3941 ARM::VST2d32, ARM::VST1q64 };
3942 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3943 ARM::VST2q32Pseudo };
3944 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3945 return;
3948 case Intrinsic::arm_neon_vst3: {
3949 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3950 ARM::VST3d16Pseudo,
3951 ARM::VST3d32Pseudo,
3952 ARM::VST1d64TPseudo };
3953 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3954 ARM::VST3q16Pseudo_UPD,
3955 ARM::VST3q32Pseudo_UPD };
3956 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3957 ARM::VST3q16oddPseudo,
3958 ARM::VST3q32oddPseudo };
3959 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3960 return;
3963 case Intrinsic::arm_neon_vst4: {
3964 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3965 ARM::VST4d16Pseudo,
3966 ARM::VST4d32Pseudo,
3967 ARM::VST1d64QPseudo };
3968 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3969 ARM::VST4q16Pseudo_UPD,
3970 ARM::VST4q32Pseudo_UPD };
3971 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3972 ARM::VST4q16oddPseudo,
3973 ARM::VST4q32oddPseudo };
3974 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3975 return;
3978 case Intrinsic::arm_neon_vst2lane: {
3979 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3980 ARM::VST2LNd16Pseudo,
3981 ARM::VST2LNd32Pseudo };
3982 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3983 ARM::VST2LNq32Pseudo };
3984 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3985 return;
3988 case Intrinsic::arm_neon_vst3lane: {
3989 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3990 ARM::VST3LNd16Pseudo,
3991 ARM::VST3LNd32Pseudo };
3992 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3993 ARM::VST3LNq32Pseudo };
3994 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3995 return;
3998 case Intrinsic::arm_neon_vst4lane: {
3999 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4000 ARM::VST4LNd16Pseudo,
4001 ARM::VST4LNd32Pseudo };
4002 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4003 ARM::VST4LNq32Pseudo };
4004 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4005 return;
4008 break;
4011 case ISD::ATOMIC_CMP_SWAP:
4012 SelectCMP_SWAP(N);
4013 return;
4016 SelectCode(N);
4019 // Inspect a register string of the form
4020 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4021 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4022 // and obtain the integer operands from them, adding these operands to the
4023 // provided vector.
4024 static void getIntOperandsFromRegisterString(StringRef RegString,
4025 SelectionDAG *CurDAG,
4026 const SDLoc &DL,
4027 std::vector<SDValue> &Ops) {
4028 SmallVector<StringRef, 5> Fields;
4029 RegString.split(Fields, ':');
4031 if (Fields.size() > 1) {
4032 bool AllIntFields = true;
4034 for (StringRef Field : Fields) {
4035 // Need to trim out leading 'cp' characters and get the integer field.
4036 unsigned IntField;
4037 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4038 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4041 assert(AllIntFields &&
4042 "Unexpected non-integer value in special register string.");
4046 // Maps a Banked Register string to its mask value. The mask value returned is
4047 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4048 // mask operand, which expresses which register is to be used, e.g. r8, and in
4049 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4050 // was invalid.
4051 static inline int getBankedRegisterMask(StringRef RegString) {
4052 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4053 if (!TheReg)
4054 return -1;
4055 return TheReg->Encoding;
4058 // The flags here are common to those allowed for apsr in the A class cores and
4059 // those allowed for the special registers in the M class cores. Returns a
4060 // value representing which flags were present, -1 if invalid.
4061 static inline int getMClassFlagsMask(StringRef Flags) {
4062 return StringSwitch<int>(Flags)
4063 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4064 // correct when flags are not permitted
4065 .Case("g", 0x1)
4066 .Case("nzcvq", 0x2)
4067 .Case("nzcvqg", 0x3)
4068 .Default(-1);
4071 // Maps MClass special registers string to its value for use in the
4072 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4073 // Returns -1 to signify that the string was invalid.
4074 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4075 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4076 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4077 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4078 return -1;
4079 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4082 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4083 // The mask operand contains the special register (R Bit) in bit 4, whether
4084 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4085 // bits 3-0 contains the fields to be accessed in the special register, set by
4086 // the flags provided with the register.
4087 int Mask = 0;
4088 if (Reg == "apsr") {
4089 // The flags permitted for apsr are the same flags that are allowed in
4090 // M class registers. We get the flag value and then shift the flags into
4091 // the correct place to combine with the mask.
4092 Mask = getMClassFlagsMask(Flags);
4093 if (Mask == -1)
4094 return -1;
4095 return Mask << 2;
4098 if (Reg != "cpsr" && Reg != "spsr") {
4099 return -1;
4102 // This is the same as if the flags were "fc"
4103 if (Flags.empty() || Flags == "all")
4104 return Mask | 0x9;
4106 // Inspect the supplied flags string and set the bits in the mask for
4107 // the relevant and valid flags allowed for cpsr and spsr.
4108 for (char Flag : Flags) {
4109 int FlagVal;
4110 switch (Flag) {
4111 case 'c':
4112 FlagVal = 0x1;
4113 break;
4114 case 'x':
4115 FlagVal = 0x2;
4116 break;
4117 case 's':
4118 FlagVal = 0x4;
4119 break;
4120 case 'f':
4121 FlagVal = 0x8;
4122 break;
4123 default:
4124 FlagVal = 0;
4127 // This avoids allowing strings where the same flag bit appears twice.
4128 if (!FlagVal || (Mask & FlagVal))
4129 return -1;
4130 Mask |= FlagVal;
4133 // If the register is spsr then we need to set the R bit.
4134 if (Reg == "spsr")
4135 Mask |= 0x10;
4137 return Mask;
4140 // Lower the read_register intrinsic to ARM specific DAG nodes
4141 // using the supplied metadata string to select the instruction node to use
4142 // and the registers/masks to construct as operands for the node.
4143 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4144 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4145 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4146 bool IsThumb2 = Subtarget->isThumb2();
4147 SDLoc DL(N);
4149 std::vector<SDValue> Ops;
4150 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4152 if (!Ops.empty()) {
4153 // If the special register string was constructed of fields (as defined
4154 // in the ACLE) then need to lower to MRC node (32 bit) or
4155 // MRRC node(64 bit), we can make the distinction based on the number of
4156 // operands we have.
4157 unsigned Opcode;
4158 SmallVector<EVT, 3> ResTypes;
4159 if (Ops.size() == 5){
4160 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4161 ResTypes.append({ MVT::i32, MVT::Other });
4162 } else {
4163 assert(Ops.size() == 3 &&
4164 "Invalid number of fields in special register string.");
4165 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4166 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4169 Ops.push_back(getAL(CurDAG, DL));
4170 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4171 Ops.push_back(N->getOperand(0));
4172 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4173 return true;
4176 std::string SpecialReg = RegString->getString().lower();
4178 int BankedReg = getBankedRegisterMask(SpecialReg);
4179 if (BankedReg != -1) {
4180 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4181 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4182 N->getOperand(0) };
4183 ReplaceNode(
4184 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4185 DL, MVT::i32, MVT::Other, Ops));
4186 return true;
4189 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4190 // corresponding to the register that is being read from. So we switch on the
4191 // string to find which opcode we need to use.
4192 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4193 .Case("fpscr", ARM::VMRS)
4194 .Case("fpexc", ARM::VMRS_FPEXC)
4195 .Case("fpsid", ARM::VMRS_FPSID)
4196 .Case("mvfr0", ARM::VMRS_MVFR0)
4197 .Case("mvfr1", ARM::VMRS_MVFR1)
4198 .Case("mvfr2", ARM::VMRS_MVFR2)
4199 .Case("fpinst", ARM::VMRS_FPINST)
4200 .Case("fpinst2", ARM::VMRS_FPINST2)
4201 .Default(0);
4203 // If an opcode was found then we can lower the read to a VFP instruction.
4204 if (Opcode) {
4205 if (!Subtarget->hasVFP2Base())
4206 return false;
4207 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4208 return false;
4210 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4211 N->getOperand(0) };
4212 ReplaceNode(N,
4213 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4214 return true;
4217 // If the target is M Class then need to validate that the register string
4218 // is an acceptable value, so check that a mask can be constructed from the
4219 // string.
4220 if (Subtarget->isMClass()) {
4221 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4222 if (SYSmValue == -1)
4223 return false;
4225 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4226 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4227 N->getOperand(0) };
4228 ReplaceNode(
4229 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4230 return true;
4233 // Here we know the target is not M Class so we need to check if it is one
4234 // of the remaining possible values which are apsr, cpsr or spsr.
4235 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4236 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4237 N->getOperand(0) };
4238 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4239 DL, MVT::i32, MVT::Other, Ops));
4240 return true;
4243 if (SpecialReg == "spsr") {
4244 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4245 N->getOperand(0) };
4246 ReplaceNode(
4247 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4248 MVT::i32, MVT::Other, Ops));
4249 return true;
4252 return false;
4255 // Lower the write_register intrinsic to ARM specific DAG nodes
4256 // using the supplied metadata string to select the instruction node to use
4257 // and the registers/masks to use in the nodes
4258 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4259 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4260 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4261 bool IsThumb2 = Subtarget->isThumb2();
4262 SDLoc DL(N);
4264 std::vector<SDValue> Ops;
4265 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4267 if (!Ops.empty()) {
4268 // If the special register string was constructed of fields (as defined
4269 // in the ACLE) then need to lower to MCR node (32 bit) or
4270 // MCRR node(64 bit), we can make the distinction based on the number of
4271 // operands we have.
4272 unsigned Opcode;
4273 if (Ops.size() == 5) {
4274 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4275 Ops.insert(Ops.begin()+2, N->getOperand(2));
4276 } else {
4277 assert(Ops.size() == 3 &&
4278 "Invalid number of fields in special register string.");
4279 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4280 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4281 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4284 Ops.push_back(getAL(CurDAG, DL));
4285 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4286 Ops.push_back(N->getOperand(0));
4288 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4289 return true;
4292 std::string SpecialReg = RegString->getString().lower();
4293 int BankedReg = getBankedRegisterMask(SpecialReg);
4294 if (BankedReg != -1) {
4295 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4296 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4297 N->getOperand(0) };
4298 ReplaceNode(
4299 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4300 DL, MVT::Other, Ops));
4301 return true;
4304 // The VFP registers are written to by creating SelectionDAG nodes with
4305 // opcodes corresponding to the register that is being written. So we switch
4306 // on the string to find which opcode we need to use.
4307 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4308 .Case("fpscr", ARM::VMSR)
4309 .Case("fpexc", ARM::VMSR_FPEXC)
4310 .Case("fpsid", ARM::VMSR_FPSID)
4311 .Case("fpinst", ARM::VMSR_FPINST)
4312 .Case("fpinst2", ARM::VMSR_FPINST2)
4313 .Default(0);
4315 if (Opcode) {
4316 if (!Subtarget->hasVFP2Base())
4317 return false;
4318 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4319 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4320 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4321 return true;
4324 std::pair<StringRef, StringRef> Fields;
4325 Fields = StringRef(SpecialReg).rsplit('_');
4326 std::string Reg = Fields.first.str();
4327 StringRef Flags = Fields.second;
4329 // If the target was M Class then need to validate the special register value
4330 // and retrieve the mask for use in the instruction node.
4331 if (Subtarget->isMClass()) {
4332 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4333 if (SYSmValue == -1)
4334 return false;
4336 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4337 N->getOperand(2), getAL(CurDAG, DL),
4338 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4339 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4340 return true;
4343 // We then check to see if a valid mask can be constructed for one of the
4344 // register string values permitted for the A and R class cores. These values
4345 // are apsr, spsr and cpsr; these are also valid on older cores.
4346 int Mask = getARClassRegisterMask(Reg, Flags);
4347 if (Mask != -1) {
4348 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4349 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4350 N->getOperand(0) };
4351 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4352 DL, MVT::Other, Ops));
4353 return true;
4356 return false;
// Rewrites an inline-asm node so that every register operand made of two GPRs
// is carried in a single GPRPair virtual register instead.
// Returns true when N was replaced by a rebuilt node, and false when no
// operand needed changing.
4359 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
// Operand list of the rebuilt node; filled in while walking N's operands.
4360 std::vector<SDValue> AsmNodeOperands;
// Flag is the flag word of the operand group being visited, Kind its kind.
4361 unsigned Flag, Kind;
4362 bool Changed = false;
4363 unsigned NumOps = N->getNumOperands();
4365 // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
4366 // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
4367 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4368 // respectively. Since there is no constraint to explicitly specify a
4369 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4370 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4371 // them into a GPRPair.
4373 SDLoc dl(N);
// If N has incoming glue it is the last operand; it is kept aside here and
// re-appended (possibly replaced) once all other operands are processed.
4374 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4375 : SDValue(nullptr,0);
// One entry per operand group that has registers; set to true once that
// group has been rewritten to a GPRPair. Indexed by DefIdx for tied uses.
4377 SmallVector<bool, 8> OpChanged;
4378 // Glue node will be appended later.
4379 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4380 SDValue op = N->getOperand(i);
4381 AsmNodeOperands.push_back(op);
// Operands before Op_FirstOperand are copied through unchanged.
4383 if (i < InlineAsm::Op_FirstOperand)
4384 continue;
// Only constant operands are treated as flag words; anything else is copied
// through unchanged.
4386 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4387 Flag = C->getZExtValue();
4388 Kind = InlineAsm::getKind(Flag);
4390 else
4391 continue;
4393 // Immediate operands to inline asm in the SelectionDAG are modeled with
4394 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4395 // the second is a constant with the value of the immediate. If we get here
4396 // and we have a Kind_Imm, skip the next operand, and continue.
4397 if (Kind == InlineAsm::Kind_Imm) {
4398 SDValue op = N->getOperand(++i);
4399 AsmNodeOperands.push_back(op);
4400 continue;
4403 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4404 if (NumRegs)
4405 OpChanged.push_back(false);
4407 unsigned DefIdx = 0;
4408 bool IsTiedToChangedOp = false;
4409 // If it's a use that is tied with a previous def, it has no
4410 // reg class constraint.
4411 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4412 IsTiedToChangedOp = OpChanged[DefIdx];
4414 // Memory operands to inline asm in the SelectionDAG are modeled with two
4415 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4416 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4417 // it doesn't get misinterpreted), and continue. We do this here because
4418 // it's important to update the OpChanged array correctly before moving on.
4419 if (Kind == InlineAsm::Kind_Mem) {
4420 SDValue op = N->getOperand(++i);
4421 AsmNodeOperands.push_back(op);
4422 continue;
4425 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4426 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4427 continue;
// Only rewrite groups of exactly two registers that are either constrained
// to the GPR class or tied to a def that was itself rewritten.
4429 unsigned RC;
4430 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4431 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4432 || NumRegs != 2)
4433 continue;
// The two operands following the flag word are the registers of the group.
4435 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4436 SDValue V0 = N->getOperand(i+1);
4437 SDValue V1 = N->getOperand(i+2);
4438 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4439 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4440 SDValue PairedReg;
4441 MachineRegisterInfo &MRI = MF->getRegInfo();
4443 if (Kind == InlineAsm::Kind_RegDef ||
4444 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4445 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4446 // the original GPRs.
4448 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4449 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4450 SDValue Chain = SDValue(N,0);
// NOTE(review): GU is dereferenced below without a null check; this assumes
// an asm node with a register def always has a glued user - confirm.
4452 SDNode *GU = N->getGluedUser();
4453 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4454 Chain.getValue(1));
4456 // Extract values from a GPRPair reg and copy to the original GPR reg.
4457 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4458 RegCopy);
4459 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4460 RegCopy);
4461 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4462 RegCopy.getValue(1));
4463 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4465 // Update the original glue user: keep all of its operands except the last
4465 // one and append the glue produced by the final copy instead.
4466 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4467 Ops.push_back(T1.getValue(1));
4468 CurDAG->UpdateNodeOperands(GU, Ops);
4470 else {
4471 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4472 // GPRPair and then pass the GPRPair to the inline asm.
4473 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4475 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4476 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4477 Chain.getValue(1));
4478 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4479 T0.getValue(1));
4480 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4482 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4483 // i32 VRs of inline asm with it.
4484 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4485 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4486 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
// The rebuilt asm node must be chained and glued after the copy into the
// pair register.
4488 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4489 Glue = Chain.getValue(1);
4492 Changed = true;
4494 if(PairedReg.getNode()) {
4495 OpChanged[OpChanged.size() -1 ] = true;
// The group now holds a single register: rebuild its flag word, either as
// a tie to the matching def or with the GPRPair register class.
4496 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4497 if (IsTiedToChangedOp)
4498 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4499 else
4500 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4501 // Replace the current flag.
4502 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4503 Flag, dl, MVT::i32);
4504 // Add the new register node and skip the original two GPRs.
4505 AsmNodeOperands.push_back(PairedReg);
4506 // Skip the next two GPRs.
4507 i += 2;
// Re-append the glue (the original one, or the one produced by the last
// copy into a pair register) as the final operand.
4511 if (Glue.getNode())
4512 AsmNodeOperands.push_back(Glue);
4513 if (!Changed)
4514 return false;
// Build the replacement node with the same opcode and the rewritten operands.
4516 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4517 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
// NOTE(review): the node id is reset to -1 before the replacement;
// presumably this marks the new node as not yet selected - confirm.
4518 New->setNodeId(-1);
4519 ReplaceNode(N, New.getNode());
4520 return true;
4524 bool ARMDAGToDAGISel::
4525 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4526 std::vector<SDValue> &OutOps) {
4527 switch(ConstraintID) {
4528 default:
4529 llvm_unreachable("Unexpected asm memory constraint");
4530 case InlineAsm::Constraint_i:
4531 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4532 // be an immediate and not a memory constraint.
4533 LLVM_FALLTHROUGH;
4534 case InlineAsm::Constraint_m:
4535 case InlineAsm::Constraint_o:
4536 case InlineAsm::Constraint_Q:
4537 case InlineAsm::Constraint_Um:
4538 case InlineAsm::Constraint_Un:
4539 case InlineAsm::Constraint_Uq:
4540 case InlineAsm::Constraint_Us:
4541 case InlineAsm::Constraint_Ut:
4542 case InlineAsm::Constraint_Uv:
4543 case InlineAsm::Constraint_Uy:
4544 // Require the address to be in a register. That is safe for all ARM
4545 // variants and it is hard to do anything much smarter without knowing
4546 // how the operand is used.
4547 OutOps.push_back(Op);
4548 return false;
4550 return true;
4553 /// createARMISelDag - This pass converts a legalized DAG into a
4554 /// ARM-specific DAG, ready for instruction scheduling.
4556 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4557 CodeGenOpt::Level OptLevel) {
4558 return new ARMDAGToDAGISel(TM, OptLevel);