[ARM] MVE compare vector splat combine
[llvm-complete.git] / lib / Target / ARM / ARMISelDAGToDAG.cpp
blobd0cd56fd5a309c6af90525515d0d93641f6a2fae
1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
11 //===----------------------------------------------------------------------===//
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
37 using namespace llvm;
39 #define DEBUG_TYPE "arm-isel"
41 static cl::opt<bool>
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
44 cl::init(false));
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
52 class ARMDAGToDAGISel : public SelectionDAGISel {
53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54 /// make the right decision when generating code for different targets.
55 const ARMSubtarget *Subtarget;
57 public:
58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
59 : SelectionDAGISel(tm, OptLevel) {}
61 bool runOnMachineFunction(MachineFunction &MF) override {
62 // Reset the subtarget each time through.
63 Subtarget = &MF.getSubtarget<ARMSubtarget>();
64 SelectionDAGISel::runOnMachineFunction(MF);
65 return true;
68 StringRef getPassName() const override { return "ARM Instruction Selection"; }
70 void PreprocessISelDAG() override;
72 /// getI32Imm - Return a target constant of type i32 with the specified
73 /// value.
74 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
75 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 void Select(SDNode *N) override;
80 bool hasNoVMLxHazardUse(SDNode *N) const;
81 bool isShifterOpProfitable(const SDValue &Shift,
82 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
83 bool SelectRegShifterOperand(SDValue N, SDValue &A,
84 SDValue &B, SDValue &C,
85 bool CheckProfitability = true);
86 bool SelectImmShifterOperand(SDValue N, SDValue &A,
87 SDValue &B, bool CheckProfitability = true);
88 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C) {
90 // Don't apply the profitability check
91 return SelectRegShifterOperand(N, A, B, C, false);
93 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
94 SDValue &B) {
95 // Don't apply the profitability check
96 return SelectImmShifterOperand(N, A, B, false);
99 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
101 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
102 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
104 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
105 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
106 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
107 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
108 return true;
111 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
112 SDValue &Offset, SDValue &Opc);
113 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
114 SDValue &Offset, SDValue &Opc);
115 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
116 SDValue &Offset, SDValue &Opc);
117 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
118 bool SelectAddrMode3(SDValue N, SDValue &Base,
119 SDValue &Offset, SDValue &Opc);
120 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
121 SDValue &Offset, SDValue &Opc);
122 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
123 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
124 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
125 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
126 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
128 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
130 // Thumb Addressing Modes:
131 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
132 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
133 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
134 SDValue &OffImm);
135 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
136 SDValue &OffImm);
137 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
138 SDValue &OffImm);
139 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
140 SDValue &OffImm);
141 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
143 // Thumb 2 Addressing Modes:
144 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
145 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
146 SDValue &OffImm);
147 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
148 SDValue &OffImm);
149 template<unsigned Shift>
150 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
151 SDValue &OffImm);
152 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
153 SDValue &OffReg, SDValue &ShImm);
154 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
156 inline bool is_so_imm(unsigned Imm) const {
157 return ARM_AM::getSOImmVal(Imm) != -1;
160 inline bool is_so_imm_not(unsigned Imm) const {
161 return ARM_AM::getSOImmVal(~Imm) != -1;
164 inline bool is_t2_so_imm(unsigned Imm) const {
165 return ARM_AM::getT2SOImmVal(Imm) != -1;
168 inline bool is_t2_so_imm_not(unsigned Imm) const {
169 return ARM_AM::getT2SOImmVal(~Imm) != -1;
172 // Include the pieces autogenerated from the target description.
173 #include "ARMGenDAGISel.inc"
175 private:
176 void transferMemOperands(SDNode *Src, SDNode *Dst);
178 /// Indexed (pre/post inc/dec) load matching code for ARM.
179 bool tryARMIndexedLoad(SDNode *N);
180 bool tryT1IndexedLoad(SDNode *N);
181 bool tryT2IndexedLoad(SDNode *N);
183 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
184 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
185 /// loads of D registers and even subregs and odd subregs of Q registers.
186 /// For NumVecs <= 2, QOpcodes1 is not used.
187 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
188 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
189 const uint16_t *QOpcodes1);
191 /// SelectVST - Select NEON store intrinsics. NumVecs should
192 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
193 /// stores of D registers and even subregs and odd subregs of Q registers.
194 /// For NumVecs <= 2, QOpcodes1 is not used.
195 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
196 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
197 const uint16_t *QOpcodes1);
199 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
200 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
201 /// load/store of D registers and Q registers.
202 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
203 unsigned NumVecs, const uint16_t *DOpcodes,
204 const uint16_t *QOpcodes);
206 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
207 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
208 /// for loading D registers.
209 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
210 unsigned NumVecs, const uint16_t *DOpcodes,
211 const uint16_t *QOpcodes0 = nullptr,
212 const uint16_t *QOpcodes1 = nullptr);
214 /// Try to select SBFX/UBFX instructions for ARM.
215 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
217 // Select special operations if node forms integer ABS pattern
218 bool tryABSOp(SDNode *N);
220 bool tryReadRegister(SDNode *N);
221 bool tryWriteRegister(SDNode *N);
223 bool tryInlineAsm(SDNode *N);
225 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
227 void SelectCMP_SWAP(SDNode *N);
229 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
230 /// inline asm expressions.
231 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
232 std::vector<SDValue> &OutOps) override;
234 // Form pairs of consecutive R, S, D, or Q registers.
235 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
236 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
237 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
238 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
240 // Form sequences of 4 consecutive S, D, or Q registers.
241 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
242 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
243 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
245 // Get the alignment operand for a NEON VLD or VST instruction.
246 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
247 bool is64BitVector);
249 /// Returns the number of instructions required to materialize the given
250 /// constant in a register, or 3 if a literal pool load is needed.
251 unsigned ConstantMaterializationCost(unsigned Val) const;
253 /// Checks if N is a multiplication by a constant where we can extract out a
254 /// power of two from the constant so that it can be used in a shift, but only
255 /// if it simplifies the materialization of the constant. Returns true if it
256 /// is, and assigns to PowerOfTwo the power of two that should be extracted
257 /// out and to NewMulConst the new constant to be multiplied by.
258 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
259 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
261 /// Replace N with M in CurDAG, in a way that also ensures that M gets
262 /// selected when N would have been selected.
263 void replaceDAGValue(const SDValue &N, SDValue M);
267 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
268 /// operand. If so Imm will receive the 32-bit value.
269 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
270 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
271 Imm = cast<ConstantSDNode>(N)->getZExtValue();
272 return true;
274 return false;
277 // isInt32Immediate - This method tests to see if a constant operand.
278 // If so Imm will receive the 32 bit value.
279 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
280 return isInt32Immediate(N.getNode(), Imm);
283 // isOpcWithIntImmediate - This method tests to see if the node is a specific
284 // opcode and that it has a immediate integer right operand.
285 // If so Imm will receive the 32 bit value.
286 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
287 return N->getOpcode() == Opc &&
288 isInt32Immediate(N->getOperand(1).getNode(), Imm);
291 /// Check whether a particular node is a constant value representable as
292 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
294 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
295 static bool isScaledConstantInRange(SDValue Node, int Scale,
296 int RangeMin, int RangeMax,
297 int &ScaledConstant) {
298 assert(Scale > 0 && "Invalid scale!");
300 // Check that this is a constant.
301 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
302 if (!C)
303 return false;
305 ScaledConstant = (int) C->getZExtValue();
306 if ((ScaledConstant % Scale) != 0)
307 return false;
309 ScaledConstant /= Scale;
310 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
313 void ARMDAGToDAGISel::PreprocessISelDAG() {
314 if (!Subtarget->hasV6T2Ops())
315 return;
317 bool isThumb2 = Subtarget->isThumb();
318 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
319 E = CurDAG->allnodes_end(); I != E; ) {
320 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
322 if (N->getOpcode() != ISD::ADD)
323 continue;
325 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
326 // leading zeros, followed by consecutive set bits, followed by 1 or 2
327 // trailing zeros, e.g. 1020.
328 // Transform the expression to
329 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
330 // of trailing zeros of c2. The left shift would be folded as an shifter
331 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
332 // node (UBFX).
334 SDValue N0 = N->getOperand(0);
335 SDValue N1 = N->getOperand(1);
336 unsigned And_imm = 0;
337 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
338 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
339 std::swap(N0, N1);
341 if (!And_imm)
342 continue;
344 // Check if the AND mask is an immediate of the form: 000.....1111111100
345 unsigned TZ = countTrailingZeros(And_imm);
346 if (TZ != 1 && TZ != 2)
347 // Be conservative here. Shifter operands aren't always free. e.g. On
348 // Swift, left shifter operand of 1 / 2 for free but others are not.
349 // e.g.
350 // ubfx r3, r1, #16, #8
351 // ldr.w r3, [r0, r3, lsl #2]
352 // vs.
353 // mov.w r9, #1020
354 // and.w r2, r9, r1, lsr #14
355 // ldr r2, [r0, r2]
356 continue;
357 And_imm >>= TZ;
358 if (And_imm & (And_imm + 1))
359 continue;
361 // Look for (and (srl X, c1), c2).
362 SDValue Srl = N1.getOperand(0);
363 unsigned Srl_imm = 0;
364 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
365 (Srl_imm <= 2))
366 continue;
368 // Make sure first operand is not a shifter operand which would prevent
369 // folding of the left shift.
370 SDValue CPTmp0;
371 SDValue CPTmp1;
372 SDValue CPTmp2;
373 if (isThumb2) {
374 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
375 continue;
376 } else {
377 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
378 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
379 continue;
382 // Now make the transformation.
383 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
384 Srl.getOperand(0),
385 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
386 MVT::i32));
387 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
388 Srl,
389 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
390 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
391 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
392 CurDAG->UpdateNodeOperands(N, N0, N1);
396 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
397 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
398 /// least on current ARM implementations) which should be avoidded.
399 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
400 if (OptLevel == CodeGenOpt::None)
401 return true;
403 if (!Subtarget->hasVMLxHazards())
404 return true;
406 if (!N->hasOneUse())
407 return false;
409 SDNode *Use = *N->use_begin();
410 if (Use->getOpcode() == ISD::CopyToReg)
411 return true;
412 if (Use->isMachineOpcode()) {
413 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
414 CurDAG->getSubtarget().getInstrInfo());
416 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
417 if (MCID.mayStore())
418 return true;
419 unsigned Opcode = MCID.getOpcode();
420 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
421 return true;
422 // vmlx feeding into another vmlx. We actually want to unfold
423 // the use later in the MLxExpansion pass. e.g.
424 // vmla
425 // vmla (stall 8 cycles)
427 // vmul (5 cycles)
428 // vadd (5 cycles)
429 // vmla
430 // This adds up to about 18 - 19 cycles.
432 // vmla
433 // vmul (stall 4 cycles)
434 // vadd adds up to about 14 cycles.
435 return TII->isFpMLxInstruction(Opcode);
438 return false;
441 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
442 ARM_AM::ShiftOpc ShOpcVal,
443 unsigned ShAmt) {
444 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
445 return true;
446 if (Shift.hasOneUse())
447 return true;
448 // R << 2 is free.
449 return ShOpcVal == ARM_AM::lsl &&
450 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
453 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
454 if (Subtarget->isThumb()) {
455 if (Val <= 255) return 1; // MOV
456 if (Subtarget->hasV6T2Ops() &&
457 (Val <= 0xffff || // MOV
458 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
459 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
460 return 1;
461 if (Val <= 510) return 2; // MOV + ADDi8
462 if (~Val <= 255) return 2; // MOV + MVN
463 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
464 } else {
465 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
466 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
467 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
468 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
470 if (Subtarget->useMovt()) return 2; // MOVW + MOVT
471 return 3; // Literal pool load
474 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
475 unsigned MaxShift,
476 unsigned &PowerOfTwo,
477 SDValue &NewMulConst) const {
478 assert(N.getOpcode() == ISD::MUL);
479 assert(MaxShift > 0);
481 // If the multiply is used in more than one place then changing the constant
482 // will make other uses incorrect, so don't.
483 if (!N.hasOneUse()) return false;
484 // Check if the multiply is by a constant
485 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
486 if (!MulConst) return false;
487 // If the constant is used in more than one place then modifying it will mean
488 // we need to materialize two constants instead of one, which is a bad idea.
489 if (!MulConst->hasOneUse()) return false;
490 unsigned MulConstVal = MulConst->getZExtValue();
491 if (MulConstVal == 0) return false;
493 // Find the largest power of 2 that MulConstVal is a multiple of
494 PowerOfTwo = MaxShift;
495 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
496 --PowerOfTwo;
497 if (PowerOfTwo == 0) return false;
500 // Only optimise if the new cost is better
501 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
502 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
503 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
504 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
505 return NewCost < OldCost;
508 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
509 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
510 ReplaceUses(N, M);
513 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
514 SDValue &BaseReg,
515 SDValue &Opc,
516 bool CheckProfitability) {
517 if (DisableShifterOp)
518 return false;
520 // If N is a multiply-by-constant and it's profitable to extract a shift and
521 // use it in a shifted operand do so.
522 if (N.getOpcode() == ISD::MUL) {
523 unsigned PowerOfTwo = 0;
524 SDValue NewMulConst;
525 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
526 HandleSDNode Handle(N);
527 SDLoc Loc(N);
528 replaceDAGValue(N.getOperand(1), NewMulConst);
529 BaseReg = Handle.getValue();
530 Opc = CurDAG->getTargetConstant(
531 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
532 return true;
536 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
538 // Don't match base register only case. That is matched to a separate
539 // lower complexity pattern with explicit register operand.
540 if (ShOpcVal == ARM_AM::no_shift) return false;
542 BaseReg = N.getOperand(0);
543 unsigned ShImmVal = 0;
544 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
545 if (!RHS) return false;
546 ShImmVal = RHS->getZExtValue() & 31;
547 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
548 SDLoc(N), MVT::i32);
549 return true;
552 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
553 SDValue &BaseReg,
554 SDValue &ShReg,
555 SDValue &Opc,
556 bool CheckProfitability) {
557 if (DisableShifterOp)
558 return false;
560 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
562 // Don't match base register only case. That is matched to a separate
563 // lower complexity pattern with explicit register operand.
564 if (ShOpcVal == ARM_AM::no_shift) return false;
566 BaseReg = N.getOperand(0);
567 unsigned ShImmVal = 0;
568 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
569 if (RHS) return false;
571 ShReg = N.getOperand(1);
572 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
573 return false;
574 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
575 SDLoc(N), MVT::i32);
576 return true;
579 // Determine whether an ISD::OR's operands are suitable to turn the operation
580 // into an addition, which often has more compact encodings.
581 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
582 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
583 Out = N;
584 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
588 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
589 SDValue &Base,
590 SDValue &OffImm) {
591 // Match simple R + imm12 operands.
593 // Base only.
594 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
595 !CurDAG->isBaseWithConstantOffset(N)) {
596 if (N.getOpcode() == ISD::FrameIndex) {
597 // Match frame index.
598 int FI = cast<FrameIndexSDNode>(N)->getIndex();
599 Base = CurDAG->getTargetFrameIndex(
600 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
601 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
602 return true;
605 if (N.getOpcode() == ARMISD::Wrapper &&
606 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
607 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
608 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
609 Base = N.getOperand(0);
610 } else
611 Base = N;
612 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
613 return true;
616 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
617 int RHSC = (int)RHS->getSExtValue();
618 if (N.getOpcode() == ISD::SUB)
619 RHSC = -RHSC;
621 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
622 Base = N.getOperand(0);
623 if (Base.getOpcode() == ISD::FrameIndex) {
624 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
625 Base = CurDAG->getTargetFrameIndex(
626 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
628 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
629 return true;
633 // Base only.
634 Base = N;
635 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
636 return true;
641 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
642 SDValue &Opc) {
643 if (N.getOpcode() == ISD::MUL &&
644 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
645 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
646 // X * [3,5,9] -> X + X * [2,4,8] etc.
647 int RHSC = (int)RHS->getZExtValue();
648 if (RHSC & 1) {
649 RHSC = RHSC & ~1;
650 ARM_AM::AddrOpc AddSub = ARM_AM::add;
651 if (RHSC < 0) {
652 AddSub = ARM_AM::sub;
653 RHSC = - RHSC;
655 if (isPowerOf2_32(RHSC)) {
656 unsigned ShAmt = Log2_32(RHSC);
657 Base = Offset = N.getOperand(0);
658 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
659 ARM_AM::lsl),
660 SDLoc(N), MVT::i32);
661 return true;
667 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
668 // ISD::OR that is equivalent to an ISD::ADD.
669 !CurDAG->isBaseWithConstantOffset(N))
670 return false;
672 // Leave simple R +/- imm12 operands for LDRi12
673 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
674 int RHSC;
675 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
676 -0x1000+1, 0x1000, RHSC)) // 12 bits.
677 return false;
680 // Otherwise this is R +/- [possibly shifted] R.
681 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
682 ARM_AM::ShiftOpc ShOpcVal =
683 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
684 unsigned ShAmt = 0;
686 Base = N.getOperand(0);
687 Offset = N.getOperand(1);
689 if (ShOpcVal != ARM_AM::no_shift) {
690 // Check to see if the RHS of the shift is a constant, if not, we can't fold
691 // it.
692 if (ConstantSDNode *Sh =
693 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
694 ShAmt = Sh->getZExtValue();
695 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
696 Offset = N.getOperand(1).getOperand(0);
697 else {
698 ShAmt = 0;
699 ShOpcVal = ARM_AM::no_shift;
701 } else {
702 ShOpcVal = ARM_AM::no_shift;
706 // Try matching (R shl C) + (R).
707 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
708 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
709 N.getOperand(0).hasOneUse())) {
710 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
711 if (ShOpcVal != ARM_AM::no_shift) {
712 // Check to see if the RHS of the shift is a constant, if not, we can't
713 // fold it.
714 if (ConstantSDNode *Sh =
715 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
716 ShAmt = Sh->getZExtValue();
717 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
718 Offset = N.getOperand(0).getOperand(0);
719 Base = N.getOperand(1);
720 } else {
721 ShAmt = 0;
722 ShOpcVal = ARM_AM::no_shift;
724 } else {
725 ShOpcVal = ARM_AM::no_shift;
730 // If Offset is a multiply-by-constant and it's profitable to extract a shift
731 // and use it in a shifted operand do so.
732 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
733 unsigned PowerOfTwo = 0;
734 SDValue NewMulConst;
735 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
736 HandleSDNode Handle(Offset);
737 replaceDAGValue(Offset.getOperand(1), NewMulConst);
738 Offset = Handle.getValue();
739 ShAmt = PowerOfTwo;
740 ShOpcVal = ARM_AM::lsl;
744 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
745 SDLoc(N), MVT::i32);
746 return true;
749 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
750 SDValue &Offset, SDValue &Opc) {
751 unsigned Opcode = Op->getOpcode();
752 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
753 ? cast<LoadSDNode>(Op)->getAddressingMode()
754 : cast<StoreSDNode>(Op)->getAddressingMode();
755 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
756 ? ARM_AM::add : ARM_AM::sub;
757 int Val;
758 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
759 return false;
761 Offset = N;
762 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
763 unsigned ShAmt = 0;
764 if (ShOpcVal != ARM_AM::no_shift) {
765 // Check to see if the RHS of the shift is a constant, if not, we can't fold
766 // it.
767 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
768 ShAmt = Sh->getZExtValue();
769 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
770 Offset = N.getOperand(0);
771 else {
772 ShAmt = 0;
773 ShOpcVal = ARM_AM::no_shift;
775 } else {
776 ShOpcVal = ARM_AM::no_shift;
780 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
781 SDLoc(N), MVT::i32);
782 return true;
785 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
786 SDValue &Offset, SDValue &Opc) {
787 unsigned Opcode = Op->getOpcode();
788 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
789 ? cast<LoadSDNode>(Op)->getAddressingMode()
790 : cast<StoreSDNode>(Op)->getAddressingMode();
791 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
792 ? ARM_AM::add : ARM_AM::sub;
793 int Val;
794 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
795 if (AddSub == ARM_AM::sub) Val *= -1;
796 Offset = CurDAG->getRegister(0, MVT::i32);
797 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
798 return true;
801 return false;
805 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
806 SDValue &Offset, SDValue &Opc) {
807 unsigned Opcode = Op->getOpcode();
808 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
809 ? cast<LoadSDNode>(Op)->getAddressingMode()
810 : cast<StoreSDNode>(Op)->getAddressingMode();
811 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
812 ? ARM_AM::add : ARM_AM::sub;
813 int Val;
814 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
815 Offset = CurDAG->getRegister(0, MVT::i32);
816 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
817 ARM_AM::no_shift),
818 SDLoc(Op), MVT::i32);
819 return true;
822 return false;
825 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
826 Base = N;
827 return true;
830 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
831 SDValue &Base, SDValue &Offset,
832 SDValue &Opc) {
833 if (N.getOpcode() == ISD::SUB) {
834 // X - C is canonicalize to X + -C, no need to handle it here.
835 Base = N.getOperand(0);
836 Offset = N.getOperand(1);
837 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
838 MVT::i32);
839 return true;
842 if (!CurDAG->isBaseWithConstantOffset(N)) {
843 Base = N;
844 if (N.getOpcode() == ISD::FrameIndex) {
845 int FI = cast<FrameIndexSDNode>(N)->getIndex();
846 Base = CurDAG->getTargetFrameIndex(
847 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
849 Offset = CurDAG->getRegister(0, MVT::i32);
850 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
851 MVT::i32);
852 return true;
855 // If the RHS is +/- imm8, fold into addr mode.
856 int RHSC;
857 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
858 -256 + 1, 256, RHSC)) { // 8 bits.
859 Base = N.getOperand(0);
860 if (Base.getOpcode() == ISD::FrameIndex) {
861 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
862 Base = CurDAG->getTargetFrameIndex(
863 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
865 Offset = CurDAG->getRegister(0, MVT::i32);
867 ARM_AM::AddrOpc AddSub = ARM_AM::add;
868 if (RHSC < 0) {
869 AddSub = ARM_AM::sub;
870 RHSC = -RHSC;
872 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
873 MVT::i32);
874 return true;
877 Base = N.getOperand(0);
878 Offset = N.getOperand(1);
879 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
880 MVT::i32);
881 return true;
884 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
885 SDValue &Offset, SDValue &Opc) {
886 unsigned Opcode = Op->getOpcode();
887 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
888 ? cast<LoadSDNode>(Op)->getAddressingMode()
889 : cast<StoreSDNode>(Op)->getAddressingMode();
890 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
891 ? ARM_AM::add : ARM_AM::sub;
892 int Val;
893 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
894 Offset = CurDAG->getRegister(0, MVT::i32);
895 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
896 MVT::i32);
897 return true;
900 Offset = N;
901 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
902 MVT::i32);
903 return true;
906 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
907 bool FP16) {
908 if (!CurDAG->isBaseWithConstantOffset(N)) {
909 Base = N;
910 if (N.getOpcode() == ISD::FrameIndex) {
911 int FI = cast<FrameIndexSDNode>(N)->getIndex();
912 Base = CurDAG->getTargetFrameIndex(
913 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
914 } else if (N.getOpcode() == ARMISD::Wrapper &&
915 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
916 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
917 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
918 Base = N.getOperand(0);
920 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
921 SDLoc(N), MVT::i32);
922 return true;
925 // If the RHS is +/- imm8, fold into addr mode.
926 int RHSC;
927 const int Scale = FP16 ? 2 : 4;
929 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
930 Base = N.getOperand(0);
931 if (Base.getOpcode() == ISD::FrameIndex) {
932 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
933 Base = CurDAG->getTargetFrameIndex(
934 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
937 ARM_AM::AddrOpc AddSub = ARM_AM::add;
938 if (RHSC < 0) {
939 AddSub = ARM_AM::sub;
940 RHSC = -RHSC;
943 if (FP16)
944 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
945 SDLoc(N), MVT::i32);
946 else
947 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
948 SDLoc(N), MVT::i32);
950 return true;
953 Base = N;
955 if (FP16)
956 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
957 SDLoc(N), MVT::i32);
958 else
959 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
960 SDLoc(N), MVT::i32);
962 return true;
965 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
966 SDValue &Base, SDValue &Offset) {
967 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
970 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
971 SDValue &Base, SDValue &Offset) {
972 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
975 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
976 SDValue &Align) {
977 Addr = N;
979 unsigned Alignment = 0;
981 MemSDNode *MemN = cast<MemSDNode>(Parent);
983 if (isa<LSBaseSDNode>(MemN) ||
984 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
985 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
986 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
987 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
988 // The maximum alignment is equal to the memory size being referenced.
989 unsigned MMOAlign = MemN->getAlignment();
990 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
991 if (MMOAlign >= MemSize && MemSize > 1)
992 Alignment = MemSize;
993 } else {
994 // All other uses of addrmode6 are for intrinsics. For now just record
995 // the raw alignment value; it will be refined later based on the legal
996 // alignment operands for the intrinsic.
997 Alignment = MemN->getAlignment();
1000 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1001 return true;
1004 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1005 SDValue &Offset) {
1006 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1007 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1008 if (AM != ISD::POST_INC)
1009 return false;
1010 Offset = N;
1011 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1012 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1013 Offset = CurDAG->getRegister(0, MVT::i32);
1015 return true;
1018 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1019 SDValue &Offset, SDValue &Label) {
1020 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1021 Offset = N.getOperand(0);
1022 SDValue N1 = N.getOperand(1);
1023 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1024 SDLoc(N), MVT::i32);
1025 return true;
1028 return false;
1032 //===----------------------------------------------------------------------===//
1033 // Thumb Addressing Modes
1034 //===----------------------------------------------------------------------===//
1036 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1037 // Negative numbers are difficult to materialise in thumb1. If we are
1038 // selecting the add of a negative, instead try to select ri with a zero
1039 // offset, so create the add node directly which will become a sub.
1040 if (N.getOpcode() != ISD::ADD)
1041 return false;
1043 // Look for an imm which is not legal for ld/st, but is legal for sub.
1044 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1045 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1047 return false;
1050 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1051 SDValue &Offset) {
1052 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1053 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1054 if (!NC || !NC->isNullValue())
1055 return false;
1057 Base = Offset = N;
1058 return true;
1061 Base = N.getOperand(0);
1062 Offset = N.getOperand(1);
1063 return true;
1066 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1067 SDValue &Offset) {
1068 if (shouldUseZeroOffsetLdSt(N))
1069 return false; // Select ri instead
1070 return SelectThumbAddrModeRRSext(N, Base, Offset);
1073 bool
1074 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1075 SDValue &Base, SDValue &OffImm) {
1076 if (shouldUseZeroOffsetLdSt(N)) {
1077 Base = N;
1078 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1079 return true;
1082 if (!CurDAG->isBaseWithConstantOffset(N)) {
1083 if (N.getOpcode() == ISD::ADD) {
1084 return false; // We want to select register offset instead
1085 } else if (N.getOpcode() == ARMISD::Wrapper &&
1086 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1087 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1088 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1089 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1090 Base = N.getOperand(0);
1091 } else {
1092 Base = N;
1095 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1096 return true;
1099 // If the RHS is + imm5 * scale, fold into addr mode.
1100 int RHSC;
1101 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1102 Base = N.getOperand(0);
1103 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1104 return true;
1107 // Offset is too large, so use register offset instead.
1108 return false;
1111 bool
1112 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1113 SDValue &OffImm) {
1114 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1117 bool
1118 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1119 SDValue &OffImm) {
1120 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1123 bool
1124 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1125 SDValue &OffImm) {
1126 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1129 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1130 SDValue &Base, SDValue &OffImm) {
1131 if (N.getOpcode() == ISD::FrameIndex) {
1132 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1133 // Only multiples of 4 are allowed for the offset, so the frame object
1134 // alignment must be at least 4.
1135 MachineFrameInfo &MFI = MF->getFrameInfo();
1136 if (MFI.getObjectAlignment(FI) < 4)
1137 MFI.setObjectAlignment(FI, 4);
1138 Base = CurDAG->getTargetFrameIndex(
1139 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1140 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1141 return true;
1144 if (!CurDAG->isBaseWithConstantOffset(N))
1145 return false;
1147 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1148 // If the RHS is + imm8 * scale, fold into addr mode.
1149 int RHSC;
1150 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1151 Base = N.getOperand(0);
1152 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1153 // Make sure the offset is inside the object, or we might fail to
1154 // allocate an emergency spill slot. (An out-of-range access is UB, but
1155 // it could show up anyway.)
1156 MachineFrameInfo &MFI = MF->getFrameInfo();
1157 if (RHSC * 4 < MFI.getObjectSize(FI)) {
1158 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1159 // indexed by the LHS must be 4-byte aligned.
1160 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1161 MFI.setObjectAlignment(FI, 4);
1162 if (MFI.getObjectAlignment(FI) >= 4) {
1163 Base = CurDAG->getTargetFrameIndex(
1164 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1165 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1166 return true;
1172 return false;
1176 //===----------------------------------------------------------------------===//
1177 // Thumb 2 Addressing Modes
1178 //===----------------------------------------------------------------------===//
1181 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1182 SDValue &Base, SDValue &OffImm) {
1183 // Match simple R + imm12 operands.
1185 // Base only.
1186 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1187 !CurDAG->isBaseWithConstantOffset(N)) {
1188 if (N.getOpcode() == ISD::FrameIndex) {
1189 // Match frame index.
1190 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1191 Base = CurDAG->getTargetFrameIndex(
1192 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1193 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1194 return true;
1197 if (N.getOpcode() == ARMISD::Wrapper &&
1198 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1199 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1200 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1201 Base = N.getOperand(0);
1202 if (Base.getOpcode() == ISD::TargetConstantPool)
1203 return false; // We want to select t2LDRpci instead.
1204 } else
1205 Base = N;
1206 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1207 return true;
1210 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1211 if (SelectT2AddrModeImm8(N, Base, OffImm))
1212 // Let t2LDRi8 handle (R - imm8).
1213 return false;
1215 int RHSC = (int)RHS->getZExtValue();
1216 if (N.getOpcode() == ISD::SUB)
1217 RHSC = -RHSC;
1219 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1220 Base = N.getOperand(0);
1221 if (Base.getOpcode() == ISD::FrameIndex) {
1222 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1223 Base = CurDAG->getTargetFrameIndex(
1224 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1226 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1227 return true;
1231 // Base only.
1232 Base = N;
1233 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1234 return true;
1237 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1238 SDValue &Base, SDValue &OffImm) {
1239 // Match simple R - imm8 operands.
1240 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1241 !CurDAG->isBaseWithConstantOffset(N))
1242 return false;
1244 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1245 int RHSC = (int)RHS->getSExtValue();
1246 if (N.getOpcode() == ISD::SUB)
1247 RHSC = -RHSC;
1249 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1250 Base = N.getOperand(0);
1251 if (Base.getOpcode() == ISD::FrameIndex) {
1252 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253 Base = CurDAG->getTargetFrameIndex(
1254 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1256 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1257 return true;
1261 return false;
1264 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1265 SDValue &OffImm){
1266 unsigned Opcode = Op->getOpcode();
1267 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1268 ? cast<LoadSDNode>(Op)->getAddressingMode()
1269 : cast<StoreSDNode>(Op)->getAddressingMode();
1270 int RHSC;
1271 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1272 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1273 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1274 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1275 return true;
1278 return false;
1281 template<unsigned Shift>
1282 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
1283 SDValue &Base, SDValue &OffImm) {
1284 if (N.getOpcode() == ISD::SUB ||
1285 CurDAG->isBaseWithConstantOffset(N)) {
1286 if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1287 int RHSC = (int)RHS->getZExtValue();
1288 if (N.getOpcode() == ISD::SUB)
1289 RHSC = -RHSC;
1291 if (isShiftedInt<7, Shift>(RHSC)) {
1292 Base = N.getOperand(0);
1293 if (Base.getOpcode() == ISD::FrameIndex) {
1294 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1295 Base = CurDAG->getTargetFrameIndex(
1296 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1298 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1299 return true;
1304 // Base only.
1305 Base = N;
1306 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1307 return true;
1310 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1311 SDValue &Base,
1312 SDValue &OffReg, SDValue &ShImm) {
1313 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1314 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1315 return false;
1317 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1318 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1319 int RHSC = (int)RHS->getZExtValue();
1320 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1321 return false;
1322 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1323 return false;
1326 // Look for (R + R) or (R + (R << [1,2,3])).
1327 unsigned ShAmt = 0;
1328 Base = N.getOperand(0);
1329 OffReg = N.getOperand(1);
1331 // Swap if it is ((R << c) + R).
1332 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1333 if (ShOpcVal != ARM_AM::lsl) {
1334 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1335 if (ShOpcVal == ARM_AM::lsl)
1336 std::swap(Base, OffReg);
1339 if (ShOpcVal == ARM_AM::lsl) {
1340 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1341 // it.
1342 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1343 ShAmt = Sh->getZExtValue();
1344 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1345 OffReg = OffReg.getOperand(0);
1346 else {
1347 ShAmt = 0;
1352 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1353 // and use it in a shifted operand do so.
1354 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1355 unsigned PowerOfTwo = 0;
1356 SDValue NewMulConst;
1357 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1358 HandleSDNode Handle(OffReg);
1359 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1360 OffReg = Handle.getValue();
1361 ShAmt = PowerOfTwo;
1365 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1367 return true;
1370 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1371 SDValue &OffImm) {
1372 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1373 // instructions.
1374 Base = N;
1375 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1377 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1378 return true;
1380 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1381 if (!RHS)
1382 return true;
1384 uint32_t RHSC = (int)RHS->getZExtValue();
1385 if (RHSC > 1020 || RHSC % 4 != 0)
1386 return true;
1388 Base = N.getOperand(0);
1389 if (Base.getOpcode() == ISD::FrameIndex) {
1390 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1391 Base = CurDAG->getTargetFrameIndex(
1392 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1395 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1396 return true;
1399 //===--------------------------------------------------------------------===//
1401 /// getAL - Returns a ARMCC::AL immediate node.
1402 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1403 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1406 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1407 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1408 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1411 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1412 LoadSDNode *LD = cast<LoadSDNode>(N);
1413 ISD::MemIndexedMode AM = LD->getAddressingMode();
1414 if (AM == ISD::UNINDEXED)
1415 return false;
1417 EVT LoadedVT = LD->getMemoryVT();
1418 SDValue Offset, AMOpc;
1419 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1420 unsigned Opcode = 0;
1421 bool Match = false;
1422 if (LoadedVT == MVT::i32 && isPre &&
1423 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1424 Opcode = ARM::LDR_PRE_IMM;
1425 Match = true;
1426 } else if (LoadedVT == MVT::i32 && !isPre &&
1427 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1428 Opcode = ARM::LDR_POST_IMM;
1429 Match = true;
1430 } else if (LoadedVT == MVT::i32 &&
1431 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1432 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1433 Match = true;
1435 } else if (LoadedVT == MVT::i16 &&
1436 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1437 Match = true;
1438 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1439 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1440 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1441 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1442 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1443 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1444 Match = true;
1445 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1447 } else {
1448 if (isPre &&
1449 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1450 Match = true;
1451 Opcode = ARM::LDRB_PRE_IMM;
1452 } else if (!isPre &&
1453 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1454 Match = true;
1455 Opcode = ARM::LDRB_POST_IMM;
1456 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1457 Match = true;
1458 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1463 if (Match) {
1464 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1465 SDValue Chain = LD->getChain();
1466 SDValue Base = LD->getBasePtr();
1467 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1468 CurDAG->getRegister(0, MVT::i32), Chain };
1469 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1470 MVT::Other, Ops);
1471 transferMemOperands(N, New);
1472 ReplaceNode(N, New);
1473 return true;
1474 } else {
1475 SDValue Chain = LD->getChain();
1476 SDValue Base = LD->getBasePtr();
1477 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1478 CurDAG->getRegister(0, MVT::i32), Chain };
1479 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1480 MVT::Other, Ops);
1481 transferMemOperands(N, New);
1482 ReplaceNode(N, New);
1483 return true;
1487 return false;
1490 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1491 LoadSDNode *LD = cast<LoadSDNode>(N);
1492 EVT LoadedVT = LD->getMemoryVT();
1493 ISD::MemIndexedMode AM = LD->getAddressingMode();
1494 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1495 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1496 return false;
1498 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1499 if (!COffs || COffs->getZExtValue() != 4)
1500 return false;
1502 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1503 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1504 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1505 // ISel.
1506 SDValue Chain = LD->getChain();
1507 SDValue Base = LD->getBasePtr();
1508 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1509 CurDAG->getRegister(0, MVT::i32), Chain };
1510 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1511 MVT::i32, MVT::Other, Ops);
1512 transferMemOperands(N, New);
1513 ReplaceNode(N, New);
1514 return true;
1517 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1518 LoadSDNode *LD = cast<LoadSDNode>(N);
1519 ISD::MemIndexedMode AM = LD->getAddressingMode();
1520 if (AM == ISD::UNINDEXED)
1521 return false;
1523 EVT LoadedVT = LD->getMemoryVT();
1524 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1525 SDValue Offset;
1526 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1527 unsigned Opcode = 0;
1528 bool Match = false;
1529 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1530 switch (LoadedVT.getSimpleVT().SimpleTy) {
1531 case MVT::i32:
1532 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1533 break;
1534 case MVT::i16:
1535 if (isSExtLd)
1536 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1537 else
1538 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1539 break;
1540 case MVT::i8:
1541 case MVT::i1:
1542 if (isSExtLd)
1543 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1544 else
1545 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1546 break;
1547 default:
1548 return false;
1550 Match = true;
1553 if (Match) {
1554 SDValue Chain = LD->getChain();
1555 SDValue Base = LD->getBasePtr();
1556 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1557 CurDAG->getRegister(0, MVT::i32), Chain };
1558 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1559 MVT::Other, Ops);
1560 transferMemOperands(N, New);
1561 ReplaceNode(N, New);
1562 return true;
1565 return false;
1568 /// Form a GPRPair pseudo register from a pair of GPR regs.
1569 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1570 SDLoc dl(V0.getNode());
1571 SDValue RegClass =
1572 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1573 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1574 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1575 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1576 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1579 /// Form a D register from a pair of S registers.
1580 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1581 SDLoc dl(V0.getNode());
1582 SDValue RegClass =
1583 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1584 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1585 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1586 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1587 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1590 /// Form a quad register from a pair of D registers.
1591 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1592 SDLoc dl(V0.getNode());
1593 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1594 MVT::i32);
1595 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1596 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1597 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1598 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1601 /// Form 4 consecutive D registers from a pair of Q registers.
1602 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1603 SDLoc dl(V0.getNode());
1604 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1605 MVT::i32);
1606 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1607 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1608 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1609 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1612 /// Form 4 consecutive S registers.
1613 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1614 SDValue V2, SDValue V3) {
1615 SDLoc dl(V0.getNode());
1616 SDValue RegClass =
1617 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1618 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1619 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1620 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1621 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1622 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1623 V2, SubReg2, V3, SubReg3 };
1624 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1627 /// Form 4 consecutive D registers.
1628 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1629 SDValue V2, SDValue V3) {
1630 SDLoc dl(V0.getNode());
1631 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1632 MVT::i32);
1633 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1634 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1635 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1636 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1637 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1638 V2, SubReg2, V3, SubReg3 };
1639 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1642 /// Form 4 consecutive Q registers.
1643 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1644 SDValue V2, SDValue V3) {
1645 SDLoc dl(V0.getNode());
1646 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1647 MVT::i32);
1648 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1649 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1650 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1651 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1652 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1653 V2, SubReg2, V3, SubReg3 };
1654 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1657 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1658 /// of a NEON VLD or VST instruction. The supported values depend on the
1659 /// number of registers being loaded.
1660 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1661 unsigned NumVecs, bool is64BitVector) {
1662 unsigned NumRegs = NumVecs;
1663 if (!is64BitVector && NumVecs < 3)
1664 NumRegs *= 2;
1666 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1667 if (Alignment >= 32 && NumRegs == 4)
1668 Alignment = 32;
1669 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1670 Alignment = 16;
1671 else if (Alignment >= 8)
1672 Alignment = 8;
1673 else
1674 Alignment = 0;
1676 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1679 static bool isVLDfixed(unsigned Opc)
1681 switch (Opc) {
1682 default: return false;
1683 case ARM::VLD1d8wb_fixed : return true;
1684 case ARM::VLD1d16wb_fixed : return true;
1685 case ARM::VLD1d64Qwb_fixed : return true;
1686 case ARM::VLD1d32wb_fixed : return true;
1687 case ARM::VLD1d64wb_fixed : return true;
1688 case ARM::VLD1d64TPseudoWB_fixed : return true;
1689 case ARM::VLD1d64QPseudoWB_fixed : return true;
1690 case ARM::VLD1q8wb_fixed : return true;
1691 case ARM::VLD1q16wb_fixed : return true;
1692 case ARM::VLD1q32wb_fixed : return true;
1693 case ARM::VLD1q64wb_fixed : return true;
1694 case ARM::VLD1DUPd8wb_fixed : return true;
1695 case ARM::VLD1DUPd16wb_fixed : return true;
1696 case ARM::VLD1DUPd32wb_fixed : return true;
1697 case ARM::VLD1DUPq8wb_fixed : return true;
1698 case ARM::VLD1DUPq16wb_fixed : return true;
1699 case ARM::VLD1DUPq32wb_fixed : return true;
1700 case ARM::VLD2d8wb_fixed : return true;
1701 case ARM::VLD2d16wb_fixed : return true;
1702 case ARM::VLD2d32wb_fixed : return true;
1703 case ARM::VLD2q8PseudoWB_fixed : return true;
1704 case ARM::VLD2q16PseudoWB_fixed : return true;
1705 case ARM::VLD2q32PseudoWB_fixed : return true;
1706 case ARM::VLD2DUPd8wb_fixed : return true;
1707 case ARM::VLD2DUPd16wb_fixed : return true;
1708 case ARM::VLD2DUPd32wb_fixed : return true;
1712 static bool isVSTfixed(unsigned Opc)
1714 switch (Opc) {
1715 default: return false;
1716 case ARM::VST1d8wb_fixed : return true;
1717 case ARM::VST1d16wb_fixed : return true;
1718 case ARM::VST1d32wb_fixed : return true;
1719 case ARM::VST1d64wb_fixed : return true;
1720 case ARM::VST1q8wb_fixed : return true;
1721 case ARM::VST1q16wb_fixed : return true;
1722 case ARM::VST1q32wb_fixed : return true;
1723 case ARM::VST1q64wb_fixed : return true;
1724 case ARM::VST1d64TPseudoWB_fixed : return true;
1725 case ARM::VST1d64QPseudoWB_fixed : return true;
1726 case ARM::VST2d8wb_fixed : return true;
1727 case ARM::VST2d16wb_fixed : return true;
1728 case ARM::VST2d32wb_fixed : return true;
1729 case ARM::VST2q8PseudoWB_fixed : return true;
1730 case ARM::VST2q16PseudoWB_fixed : return true;
1731 case ARM::VST2q32PseudoWB_fixed : return true;
1735 // Get the register stride update opcode of a VLD/VST instruction that
1736 // is otherwise equivalent to the given fixed stride updating instruction.
1737 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1738 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1739 && "Incorrect fixed stride updating instruction.");
1740 switch (Opc) {
1741 default: break;
1742 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1743 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1744 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1745 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1746 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1747 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1748 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1749 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1750 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1751 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1752 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1753 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1754 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1755 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1756 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1757 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1758 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1759 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1761 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1762 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1763 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1764 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1765 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1766 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1767 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1768 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1769 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1770 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1772 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1773 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1774 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1775 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1776 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1777 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1779 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1780 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1781 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1782 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1783 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1784 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1786 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1787 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1788 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1790 return Opc; // If not one we handle, return it unchanged.
1793 /// Returns true if the given increment is a Constant known to be equal to the
1794 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1795 /// be used.
1796 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1797 auto C = dyn_cast<ConstantSDNode>(Inc);
1798 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1801 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1802 const uint16_t *DOpcodes,
1803 const uint16_t *QOpcodes0,
1804 const uint16_t *QOpcodes1) {
1805 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1806 SDLoc dl(N);
1808 SDValue MemAddr, Align;
1809 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
1810 // nodes are not intrinsics.
1811 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1812 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1813 return;
1815 SDValue Chain = N->getOperand(0);
1816 EVT VT = N->getValueType(0);
1817 bool is64BitVector = VT.is64BitVector();
1818 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1820 unsigned OpcodeIndex;
1821 switch (VT.getSimpleVT().SimpleTy) {
1822 default: llvm_unreachable("unhandled vld type");
1823 // Double-register operations:
1824 case MVT::v8i8: OpcodeIndex = 0; break;
1825 case MVT::v4f16:
1826 case MVT::v4i16: OpcodeIndex = 1; break;
1827 case MVT::v2f32:
1828 case MVT::v2i32: OpcodeIndex = 2; break;
1829 case MVT::v1i64: OpcodeIndex = 3; break;
1830 // Quad-register operations:
1831 case MVT::v16i8: OpcodeIndex = 0; break;
1832 case MVT::v8f16:
1833 case MVT::v8i16: OpcodeIndex = 1; break;
1834 case MVT::v4f32:
1835 case MVT::v4i32: OpcodeIndex = 2; break;
1836 case MVT::v2f64:
1837 case MVT::v2i64: OpcodeIndex = 3; break;
1840 EVT ResTy;
1841 if (NumVecs == 1)
1842 ResTy = VT;
1843 else {
1844 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1845 if (!is64BitVector)
1846 ResTyElts *= 2;
1847 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1849 std::vector<EVT> ResTys;
1850 ResTys.push_back(ResTy);
1851 if (isUpdating)
1852 ResTys.push_back(MVT::i32);
1853 ResTys.push_back(MVT::Other);
1855 SDValue Pred = getAL(CurDAG, dl);
1856 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1857 SDNode *VLd;
1858 SmallVector<SDValue, 7> Ops;
1860 // Double registers and VLD1/VLD2 quad registers are directly supported.
1861 if (is64BitVector || NumVecs <= 2) {
1862 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1863 QOpcodes0[OpcodeIndex]);
1864 Ops.push_back(MemAddr);
1865 Ops.push_back(Align);
1866 if (isUpdating) {
1867 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1868 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1869 if (!IsImmUpdate) {
1870 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1871 // check for the opcode rather than the number of vector elements.
1872 if (isVLDfixed(Opc))
1873 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1874 Ops.push_back(Inc);
1875 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1876 // the operands if not such an opcode.
1877 } else if (!isVLDfixed(Opc))
1878 Ops.push_back(Reg0);
1880 Ops.push_back(Pred);
1881 Ops.push_back(Reg0);
1882 Ops.push_back(Chain);
1883 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1885 } else {
1886 // Otherwise, quad registers are loaded with two separate instructions,
1887 // where one loads the even registers and the other loads the odd registers.
1888 EVT AddrTy = MemAddr.getValueType();
1890 // Load the even subregs. This is always an updating load, so that it
1891 // provides the address to the second load for the odd subregs.
1892 SDValue ImplDef =
1893 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1894 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1895 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1896 ResTy, AddrTy, MVT::Other, OpsA);
1897 Chain = SDValue(VLdA, 2);
1899 // Load the odd subregs.
1900 Ops.push_back(SDValue(VLdA, 1));
1901 Ops.push_back(Align);
1902 if (isUpdating) {
1903 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1904 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1905 "only constant post-increment update allowed for VLD3/4");
1906 (void)Inc;
1907 Ops.push_back(Reg0);
1909 Ops.push_back(SDValue(VLdA, 0));
1910 Ops.push_back(Pred);
1911 Ops.push_back(Reg0);
1912 Ops.push_back(Chain);
1913 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1916 // Transfer memoperands.
1917 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1918 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
1920 if (NumVecs == 1) {
1921 ReplaceNode(N, VLd);
1922 return;
1925 // Extract out the subregisters.
1926 SDValue SuperReg = SDValue(VLd, 0);
1927 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1928 ARM::qsub_3 == ARM::qsub_0 + 3,
1929 "Unexpected subreg numbering");
1930 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1931 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1932 ReplaceUses(SDValue(N, Vec),
1933 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1934 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1935 if (isUpdating)
1936 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1937 CurDAG->RemoveDeadNode(N);
1940 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1941 const uint16_t *DOpcodes,
1942 const uint16_t *QOpcodes0,
1943 const uint16_t *QOpcodes1) {
1944 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1945 SDLoc dl(N);
1947 SDValue MemAddr, Align;
1948 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
1949 // nodes are not intrinsics.
1950 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1951 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1952 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1953 return;
1955 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1957 SDValue Chain = N->getOperand(0);
1958 EVT VT = N->getOperand(Vec0Idx).getValueType();
1959 bool is64BitVector = VT.is64BitVector();
1960 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1962 unsigned OpcodeIndex;
1963 switch (VT.getSimpleVT().SimpleTy) {
1964 default: llvm_unreachable("unhandled vst type");
1965 // Double-register operations:
1966 case MVT::v8i8: OpcodeIndex = 0; break;
1967 case MVT::v4f16:
1968 case MVT::v4i16: OpcodeIndex = 1; break;
1969 case MVT::v2f32:
1970 case MVT::v2i32: OpcodeIndex = 2; break;
1971 case MVT::v1i64: OpcodeIndex = 3; break;
1972 // Quad-register operations:
1973 case MVT::v16i8: OpcodeIndex = 0; break;
1974 case MVT::v8f16:
1975 case MVT::v8i16: OpcodeIndex = 1; break;
1976 case MVT::v4f32:
1977 case MVT::v4i32: OpcodeIndex = 2; break;
1978 case MVT::v2f64:
1979 case MVT::v2i64: OpcodeIndex = 3; break;
1982 std::vector<EVT> ResTys;
1983 if (isUpdating)
1984 ResTys.push_back(MVT::i32);
1985 ResTys.push_back(MVT::Other);
1987 SDValue Pred = getAL(CurDAG, dl);
1988 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1989 SmallVector<SDValue, 7> Ops;
1991 // Double registers and VST1/VST2 quad registers are directly supported.
1992 if (is64BitVector || NumVecs <= 2) {
1993 SDValue SrcReg;
1994 if (NumVecs == 1) {
1995 SrcReg = N->getOperand(Vec0Idx);
1996 } else if (is64BitVector) {
1997 // Form a REG_SEQUENCE to force register allocation.
1998 SDValue V0 = N->getOperand(Vec0Idx + 0);
1999 SDValue V1 = N->getOperand(Vec0Idx + 1);
2000 if (NumVecs == 2)
2001 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2002 else {
2003 SDValue V2 = N->getOperand(Vec0Idx + 2);
2004 // If it's a vst3, form a quad D-register and leave the last part as
2005 // an undef.
2006 SDValue V3 = (NumVecs == 3)
2007 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2008 : N->getOperand(Vec0Idx + 3);
2009 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2011 } else {
2012 // Form a QQ register.
2013 SDValue Q0 = N->getOperand(Vec0Idx);
2014 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2015 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2018 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2019 QOpcodes0[OpcodeIndex]);
2020 Ops.push_back(MemAddr);
2021 Ops.push_back(Align);
2022 if (isUpdating) {
2023 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2024 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2025 if (!IsImmUpdate) {
2026 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2027 // check for the opcode rather than the number of vector elements.
2028 if (isVSTfixed(Opc))
2029 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2030 Ops.push_back(Inc);
2032 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2033 // the operands if not such an opcode.
2034 else if (!isVSTfixed(Opc))
2035 Ops.push_back(Reg0);
2037 Ops.push_back(SrcReg);
2038 Ops.push_back(Pred);
2039 Ops.push_back(Reg0);
2040 Ops.push_back(Chain);
2041 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2043 // Transfer memoperands.
2044 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2046 ReplaceNode(N, VSt);
2047 return;
2050 // Otherwise, quad registers are stored with two separate instructions,
2051 // where one stores the even registers and the other stores the odd registers.
2053 // Form the QQQQ REG_SEQUENCE.
2054 SDValue V0 = N->getOperand(Vec0Idx + 0);
2055 SDValue V1 = N->getOperand(Vec0Idx + 1);
2056 SDValue V2 = N->getOperand(Vec0Idx + 2);
2057 SDValue V3 = (NumVecs == 3)
2058 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2059 : N->getOperand(Vec0Idx + 3);
2060 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2062 // Store the even D registers. This is always an updating store, so that it
2063 // provides the address to the second store for the odd subregs.
2064 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2065 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2066 MemAddr.getValueType(),
2067 MVT::Other, OpsA);
2068 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2069 Chain = SDValue(VStA, 1);
2071 // Store the odd D registers.
2072 Ops.push_back(SDValue(VStA, 0));
2073 Ops.push_back(Align);
2074 if (isUpdating) {
2075 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2076 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2077 "only constant post-increment update allowed for VST3/4");
2078 (void)Inc;
2079 Ops.push_back(Reg0);
2081 Ops.push_back(RegSeq);
2082 Ops.push_back(Pred);
2083 Ops.push_back(Reg0);
2084 Ops.push_back(Chain);
2085 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2086 Ops);
2087 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2088 ReplaceNode(N, VStB);
2091 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2092 unsigned NumVecs,
2093 const uint16_t *DOpcodes,
2094 const uint16_t *QOpcodes) {
2095 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2096 SDLoc dl(N);
2098 SDValue MemAddr, Align;
2099 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2100 // nodes are not intrinsics.
2101 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2102 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2103 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2104 return;
2106 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2108 SDValue Chain = N->getOperand(0);
2109 unsigned Lane =
2110 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2111 EVT VT = N->getOperand(Vec0Idx).getValueType();
2112 bool is64BitVector = VT.is64BitVector();
2114 unsigned Alignment = 0;
2115 if (NumVecs != 3) {
2116 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2117 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2118 if (Alignment > NumBytes)
2119 Alignment = NumBytes;
2120 if (Alignment < 8 && Alignment < NumBytes)
2121 Alignment = 0;
2122 // Alignment must be a power of two; make sure of that.
2123 Alignment = (Alignment & -Alignment);
2124 if (Alignment == 1)
2125 Alignment = 0;
2127 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2129 unsigned OpcodeIndex;
2130 switch (VT.getSimpleVT().SimpleTy) {
2131 default: llvm_unreachable("unhandled vld/vst lane type");
2132 // Double-register operations:
2133 case MVT::v8i8: OpcodeIndex = 0; break;
2134 case MVT::v4f16:
2135 case MVT::v4i16: OpcodeIndex = 1; break;
2136 case MVT::v2f32:
2137 case MVT::v2i32: OpcodeIndex = 2; break;
2138 // Quad-register operations:
2139 case MVT::v8f16:
2140 case MVT::v8i16: OpcodeIndex = 0; break;
2141 case MVT::v4f32:
2142 case MVT::v4i32: OpcodeIndex = 1; break;
2145 std::vector<EVT> ResTys;
2146 if (IsLoad) {
2147 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2148 if (!is64BitVector)
2149 ResTyElts *= 2;
2150 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2151 MVT::i64, ResTyElts));
2153 if (isUpdating)
2154 ResTys.push_back(MVT::i32);
2155 ResTys.push_back(MVT::Other);
2157 SDValue Pred = getAL(CurDAG, dl);
2158 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2160 SmallVector<SDValue, 8> Ops;
2161 Ops.push_back(MemAddr);
2162 Ops.push_back(Align);
2163 if (isUpdating) {
2164 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2165 bool IsImmUpdate =
2166 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2167 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2170 SDValue SuperReg;
2171 SDValue V0 = N->getOperand(Vec0Idx + 0);
2172 SDValue V1 = N->getOperand(Vec0Idx + 1);
2173 if (NumVecs == 2) {
2174 if (is64BitVector)
2175 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2176 else
2177 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2178 } else {
2179 SDValue V2 = N->getOperand(Vec0Idx + 2);
2180 SDValue V3 = (NumVecs == 3)
2181 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2182 : N->getOperand(Vec0Idx + 3);
2183 if (is64BitVector)
2184 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2185 else
2186 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2188 Ops.push_back(SuperReg);
2189 Ops.push_back(getI32Imm(Lane, dl));
2190 Ops.push_back(Pred);
2191 Ops.push_back(Reg0);
2192 Ops.push_back(Chain);
2194 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2195 QOpcodes[OpcodeIndex]);
2196 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2197 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2198 if (!IsLoad) {
2199 ReplaceNode(N, VLdLn);
2200 return;
2203 // Extract the subregisters.
2204 SuperReg = SDValue(VLdLn, 0);
2205 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2206 ARM::qsub_3 == ARM::qsub_0 + 3,
2207 "Unexpected subreg numbering");
2208 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2209 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2210 ReplaceUses(SDValue(N, Vec),
2211 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2212 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2213 if (isUpdating)
2214 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2215 CurDAG->RemoveDeadNode(N);
2218 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2219 bool isUpdating, unsigned NumVecs,
2220 const uint16_t *DOpcodes,
2221 const uint16_t *QOpcodes0,
2222 const uint16_t *QOpcodes1) {
2223 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2224 SDLoc dl(N);
2226 SDValue MemAddr, Align;
2227 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2228 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2229 return;
2231 SDValue Chain = N->getOperand(0);
2232 EVT VT = N->getValueType(0);
2233 bool is64BitVector = VT.is64BitVector();
2235 unsigned Alignment = 0;
2236 if (NumVecs != 3) {
2237 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2238 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2239 if (Alignment > NumBytes)
2240 Alignment = NumBytes;
2241 if (Alignment < 8 && Alignment < NumBytes)
2242 Alignment = 0;
2243 // Alignment must be a power of two; make sure of that.
2244 Alignment = (Alignment & -Alignment);
2245 if (Alignment == 1)
2246 Alignment = 0;
2248 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2250 unsigned OpcodeIndex;
2251 switch (VT.getSimpleVT().SimpleTy) {
2252 default: llvm_unreachable("unhandled vld-dup type");
2253 case MVT::v8i8:
2254 case MVT::v16i8: OpcodeIndex = 0; break;
2255 case MVT::v4i16:
2256 case MVT::v8i16:
2257 case MVT::v4f16:
2258 case MVT::v8f16:
2259 OpcodeIndex = 1; break;
2260 case MVT::v2f32:
2261 case MVT::v2i32:
2262 case MVT::v4f32:
2263 case MVT::v4i32: OpcodeIndex = 2; break;
2264 case MVT::v1f64:
2265 case MVT::v1i64: OpcodeIndex = 3; break;
2268 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2269 if (!is64BitVector)
2270 ResTyElts *= 2;
2271 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2273 std::vector<EVT> ResTys;
2274 ResTys.push_back(ResTy);
2275 if (isUpdating)
2276 ResTys.push_back(MVT::i32);
2277 ResTys.push_back(MVT::Other);
2279 SDValue Pred = getAL(CurDAG, dl);
2280 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2282 SDNode *VLdDup;
2283 if (is64BitVector || NumVecs == 1) {
2284 SmallVector<SDValue, 6> Ops;
2285 Ops.push_back(MemAddr);
2286 Ops.push_back(Align);
2287 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2288 QOpcodes0[OpcodeIndex];
2289 if (isUpdating) {
2290 // fixed-stride update instructions don't have an explicit writeback
2291 // operand. It's implicit in the opcode itself.
2292 SDValue Inc = N->getOperand(2);
2293 bool IsImmUpdate =
2294 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2295 if (NumVecs <= 2 && !IsImmUpdate)
2296 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2297 if (!IsImmUpdate)
2298 Ops.push_back(Inc);
2299 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2300 else if (NumVecs > 2)
2301 Ops.push_back(Reg0);
2303 Ops.push_back(Pred);
2304 Ops.push_back(Reg0);
2305 Ops.push_back(Chain);
2306 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2307 } else if (NumVecs == 2) {
2308 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2309 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2310 dl, ResTys, OpsA);
2312 Chain = SDValue(VLdA, 1);
2313 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2314 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2315 } else {
2316 SDValue ImplDef =
2317 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2318 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2319 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2320 dl, ResTys, OpsA);
2322 SDValue SuperReg = SDValue(VLdA, 0);
2323 Chain = SDValue(VLdA, 1);
2324 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2325 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2328 // Transfer memoperands.
2329 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2330 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2332 // Extract the subregisters.
2333 if (NumVecs == 1) {
2334 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2335 } else {
2336 SDValue SuperReg = SDValue(VLdDup, 0);
2337 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2338 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2339 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2340 ReplaceUses(SDValue(N, Vec),
2341 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2344 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2345 if (isUpdating)
2346 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2347 CurDAG->RemoveDeadNode(N);
2350 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2351 if (!Subtarget->hasV6T2Ops())
2352 return false;
2354 unsigned Opc = isSigned
2355 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2356 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2357 SDLoc dl(N);
2359 // For unsigned extracts, check for a shift right and mask
2360 unsigned And_imm = 0;
2361 if (N->getOpcode() == ISD::AND) {
2362 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2364 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2365 if (And_imm & (And_imm + 1))
2366 return false;
2368 unsigned Srl_imm = 0;
2369 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2370 Srl_imm)) {
2371 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2373 // Mask off the unnecessary bits of the AND immediate; normally
2374 // DAGCombine will do this, but that might not happen if
2375 // targetShrinkDemandedConstant chooses a different immediate.
2376 And_imm &= -1U >> Srl_imm;
2378 // Note: The width operand is encoded as width-1.
2379 unsigned Width = countTrailingOnes(And_imm) - 1;
2380 unsigned LSB = Srl_imm;
2382 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2384 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2385 // It's cheaper to use a right shift to extract the top bits.
2386 if (Subtarget->isThumb()) {
2387 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2388 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2389 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2390 getAL(CurDAG, dl), Reg0, Reg0 };
2391 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2392 return true;
2395 // ARM models shift instructions as MOVsi with shifter operand.
2396 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2397 SDValue ShOpc =
2398 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2399 MVT::i32);
2400 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2401 getAL(CurDAG, dl), Reg0, Reg0 };
2402 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2403 return true;
2406 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2407 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2408 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2409 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2410 getAL(CurDAG, dl), Reg0 };
2411 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2412 return true;
2415 return false;
2418 // Otherwise, we're looking for a shift of a shift
2419 unsigned Shl_imm = 0;
2420 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2421 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2422 unsigned Srl_imm = 0;
2423 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2424 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2425 // Note: The width operand is encoded as width-1.
2426 unsigned Width = 32 - Srl_imm - 1;
2427 int LSB = Srl_imm - Shl_imm;
2428 if (LSB < 0)
2429 return false;
2430 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2431 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2432 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2433 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2434 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2435 getAL(CurDAG, dl), Reg0 };
2436 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2437 return true;
2441 // Or we are looking for a shift of an and, with a mask operand
2442 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2443 isShiftedMask_32(And_imm)) {
2444 unsigned Srl_imm = 0;
2445 unsigned LSB = countTrailingZeros(And_imm);
2446 // Shift must be the same as the ands lsb
2447 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2448 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2449 unsigned MSB = 31 - countLeadingZeros(And_imm);
2450 // Note: The width operand is encoded as width-1.
2451 unsigned Width = MSB - LSB;
2452 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2453 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2454 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2455 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2456 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2457 getAL(CurDAG, dl), Reg0 };
2458 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2459 return true;
2463 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2464 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2465 unsigned LSB = 0;
2466 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2467 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2468 return false;
2470 if (LSB + Width > 32)
2471 return false;
2473 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2474 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2475 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2476 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2477 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2478 getAL(CurDAG, dl), Reg0 };
2479 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2480 return true;
2483 return false;
2486 /// Target-specific DAG combining for ISD::XOR.
2487 /// Target-independent combining lowers SELECT_CC nodes of the form
2488 /// select_cc setg[ge] X, 0, X, -X
2489 /// select_cc setgt X, -1, X, -X
2490 /// select_cc setl[te] X, 0, -X, X
2491 /// select_cc setlt X, 1, -X, X
2492 /// which represent Integer ABS into:
2493 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2494 /// ARM instruction selection detects the latter and matches it to
2495 /// ARM::ABS or ARM::t2ABS machine node.
2496 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2497 SDValue XORSrc0 = N->getOperand(0);
2498 SDValue XORSrc1 = N->getOperand(1);
2499 EVT VT = N->getValueType(0);
2501 if (Subtarget->isThumb1Only())
2502 return false;
2504 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2505 return false;
2507 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2508 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2509 SDValue SRASrc0 = XORSrc1.getOperand(0);
2510 SDValue SRASrc1 = XORSrc1.getOperand(1);
2511 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2512 EVT XType = SRASrc0.getValueType();
2513 unsigned Size = XType.getSizeInBits() - 1;
2515 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2516 XType.isInteger() && SRAConstant != nullptr &&
2517 Size == SRAConstant->getZExtValue()) {
2518 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2519 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2520 return true;
2523 return false;
2526 /// We've got special pseudo-instructions for these
2527 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2528 unsigned Opcode;
2529 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2530 if (MemTy == MVT::i8)
2531 Opcode = ARM::CMP_SWAP_8;
2532 else if (MemTy == MVT::i16)
2533 Opcode = ARM::CMP_SWAP_16;
2534 else if (MemTy == MVT::i32)
2535 Opcode = ARM::CMP_SWAP_32;
2536 else
2537 llvm_unreachable("Unknown AtomicCmpSwap type");
2539 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2540 N->getOperand(0)};
2541 SDNode *CmpSwap = CurDAG->getMachineNode(
2542 Opcode, SDLoc(N),
2543 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2545 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2546 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2548 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2549 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2550 CurDAG->RemoveDeadNode(N);
2553 static Optional<std::pair<unsigned, unsigned>>
2554 getContiguousRangeOfSetBits(const APInt &A) {
2555 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2556 unsigned LastOne = A.countTrailingZeros();
2557 if (A.countPopulation() != (FirstOne - LastOne + 1))
2558 return Optional<std::pair<unsigned,unsigned>>();
2559 return std::make_pair(FirstOne, LastOne);
2562 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2563 assert(N->getOpcode() == ARMISD::CMPZ);
2564 SwitchEQNEToPLMI = false;
2566 if (!Subtarget->isThumb())
2567 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2568 // LSR don't exist as standalone instructions - they need the barrel shifter.
2569 return;
2571 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2572 SDValue And = N->getOperand(0);
2573 if (!And->hasOneUse())
2574 return;
2576 SDValue Zero = N->getOperand(1);
2577 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2578 And->getOpcode() != ISD::AND)
2579 return;
2580 SDValue X = And.getOperand(0);
2581 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2583 if (!C)
2584 return;
2585 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2586 if (!Range)
2587 return;
2589 // There are several ways to lower this:
2590 SDNode *NewN;
2591 SDLoc dl(N);
2593 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2594 if (Subtarget->isThumb2()) {
2595 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2596 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2597 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2598 CurDAG->getRegister(0, MVT::i32) };
2599 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2600 } else {
2601 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2602 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2603 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2604 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2608 if (Range->second == 0) {
2609 // 1. Mask includes the LSB -> Simply shift the top N bits off
2610 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2611 ReplaceNode(And.getNode(), NewN);
2612 } else if (Range->first == 31) {
2613 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2614 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2615 ReplaceNode(And.getNode(), NewN);
2616 } else if (Range->first == Range->second) {
2617 // 3. Only one bit is set. We can shift this into the sign bit and use a
2618 // PL/MI comparison.
2619 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2620 ReplaceNode(And.getNode(), NewN);
2622 SwitchEQNEToPLMI = true;
2623 } else if (!Subtarget->hasV6T2Ops()) {
2624 // 4. Do a double shift to clear bottom and top bits, but only in
2625 // thumb-1 mode as in thumb-2 we can use UBFX.
2626 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2627 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2628 Range->second + (31 - Range->first));
2629 ReplaceNode(And.getNode(), NewN);
2634 void ARMDAGToDAGISel::Select(SDNode *N) {
2635 SDLoc dl(N);
2637 if (N->isMachineOpcode()) {
2638 N->setNodeId(-1);
2639 return; // Already selected.
2642 switch (N->getOpcode()) {
2643 default: break;
2644 case ISD::STORE: {
2645 // For Thumb1, match an sp-relative store in C++. This is a little
2646 // unfortunate, but I don't think I can make the chain check work
2647 // otherwise. (The chain of the store has to be the same as the chain
2648 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2649 // a direct reference to "SP".)
2651 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2652 // a different addressing mode from other four-byte stores.
2654 // This pattern usually comes up with call arguments.
2655 StoreSDNode *ST = cast<StoreSDNode>(N);
2656 SDValue Ptr = ST->getBasePtr();
2657 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2658 int RHSC = 0;
2659 if (Ptr.getOpcode() == ISD::ADD &&
2660 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2661 Ptr = Ptr.getOperand(0);
2663 if (Ptr.getOpcode() == ISD::CopyFromReg &&
2664 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2665 Ptr.getOperand(0) == ST->getChain()) {
2666 SDValue Ops[] = {ST->getValue(),
2667 CurDAG->getRegister(ARM::SP, MVT::i32),
2668 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2669 getAL(CurDAG, dl),
2670 CurDAG->getRegister(0, MVT::i32),
2671 ST->getChain()};
2672 MachineSDNode *ResNode =
2673 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2674 MachineMemOperand *MemOp = ST->getMemOperand();
2675 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2676 ReplaceNode(N, ResNode);
2677 return;
2680 break;
2682 case ISD::WRITE_REGISTER:
2683 if (tryWriteRegister(N))
2684 return;
2685 break;
2686 case ISD::READ_REGISTER:
2687 if (tryReadRegister(N))
2688 return;
2689 break;
2690 case ISD::INLINEASM:
2691 case ISD::INLINEASM_BR:
2692 if (tryInlineAsm(N))
2693 return;
2694 break;
2695 case ISD::XOR:
2696 // Select special operations if XOR node forms integer ABS pattern
2697 if (tryABSOp(N))
2698 return;
2699 // Other cases are autogenerated.
2700 break;
2701 case ISD::Constant: {
2702 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2703 // If we can't materialize the constant we need to use a literal pool
2704 if (ConstantMaterializationCost(Val) > 2) {
2705 SDValue CPIdx = CurDAG->getTargetConstantPool(
2706 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2707 TLI->getPointerTy(CurDAG->getDataLayout()));
2709 SDNode *ResNode;
2710 if (Subtarget->isThumb()) {
2711 SDValue Ops[] = {
2712 CPIdx,
2713 getAL(CurDAG, dl),
2714 CurDAG->getRegister(0, MVT::i32),
2715 CurDAG->getEntryNode()
2717 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2718 Ops);
2719 } else {
2720 SDValue Ops[] = {
2721 CPIdx,
2722 CurDAG->getTargetConstant(0, dl, MVT::i32),
2723 getAL(CurDAG, dl),
2724 CurDAG->getRegister(0, MVT::i32),
2725 CurDAG->getEntryNode()
2727 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2728 Ops);
2730 // Annotate the Node with memory operand information so that MachineInstr
2731 // queries work properly. This e.g. gives the register allocation the
2732 // required information for rematerialization.
2733 MachineFunction& MF = CurDAG->getMachineFunction();
2734 MachineMemOperand *MemOp =
2735 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2736 MachineMemOperand::MOLoad, 4, 4);
2738 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2740 ReplaceNode(N, ResNode);
2741 return;
2744 // Other cases are autogenerated.
2745 break;
2747 case ISD::FrameIndex: {
2748 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2749 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2750 SDValue TFI = CurDAG->getTargetFrameIndex(
2751 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2752 if (Subtarget->isThumb1Only()) {
2753 // Set the alignment of the frame object to 4, to avoid having to generate
2754 // more than one ADD
2755 MachineFrameInfo &MFI = MF->getFrameInfo();
2756 if (MFI.getObjectAlignment(FI) < 4)
2757 MFI.setObjectAlignment(FI, 4);
2758 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2759 CurDAG->getTargetConstant(0, dl, MVT::i32));
2760 return;
2761 } else {
2762 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2763 ARM::t2ADDri : ARM::ADDri);
2764 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2765 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2766 CurDAG->getRegister(0, MVT::i32) };
2767 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2768 return;
2771 case ISD::SRL:
2772 if (tryV6T2BitfieldExtractOp(N, false))
2773 return;
2774 break;
2775 case ISD::SIGN_EXTEND_INREG:
2776 case ISD::SRA:
2777 if (tryV6T2BitfieldExtractOp(N, true))
2778 return;
2779 break;
2780 case ISD::MUL:
2781 if (Subtarget->isThumb1Only())
2782 break;
2783 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2784 unsigned RHSV = C->getZExtValue();
2785 if (!RHSV) break;
2786 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2787 unsigned ShImm = Log2_32(RHSV-1);
2788 if (ShImm >= 32)
2789 break;
2790 SDValue V = N->getOperand(0);
2791 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2792 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2793 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2794 if (Subtarget->isThumb()) {
2795 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2796 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2797 return;
2798 } else {
2799 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2800 Reg0 };
2801 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2802 return;
2805 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2806 unsigned ShImm = Log2_32(RHSV+1);
2807 if (ShImm >= 32)
2808 break;
2809 SDValue V = N->getOperand(0);
2810 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2811 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2812 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2813 if (Subtarget->isThumb()) {
2814 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2815 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2816 return;
2817 } else {
2818 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2819 Reg0 };
2820 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2821 return;
2825 break;
2826 case ISD::AND: {
2827 // Check for unsigned bitfield extract
2828 if (tryV6T2BitfieldExtractOp(N, false))
2829 return;
2831 // If an immediate is used in an AND node, it is possible that the immediate
2832 // can be more optimally materialized when negated. If this is the case we
2833 // can negate the immediate and use a BIC instead.
2834 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2835 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2836 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2838 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2839 // immediate can be negated and fit in the immediate operand of
2840 // a t2BIC, don't do any manual transform here as this can be
2841 // handled by the generic ISel machinery.
2842 bool PreferImmediateEncoding =
2843 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2844 if (!PreferImmediateEncoding &&
2845 ConstantMaterializationCost(Imm) >
2846 ConstantMaterializationCost(~Imm)) {
2847 // The current immediate costs more to materialize than a negated
2848 // immediate, so negate the immediate and use a BIC.
2849 SDValue NewImm =
2850 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2851 // If the new constant didn't exist before, reposition it in the topological
2852 // ordering so it is just before N. Otherwise, don't touch its location.
2853 if (NewImm->getNodeId() == -1)
2854 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2856 if (!Subtarget->hasThumb2()) {
2857 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2858 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2859 CurDAG->getRegister(0, MVT::i32)};
2860 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2861 return;
2862 } else {
2863 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2864 CurDAG->getRegister(0, MVT::i32),
2865 CurDAG->getRegister(0, MVT::i32)};
2866 ReplaceNode(N,
2867 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2868 return;
2873 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2874 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2875 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2876 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2877 // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
2878 EVT VT = N->getValueType(0);
2879 if (VT != MVT::i32)
2880 break;
2881 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2882 ? ARM::t2MOVTi16
2883 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2884 if (!Opc)
2885 break;
2886 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2887 N1C = dyn_cast<ConstantSDNode>(N1);
2888 if (!N1C)
2889 break;
2890 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2891 SDValue N2 = N0.getOperand(1);
2892 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2893 if (!N2C)
2894 break;
2895 unsigned N1CVal = N1C->getZExtValue();
2896 unsigned N2CVal = N2C->getZExtValue();
2897 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2898 (N1CVal & 0xffffU) == 0xffffU &&
2899 (N2CVal & 0xffffU) == 0x0U) {
2900 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2901 dl, MVT::i32);
2902 SDValue Ops[] = { N0.getOperand(0), Imm16,
2903 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2904 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2905 return;
2909 break;
2911 case ARMISD::UMAAL: {
2912 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2913 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2914 N->getOperand(2), N->getOperand(3),
2915 getAL(CurDAG, dl),
2916 CurDAG->getRegister(0, MVT::i32) };
2917 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2918 return;
2920 case ARMISD::UMLAL:{
2921 if (Subtarget->isThumb()) {
2922 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2923 N->getOperand(3), getAL(CurDAG, dl),
2924 CurDAG->getRegister(0, MVT::i32)};
2925 ReplaceNode(
2926 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2927 return;
2928 }else{
2929 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2930 N->getOperand(3), getAL(CurDAG, dl),
2931 CurDAG->getRegister(0, MVT::i32),
2932 CurDAG->getRegister(0, MVT::i32) };
2933 ReplaceNode(N, CurDAG->getMachineNode(
2934 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2935 MVT::i32, MVT::i32, Ops));
2936 return;
2939 case ARMISD::SMLAL:{
2940 if (Subtarget->isThumb()) {
2941 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2942 N->getOperand(3), getAL(CurDAG, dl),
2943 CurDAG->getRegister(0, MVT::i32)};
2944 ReplaceNode(
2945 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2946 return;
2947 }else{
2948 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2949 N->getOperand(3), getAL(CurDAG, dl),
2950 CurDAG->getRegister(0, MVT::i32),
2951 CurDAG->getRegister(0, MVT::i32) };
2952 ReplaceNode(N, CurDAG->getMachineNode(
2953 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2954 MVT::i32, MVT::i32, Ops));
2955 return;
2958 case ARMISD::SUBE: {
2959 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2960 break;
2961 // Look for a pattern to match SMMLS
2962 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2963 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2964 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2965 !SDValue(N, 1).use_empty())
2966 break;
2968 if (Subtarget->isThumb())
2969 assert(Subtarget->hasThumb2() &&
2970 "This pattern should not be generated for Thumb");
2972 SDValue SmulLoHi = N->getOperand(1);
2973 SDValue Subc = N->getOperand(2);
2974 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2976 if (!Zero || Zero->getZExtValue() != 0 ||
2977 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2978 N->getOperand(1) != SmulLoHi.getValue(1) ||
2979 N->getOperand(2) != Subc.getValue(1))
2980 break;
2982 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2983 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2984 N->getOperand(0), getAL(CurDAG, dl),
2985 CurDAG->getRegister(0, MVT::i32) };
2986 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2987 return;
2989 case ISD::LOAD: {
2990 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2991 if (tryT2IndexedLoad(N))
2992 return;
2993 } else if (Subtarget->isThumb()) {
2994 if (tryT1IndexedLoad(N))
2995 return;
2996 } else if (tryARMIndexedLoad(N))
2997 return;
2998 // Other cases are autogenerated.
2999 break;
3001 case ARMISD::WLS:
3002 case ARMISD::LE: {
3003 SDValue Ops[] = { N->getOperand(1),
3004 N->getOperand(2),
3005 N->getOperand(0) };
3006 unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3007 ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3008 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3009 ReplaceUses(N, New);
3010 CurDAG->RemoveDeadNode(N);
3011 return;
3013 case ARMISD::LOOP_DEC: {
3014 SDValue Ops[] = { N->getOperand(1),
3015 N->getOperand(2),
3016 N->getOperand(0) };
3017 SDNode *Dec =
3018 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3019 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3020 ReplaceUses(N, Dec);
3021 CurDAG->RemoveDeadNode(N);
3022 return;
3024 case ARMISD::BRCOND: {
3025 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3026 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3027 // Pattern complexity = 6 cost = 1 size = 0
3029 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3030 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3031 // Pattern complexity = 6 cost = 1 size = 0
3033 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3034 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3035 // Pattern complexity = 6 cost = 1 size = 0
3037 unsigned Opc = Subtarget->isThumb() ?
3038 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3039 SDValue Chain = N->getOperand(0);
3040 SDValue N1 = N->getOperand(1);
3041 SDValue N2 = N->getOperand(2);
3042 SDValue N3 = N->getOperand(3);
3043 SDValue InFlag = N->getOperand(4);
3044 assert(N1.getOpcode() == ISD::BasicBlock);
3045 assert(N2.getOpcode() == ISD::Constant);
3046 assert(N3.getOpcode() == ISD::Register);
3048 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3050 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3051 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3052 SDValue Int = InFlag.getOperand(0);
3053 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3055 // Handle low-overhead loops.
3056 if (ID == Intrinsic::loop_decrement_reg) {
3057 SDValue Elements = Int.getOperand(2);
3058 SDValue Size = CurDAG->getTargetConstant(
3059 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3060 MVT::i32);
3062 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3063 SDNode *LoopDec =
3064 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3065 CurDAG->getVTList(MVT::i32, MVT::Other),
3066 Args);
3067 ReplaceUses(Int.getNode(), LoopDec);
3069 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3070 SDNode *LoopEnd =
3071 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3073 ReplaceUses(N, LoopEnd);
3074 CurDAG->RemoveDeadNode(N);
3075 CurDAG->RemoveDeadNode(InFlag.getNode());
3076 CurDAG->RemoveDeadNode(Int.getNode());
3077 return;
3081 bool SwitchEQNEToPLMI;
3082 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3083 InFlag = N->getOperand(4);
3085 if (SwitchEQNEToPLMI) {
3086 switch ((ARMCC::CondCodes)CC) {
3087 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3088 case ARMCC::NE:
3089 CC = (unsigned)ARMCC::MI;
3090 break;
3091 case ARMCC::EQ:
3092 CC = (unsigned)ARMCC::PL;
3093 break;
3098 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3099 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3100 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3101 MVT::Glue, Ops);
3102 Chain = SDValue(ResNode, 0);
3103 if (N->getNumValues() == 2) {
3104 InFlag = SDValue(ResNode, 1);
3105 ReplaceUses(SDValue(N, 1), InFlag);
3107 ReplaceUses(SDValue(N, 0),
3108 SDValue(Chain.getNode(), Chain.getResNo()));
3109 CurDAG->RemoveDeadNode(N);
3110 return;
3113 case ARMISD::CMPZ: {
3114 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3115 // This allows us to avoid materializing the expensive negative constant.
3116 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3117 // for its glue output.
3118 SDValue X = N->getOperand(0);
3119 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3120 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3121 int64_t Addend = -C->getSExtValue();
3123 SDNode *Add = nullptr;
3124 // ADDS can be better than CMN if the immediate fits in a
3125 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3126 // Outside that range we can just use a CMN which is 32-bit but has a
3127 // 12-bit immediate range.
3128 if (Addend < 1<<8) {
3129 if (Subtarget->isThumb2()) {
3130 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3131 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3132 CurDAG->getRegister(0, MVT::i32) };
3133 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3134 } else {
3135 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3136 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3137 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3138 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3139 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3142 if (Add) {
3143 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3144 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3147 // Other cases are autogenerated.
3148 break;
3151 case ARMISD::CMOV: {
3152 SDValue InFlag = N->getOperand(4);
3154 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3155 bool SwitchEQNEToPLMI;
3156 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3158 if (SwitchEQNEToPLMI) {
3159 SDValue ARMcc = N->getOperand(2);
3160 ARMCC::CondCodes CC =
3161 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3163 switch (CC) {
3164 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3165 case ARMCC::NE:
3166 CC = ARMCC::MI;
3167 break;
3168 case ARMCC::EQ:
3169 CC = ARMCC::PL;
3170 break;
3172 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3173 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3174 N->getOperand(3), N->getOperand(4)};
3175 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3179 // Other cases are autogenerated.
3180 break;
3183 case ARMISD::VZIP: {
3184 unsigned Opc = 0;
3185 EVT VT = N->getValueType(0);
3186 switch (VT.getSimpleVT().SimpleTy) {
3187 default: return;
3188 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3189 case MVT::v4f16:
3190 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3191 case MVT::v2f32:
3192 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3193 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3194 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3195 case MVT::v8f16:
3196 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3197 case MVT::v4f32:
3198 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3200 SDValue Pred = getAL(CurDAG, dl);
3201 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3202 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3203 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3204 return;
3206 case ARMISD::VUZP: {
3207 unsigned Opc = 0;
3208 EVT VT = N->getValueType(0);
3209 switch (VT.getSimpleVT().SimpleTy) {
3210 default: return;
3211 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3212 case MVT::v4f16:
3213 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3214 case MVT::v2f32:
3215 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3216 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3217 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3218 case MVT::v8f16:
3219 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3220 case MVT::v4f32:
3221 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3223 SDValue Pred = getAL(CurDAG, dl);
3224 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3225 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3226 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3227 return;
3229 case ARMISD::VTRN: {
3230 unsigned Opc = 0;
3231 EVT VT = N->getValueType(0);
3232 switch (VT.getSimpleVT().SimpleTy) {
3233 default: return;
3234 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3235 case MVT::v4f16:
3236 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3237 case MVT::v2f32:
3238 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3239 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3240 case MVT::v8f16:
3241 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3242 case MVT::v4f32:
3243 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3245 SDValue Pred = getAL(CurDAG, dl);
3246 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3247 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3248 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3249 return;
3251 case ARMISD::BUILD_VECTOR: {
3252 EVT VecVT = N->getValueType(0);
3253 EVT EltVT = VecVT.getVectorElementType();
3254 unsigned NumElts = VecVT.getVectorNumElements();
3255 if (EltVT == MVT::f64) {
3256 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3257 ReplaceNode(
3258 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3259 return;
3261 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3262 if (NumElts == 2) {
3263 ReplaceNode(
3264 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3265 return;
3267 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3268 ReplaceNode(N,
3269 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3270 N->getOperand(2), N->getOperand(3)));
3271 return;
3274 case ARMISD::VLD1DUP: {
3275 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3276 ARM::VLD1DUPd32 };
3277 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3278 ARM::VLD1DUPq32 };
3279 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3280 return;
3283 case ARMISD::VLD2DUP: {
3284 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3285 ARM::VLD2DUPd32 };
3286 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3287 return;
3290 case ARMISD::VLD3DUP: {
3291 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3292 ARM::VLD3DUPd16Pseudo,
3293 ARM::VLD3DUPd32Pseudo };
3294 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3295 return;
3298 case ARMISD::VLD4DUP: {
3299 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3300 ARM::VLD4DUPd16Pseudo,
3301 ARM::VLD4DUPd32Pseudo };
3302 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3303 return;
3306 case ARMISD::VLD1DUP_UPD: {
3307 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3308 ARM::VLD1DUPd16wb_fixed,
3309 ARM::VLD1DUPd32wb_fixed };
3310 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3311 ARM::VLD1DUPq16wb_fixed,
3312 ARM::VLD1DUPq32wb_fixed };
3313 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3314 return;
3317 case ARMISD::VLD2DUP_UPD: {
3318 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3319 ARM::VLD2DUPd16wb_fixed,
3320 ARM::VLD2DUPd32wb_fixed };
3321 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3322 return;
3325 case ARMISD::VLD3DUP_UPD: {
3326 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3327 ARM::VLD3DUPd16Pseudo_UPD,
3328 ARM::VLD3DUPd32Pseudo_UPD };
3329 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3330 return;
3333 case ARMISD::VLD4DUP_UPD: {
3334 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3335 ARM::VLD4DUPd16Pseudo_UPD,
3336 ARM::VLD4DUPd32Pseudo_UPD };
3337 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3338 return;
3341 case ARMISD::VLD1_UPD: {
3342 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3343 ARM::VLD1d16wb_fixed,
3344 ARM::VLD1d32wb_fixed,
3345 ARM::VLD1d64wb_fixed };
3346 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3347 ARM::VLD1q16wb_fixed,
3348 ARM::VLD1q32wb_fixed,
3349 ARM::VLD1q64wb_fixed };
3350 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3351 return;
3354 case ARMISD::VLD2_UPD: {
3355 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3356 ARM::VLD2d16wb_fixed,
3357 ARM::VLD2d32wb_fixed,
3358 ARM::VLD1q64wb_fixed};
3359 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3360 ARM::VLD2q16PseudoWB_fixed,
3361 ARM::VLD2q32PseudoWB_fixed };
3362 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3363 return;
3366 case ARMISD::VLD3_UPD: {
3367 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3368 ARM::VLD3d16Pseudo_UPD,
3369 ARM::VLD3d32Pseudo_UPD,
3370 ARM::VLD1d64TPseudoWB_fixed};
3371 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3372 ARM::VLD3q16Pseudo_UPD,
3373 ARM::VLD3q32Pseudo_UPD };
3374 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3375 ARM::VLD3q16oddPseudo_UPD,
3376 ARM::VLD3q32oddPseudo_UPD };
3377 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3378 return;
3381 case ARMISD::VLD4_UPD: {
3382 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3383 ARM::VLD4d16Pseudo_UPD,
3384 ARM::VLD4d32Pseudo_UPD,
3385 ARM::VLD1d64QPseudoWB_fixed};
3386 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3387 ARM::VLD4q16Pseudo_UPD,
3388 ARM::VLD4q32Pseudo_UPD };
3389 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3390 ARM::VLD4q16oddPseudo_UPD,
3391 ARM::VLD4q32oddPseudo_UPD };
3392 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3393 return;
3396 case ARMISD::VLD2LN_UPD: {
3397 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3398 ARM::VLD2LNd16Pseudo_UPD,
3399 ARM::VLD2LNd32Pseudo_UPD };
3400 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3401 ARM::VLD2LNq32Pseudo_UPD };
3402 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3403 return;
3406 case ARMISD::VLD3LN_UPD: {
3407 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3408 ARM::VLD3LNd16Pseudo_UPD,
3409 ARM::VLD3LNd32Pseudo_UPD };
3410 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3411 ARM::VLD3LNq32Pseudo_UPD };
3412 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3413 return;
3416 case ARMISD::VLD4LN_UPD: {
3417 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3418 ARM::VLD4LNd16Pseudo_UPD,
3419 ARM::VLD4LNd32Pseudo_UPD };
3420 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3421 ARM::VLD4LNq32Pseudo_UPD };
3422 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3423 return;
3426 case ARMISD::VST1_UPD: {
3427 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3428 ARM::VST1d16wb_fixed,
3429 ARM::VST1d32wb_fixed,
3430 ARM::VST1d64wb_fixed };
3431 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3432 ARM::VST1q16wb_fixed,
3433 ARM::VST1q32wb_fixed,
3434 ARM::VST1q64wb_fixed };
3435 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3436 return;
3439 case ARMISD::VST2_UPD: {
3440 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3441 ARM::VST2d16wb_fixed,
3442 ARM::VST2d32wb_fixed,
3443 ARM::VST1q64wb_fixed};
3444 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3445 ARM::VST2q16PseudoWB_fixed,
3446 ARM::VST2q32PseudoWB_fixed };
3447 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3448 return;
3451 case ARMISD::VST3_UPD: {
3452 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3453 ARM::VST3d16Pseudo_UPD,
3454 ARM::VST3d32Pseudo_UPD,
3455 ARM::VST1d64TPseudoWB_fixed};
3456 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3457 ARM::VST3q16Pseudo_UPD,
3458 ARM::VST3q32Pseudo_UPD };
3459 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3460 ARM::VST3q16oddPseudo_UPD,
3461 ARM::VST3q32oddPseudo_UPD };
3462 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3463 return;
3466 case ARMISD::VST4_UPD: {
3467 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3468 ARM::VST4d16Pseudo_UPD,
3469 ARM::VST4d32Pseudo_UPD,
3470 ARM::VST1d64QPseudoWB_fixed};
3471 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3472 ARM::VST4q16Pseudo_UPD,
3473 ARM::VST4q32Pseudo_UPD };
3474 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3475 ARM::VST4q16oddPseudo_UPD,
3476 ARM::VST4q32oddPseudo_UPD };
3477 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3478 return;
3481 case ARMISD::VST2LN_UPD: {
3482 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3483 ARM::VST2LNd16Pseudo_UPD,
3484 ARM::VST2LNd32Pseudo_UPD };
3485 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3486 ARM::VST2LNq32Pseudo_UPD };
3487 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3488 return;
3491 case ARMISD::VST3LN_UPD: {
3492 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3493 ARM::VST3LNd16Pseudo_UPD,
3494 ARM::VST3LNd32Pseudo_UPD };
3495 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3496 ARM::VST3LNq32Pseudo_UPD };
3497 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3498 return;
3501 case ARMISD::VST4LN_UPD: {
3502 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3503 ARM::VST4LNd16Pseudo_UPD,
3504 ARM::VST4LNd32Pseudo_UPD };
3505 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3506 ARM::VST4LNq32Pseudo_UPD };
3507 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3508 return;
3511 case ISD::INTRINSIC_VOID:
3512 case ISD::INTRINSIC_W_CHAIN: {
3513 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3514 switch (IntNo) {
3515 default:
3516 break;
3518 case Intrinsic::arm_mrrc:
3519 case Intrinsic::arm_mrrc2: {
3520 SDLoc dl(N);
3521 SDValue Chain = N->getOperand(0);
3522 unsigned Opc;
3524 if (Subtarget->isThumb())
3525 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3526 else
3527 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3529 SmallVector<SDValue, 5> Ops;
3530 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3531 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3532 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3534 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3535 // instruction will always be '1111' but it is possible in assembly language to specify
3536 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3537 if (Opc != ARM::MRRC2) {
3538 Ops.push_back(getAL(CurDAG, dl));
3539 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3542 Ops.push_back(Chain);
3544 // Writes to two registers.
3545 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3547 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3548 return;
3550 case Intrinsic::arm_ldaexd:
3551 case Intrinsic::arm_ldrexd: {
3552 SDLoc dl(N);
3553 SDValue Chain = N->getOperand(0);
3554 SDValue MemAddr = N->getOperand(2);
3555 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3557 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3558 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3559 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3561 // arm_ldrexd returns an i64 value in {i32, i32}
3562 std::vector<EVT> ResTys;
3563 if (isThumb) {
3564 ResTys.push_back(MVT::i32);
3565 ResTys.push_back(MVT::i32);
3566 } else
3567 ResTys.push_back(MVT::Untyped);
3568 ResTys.push_back(MVT::Other);
3570 // Place arguments in the right order.
3571 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3572 CurDAG->getRegister(0, MVT::i32), Chain};
3573 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3574 // Transfer memoperands.
3575 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3576 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3578 // Remap uses.
3579 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3580 if (!SDValue(N, 0).use_empty()) {
3581 SDValue Result;
3582 if (isThumb)
3583 Result = SDValue(Ld, 0);
3584 else {
3585 SDValue SubRegIdx =
3586 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3587 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3588 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3589 Result = SDValue(ResNode,0);
3591 ReplaceUses(SDValue(N, 0), Result);
3593 if (!SDValue(N, 1).use_empty()) {
3594 SDValue Result;
3595 if (isThumb)
3596 Result = SDValue(Ld, 1);
3597 else {
3598 SDValue SubRegIdx =
3599 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3600 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3601 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3602 Result = SDValue(ResNode,0);
3604 ReplaceUses(SDValue(N, 1), Result);
3606 ReplaceUses(SDValue(N, 2), OutChain);
3607 CurDAG->RemoveDeadNode(N);
3608 return;
3610 case Intrinsic::arm_stlexd:
3611 case Intrinsic::arm_strexd: {
3612 SDLoc dl(N);
3613 SDValue Chain = N->getOperand(0);
3614 SDValue Val0 = N->getOperand(2);
3615 SDValue Val1 = N->getOperand(3);
3616 SDValue MemAddr = N->getOperand(4);
3618 // Store exclusive double return a i32 value which is the return status
3619 // of the issued store.
3620 const EVT ResTys[] = {MVT::i32, MVT::Other};
3622 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3623 // Place arguments in the right order.
3624 SmallVector<SDValue, 7> Ops;
3625 if (isThumb) {
3626 Ops.push_back(Val0);
3627 Ops.push_back(Val1);
3628 } else
3629 // arm_strexd uses GPRPair.
3630 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3631 Ops.push_back(MemAddr);
3632 Ops.push_back(getAL(CurDAG, dl));
3633 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3634 Ops.push_back(Chain);
3636 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3637 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3638 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3640 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3641 // Transfer memoperands.
3642 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3643 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3645 ReplaceNode(N, St);
3646 return;
3649 case Intrinsic::arm_neon_vld1: {
3650 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3651 ARM::VLD1d32, ARM::VLD1d64 };
3652 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3653 ARM::VLD1q32, ARM::VLD1q64};
3654 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3655 return;
3658 case Intrinsic::arm_neon_vld1x2: {
3659 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3660 ARM::VLD1q32, ARM::VLD1q64 };
3661 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3662 ARM::VLD1d16QPseudo,
3663 ARM::VLD1d32QPseudo,
3664 ARM::VLD1d64QPseudo };
3665 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3666 return;
3669 case Intrinsic::arm_neon_vld1x3: {
3670 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3671 ARM::VLD1d16TPseudo,
3672 ARM::VLD1d32TPseudo,
3673 ARM::VLD1d64TPseudo };
3674 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3675 ARM::VLD1q16LowTPseudo_UPD,
3676 ARM::VLD1q32LowTPseudo_UPD,
3677 ARM::VLD1q64LowTPseudo_UPD };
3678 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3679 ARM::VLD1q16HighTPseudo,
3680 ARM::VLD1q32HighTPseudo,
3681 ARM::VLD1q64HighTPseudo };
3682 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3683 return;
3686 case Intrinsic::arm_neon_vld1x4: {
3687 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3688 ARM::VLD1d16QPseudo,
3689 ARM::VLD1d32QPseudo,
3690 ARM::VLD1d64QPseudo };
3691 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3692 ARM::VLD1q16LowQPseudo_UPD,
3693 ARM::VLD1q32LowQPseudo_UPD,
3694 ARM::VLD1q64LowQPseudo_UPD };
3695 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3696 ARM::VLD1q16HighQPseudo,
3697 ARM::VLD1q32HighQPseudo,
3698 ARM::VLD1q64HighQPseudo };
3699 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3700 return;
3703 case Intrinsic::arm_neon_vld2: {
3704 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3705 ARM::VLD2d32, ARM::VLD1q64 };
3706 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3707 ARM::VLD2q32Pseudo };
3708 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3709 return;
3712 case Intrinsic::arm_neon_vld3: {
3713 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3714 ARM::VLD3d16Pseudo,
3715 ARM::VLD3d32Pseudo,
3716 ARM::VLD1d64TPseudo };
3717 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3718 ARM::VLD3q16Pseudo_UPD,
3719 ARM::VLD3q32Pseudo_UPD };
3720 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3721 ARM::VLD3q16oddPseudo,
3722 ARM::VLD3q32oddPseudo };
3723 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3724 return;
3727 case Intrinsic::arm_neon_vld4: {
3728 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3729 ARM::VLD4d16Pseudo,
3730 ARM::VLD4d32Pseudo,
3731 ARM::VLD1d64QPseudo };
3732 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3733 ARM::VLD4q16Pseudo_UPD,
3734 ARM::VLD4q32Pseudo_UPD };
3735 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3736 ARM::VLD4q16oddPseudo,
3737 ARM::VLD4q32oddPseudo };
3738 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3739 return;
3742 case Intrinsic::arm_neon_vld2dup: {
3743 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3744 ARM::VLD2DUPd32, ARM::VLD1q64 };
3745 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3746 ARM::VLD2DUPq16EvenPseudo,
3747 ARM::VLD2DUPq32EvenPseudo };
3748 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3749 ARM::VLD2DUPq16OddPseudo,
3750 ARM::VLD2DUPq32OddPseudo };
3751 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3752 DOpcodes, QOpcodes0, QOpcodes1);
3753 return;
3756 case Intrinsic::arm_neon_vld3dup: {
3757 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3758 ARM::VLD3DUPd16Pseudo,
3759 ARM::VLD3DUPd32Pseudo,
3760 ARM::VLD1d64TPseudo };
3761 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3762 ARM::VLD3DUPq16EvenPseudo,
3763 ARM::VLD3DUPq32EvenPseudo };
3764 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3765 ARM::VLD3DUPq16OddPseudo,
3766 ARM::VLD3DUPq32OddPseudo };
3767 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3768 DOpcodes, QOpcodes0, QOpcodes1);
3769 return;
3772 case Intrinsic::arm_neon_vld4dup: {
3773 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3774 ARM::VLD4DUPd16Pseudo,
3775 ARM::VLD4DUPd32Pseudo,
3776 ARM::VLD1d64QPseudo };
3777 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3778 ARM::VLD4DUPq16EvenPseudo,
3779 ARM::VLD4DUPq32EvenPseudo };
3780 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3781 ARM::VLD4DUPq16OddPseudo,
3782 ARM::VLD4DUPq32OddPseudo };
3783 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3784 DOpcodes, QOpcodes0, QOpcodes1);
3785 return;
3788 case Intrinsic::arm_neon_vld2lane: {
3789 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3790 ARM::VLD2LNd16Pseudo,
3791 ARM::VLD2LNd32Pseudo };
3792 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3793 ARM::VLD2LNq32Pseudo };
3794 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3795 return;
3798 case Intrinsic::arm_neon_vld3lane: {
3799 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3800 ARM::VLD3LNd16Pseudo,
3801 ARM::VLD3LNd32Pseudo };
3802 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3803 ARM::VLD3LNq32Pseudo };
3804 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3805 return;
3808 case Intrinsic::arm_neon_vld4lane: {
3809 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3810 ARM::VLD4LNd16Pseudo,
3811 ARM::VLD4LNd32Pseudo };
3812 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3813 ARM::VLD4LNq32Pseudo };
3814 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3815 return;
3818 case Intrinsic::arm_neon_vst1: {
3819 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3820 ARM::VST1d32, ARM::VST1d64 };
3821 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3822 ARM::VST1q32, ARM::VST1q64 };
3823 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3824 return;
3827 case Intrinsic::arm_neon_vst1x2: {
3828 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3829 ARM::VST1q32, ARM::VST1q64 };
3830 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3831 ARM::VST1d16QPseudo,
3832 ARM::VST1d32QPseudo,
3833 ARM::VST1d64QPseudo };
3834 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3835 return;
3838 case Intrinsic::arm_neon_vst1x3: {
3839 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3840 ARM::VST1d16TPseudo,
3841 ARM::VST1d32TPseudo,
3842 ARM::VST1d64TPseudo };
3843 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3844 ARM::VST1q16LowTPseudo_UPD,
3845 ARM::VST1q32LowTPseudo_UPD,
3846 ARM::VST1q64LowTPseudo_UPD };
3847 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3848 ARM::VST1q16HighTPseudo,
3849 ARM::VST1q32HighTPseudo,
3850 ARM::VST1q64HighTPseudo };
3851 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3852 return;
3855 case Intrinsic::arm_neon_vst1x4: {
3856 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3857 ARM::VST1d16QPseudo,
3858 ARM::VST1d32QPseudo,
3859 ARM::VST1d64QPseudo };
3860 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3861 ARM::VST1q16LowQPseudo_UPD,
3862 ARM::VST1q32LowQPseudo_UPD,
3863 ARM::VST1q64LowQPseudo_UPD };
3864 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3865 ARM::VST1q16HighQPseudo,
3866 ARM::VST1q32HighQPseudo,
3867 ARM::VST1q64HighQPseudo };
3868 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3869 return;
3872 case Intrinsic::arm_neon_vst2: {
3873 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3874 ARM::VST2d32, ARM::VST1q64 };
3875 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3876 ARM::VST2q32Pseudo };
3877 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3878 return;
3881 case Intrinsic::arm_neon_vst3: {
3882 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3883 ARM::VST3d16Pseudo,
3884 ARM::VST3d32Pseudo,
3885 ARM::VST1d64TPseudo };
3886 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3887 ARM::VST3q16Pseudo_UPD,
3888 ARM::VST3q32Pseudo_UPD };
3889 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3890 ARM::VST3q16oddPseudo,
3891 ARM::VST3q32oddPseudo };
3892 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3893 return;
3896 case Intrinsic::arm_neon_vst4: {
3897 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3898 ARM::VST4d16Pseudo,
3899 ARM::VST4d32Pseudo,
3900 ARM::VST1d64QPseudo };
3901 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3902 ARM::VST4q16Pseudo_UPD,
3903 ARM::VST4q32Pseudo_UPD };
3904 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3905 ARM::VST4q16oddPseudo,
3906 ARM::VST4q32oddPseudo };
3907 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3908 return;
3911 case Intrinsic::arm_neon_vst2lane: {
3912 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3913 ARM::VST2LNd16Pseudo,
3914 ARM::VST2LNd32Pseudo };
3915 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3916 ARM::VST2LNq32Pseudo };
3917 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3918 return;
3921 case Intrinsic::arm_neon_vst3lane: {
3922 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3923 ARM::VST3LNd16Pseudo,
3924 ARM::VST3LNd32Pseudo };
3925 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3926 ARM::VST3LNq32Pseudo };
3927 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3928 return;
3931 case Intrinsic::arm_neon_vst4lane: {
3932 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3933 ARM::VST4LNd16Pseudo,
3934 ARM::VST4LNd32Pseudo };
3935 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3936 ARM::VST4LNq32Pseudo };
3937 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3938 return;
3941 break;
3944 case ISD::ATOMIC_CMP_SWAP:
3945 SelectCMP_SWAP(N);
3946 return;
3949 SelectCode(N);
3952 // Inspect a register string of the form
3953 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3954 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3955 // and obtain the integer operands from them, adding these operands to the
3956 // provided vector.
3957 static void getIntOperandsFromRegisterString(StringRef RegString,
3958 SelectionDAG *CurDAG,
3959 const SDLoc &DL,
3960 std::vector<SDValue> &Ops) {
3961 SmallVector<StringRef, 5> Fields;
3962 RegString.split(Fields, ':');
3964 if (Fields.size() > 1) {
3965 bool AllIntFields = true;
3967 for (StringRef Field : Fields) {
3968 // Need to trim out leading 'cp' characters and get the integer field.
3969 unsigned IntField;
3970 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3971 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3974 assert(AllIntFields &&
3975 "Unexpected non-integer value in special register string.");
3979 // Maps a Banked Register string to its mask value. The mask value returned is
3980 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3981 // mask operand, which expresses which register is to be used, e.g. r8, and in
3982 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3983 // was invalid.
3984 static inline int getBankedRegisterMask(StringRef RegString) {
3985 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3986 if (!TheReg)
3987 return -1;
3988 return TheReg->Encoding;
3991 // The flags here are common to those allowed for apsr in the A class cores and
3992 // those allowed for the special registers in the M class cores. Returns a
3993 // value representing which flags were present, -1 if invalid.
3994 static inline int getMClassFlagsMask(StringRef Flags) {
3995 return StringSwitch<int>(Flags)
3996 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3997 // correct when flags are not permitted
3998 .Case("g", 0x1)
3999 .Case("nzcvq", 0x2)
4000 .Case("nzcvqg", 0x3)
4001 .Default(-1);
4004 // Maps MClass special registers string to its value for use in the
4005 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4006 // Returns -1 to signify that the string was invalid.
4007 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4008 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4009 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4010 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4011 return -1;
4012 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4015 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4016 // The mask operand contains the special register (R Bit) in bit 4, whether
4017 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4018 // bits 3-0 contains the fields to be accessed in the special register, set by
4019 // the flags provided with the register.
4020 int Mask = 0;
4021 if (Reg == "apsr") {
4022 // The flags permitted for apsr are the same flags that are allowed in
4023 // M class registers. We get the flag value and then shift the flags into
4024 // the correct place to combine with the mask.
4025 Mask = getMClassFlagsMask(Flags);
4026 if (Mask == -1)
4027 return -1;
4028 return Mask << 2;
4031 if (Reg != "cpsr" && Reg != "spsr") {
4032 return -1;
4035 // This is the same as if the flags were "fc"
4036 if (Flags.empty() || Flags == "all")
4037 return Mask | 0x9;
4039 // Inspect the supplied flags string and set the bits in the mask for
4040 // the relevant and valid flags allowed for cpsr and spsr.
4041 for (char Flag : Flags) {
4042 int FlagVal;
4043 switch (Flag) {
4044 case 'c':
4045 FlagVal = 0x1;
4046 break;
4047 case 'x':
4048 FlagVal = 0x2;
4049 break;
4050 case 's':
4051 FlagVal = 0x4;
4052 break;
4053 case 'f':
4054 FlagVal = 0x8;
4055 break;
4056 default:
4057 FlagVal = 0;
4060 // This avoids allowing strings where the same flag bit appears twice.
4061 if (!FlagVal || (Mask & FlagVal))
4062 return -1;
4063 Mask |= FlagVal;
4066 // If the register is spsr then we need to set the R bit.
4067 if (Reg == "spsr")
4068 Mask |= 0x10;
4070 return Mask;
4073 // Lower the read_register intrinsic to ARM specific DAG nodes
4074 // using the supplied metadata string to select the instruction node to use
4075 // and the registers/masks to construct as operands for the node.
4076 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4077 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4078 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4079 bool IsThumb2 = Subtarget->isThumb2();
4080 SDLoc DL(N);
4082 std::vector<SDValue> Ops;
4083 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4085 if (!Ops.empty()) {
4086 // If the special register string was constructed of fields (as defined
4087 // in the ACLE) then need to lower to MRC node (32 bit) or
4088 // MRRC node(64 bit), we can make the distinction based on the number of
4089 // operands we have.
4090 unsigned Opcode;
4091 SmallVector<EVT, 3> ResTypes;
4092 if (Ops.size() == 5){
4093 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4094 ResTypes.append({ MVT::i32, MVT::Other });
4095 } else {
4096 assert(Ops.size() == 3 &&
4097 "Invalid number of fields in special register string.");
4098 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4099 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4102 Ops.push_back(getAL(CurDAG, DL));
4103 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4104 Ops.push_back(N->getOperand(0));
4105 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4106 return true;
4109 std::string SpecialReg = RegString->getString().lower();
4111 int BankedReg = getBankedRegisterMask(SpecialReg);
4112 if (BankedReg != -1) {
4113 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4114 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4115 N->getOperand(0) };
4116 ReplaceNode(
4117 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4118 DL, MVT::i32, MVT::Other, Ops));
4119 return true;
4122 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4123 // corresponding to the register that is being read from. So we switch on the
4124 // string to find which opcode we need to use.
4125 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4126 .Case("fpscr", ARM::VMRS)
4127 .Case("fpexc", ARM::VMRS_FPEXC)
4128 .Case("fpsid", ARM::VMRS_FPSID)
4129 .Case("mvfr0", ARM::VMRS_MVFR0)
4130 .Case("mvfr1", ARM::VMRS_MVFR1)
4131 .Case("mvfr2", ARM::VMRS_MVFR2)
4132 .Case("fpinst", ARM::VMRS_FPINST)
4133 .Case("fpinst2", ARM::VMRS_FPINST2)
4134 .Default(0);
4136 // If an opcode was found then we can lower the read to a VFP instruction.
4137 if (Opcode) {
4138 if (!Subtarget->hasVFP2Base())
4139 return false;
4140 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4141 return false;
4143 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4144 N->getOperand(0) };
4145 ReplaceNode(N,
4146 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4147 return true;
4150 // If the target is M Class then need to validate that the register string
4151 // is an acceptable value, so check that a mask can be constructed from the
4152 // string.
4153 if (Subtarget->isMClass()) {
4154 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4155 if (SYSmValue == -1)
4156 return false;
4158 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4159 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4160 N->getOperand(0) };
4161 ReplaceNode(
4162 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4163 return true;
4166 // Here we know the target is not M Class so we need to check if it is one
4167 // of the remaining possible values which are apsr, cpsr or spsr.
4168 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4169 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4170 N->getOperand(0) };
4171 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4172 DL, MVT::i32, MVT::Other, Ops));
4173 return true;
4176 if (SpecialReg == "spsr") {
4177 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4178 N->getOperand(0) };
4179 ReplaceNode(
4180 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4181 MVT::i32, MVT::Other, Ops));
4182 return true;
4185 return false;
4188 // Lower the write_register intrinsic to ARM specific DAG nodes
4189 // using the supplied metadata string to select the instruction node to use
4190 // and the registers/masks to use in the nodes
4191 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4192 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4193 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4194 bool IsThumb2 = Subtarget->isThumb2();
4195 SDLoc DL(N);
4197 std::vector<SDValue> Ops;
4198 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4200 if (!Ops.empty()) {
4201 // If the special register string was constructed of fields (as defined
4202 // in the ACLE) then need to lower to MCR node (32 bit) or
4203 // MCRR node(64 bit), we can make the distinction based on the number of
4204 // operands we have.
4205 unsigned Opcode;
4206 if (Ops.size() == 5) {
4207 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4208 Ops.insert(Ops.begin()+2, N->getOperand(2));
4209 } else {
4210 assert(Ops.size() == 3 &&
4211 "Invalid number of fields in special register string.");
4212 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4213 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4214 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4217 Ops.push_back(getAL(CurDAG, DL));
4218 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4219 Ops.push_back(N->getOperand(0));
4221 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4222 return true;
4225 std::string SpecialReg = RegString->getString().lower();
4226 int BankedReg = getBankedRegisterMask(SpecialReg);
4227 if (BankedReg != -1) {
4228 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4229 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4230 N->getOperand(0) };
4231 ReplaceNode(
4232 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4233 DL, MVT::Other, Ops));
4234 return true;
4237 // The VFP registers are written to by creating SelectionDAG nodes with
4238 // opcodes corresponding to the register that is being written. So we switch
4239 // on the string to find which opcode we need to use.
4240 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4241 .Case("fpscr", ARM::VMSR)
4242 .Case("fpexc", ARM::VMSR_FPEXC)
4243 .Case("fpsid", ARM::VMSR_FPSID)
4244 .Case("fpinst", ARM::VMSR_FPINST)
4245 .Case("fpinst2", ARM::VMSR_FPINST2)
4246 .Default(0);
4248 if (Opcode) {
4249 if (!Subtarget->hasVFP2Base())
4250 return false;
4251 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4252 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4253 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4254 return true;
4257 std::pair<StringRef, StringRef> Fields;
4258 Fields = StringRef(SpecialReg).rsplit('_');
4259 std::string Reg = Fields.first.str();
4260 StringRef Flags = Fields.second;
4262 // If the target was M Class then need to validate the special register value
4263 // and retrieve the mask for use in the instruction node.
4264 if (Subtarget->isMClass()) {
4265 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4266 if (SYSmValue == -1)
4267 return false;
4269 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4270 N->getOperand(2), getAL(CurDAG, DL),
4271 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4272 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4273 return true;
4276 // We then check to see if a valid mask can be constructed for one of the
4277 // register string values permitted for the A and R class cores. These values
4278 // are apsr, spsr and cpsr; these are also valid on older cores.
4279 int Mask = getARClassRegisterMask(Reg, Flags);
4280 if (Mask != -1) {
4281 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4282 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4283 N->getOperand(0) };
4284 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4285 DL, MVT::Other, Ops));
4286 return true;
4289 return false;
4292 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4293 std::vector<SDValue> AsmNodeOperands;
4294 unsigned Flag, Kind;
4295 bool Changed = false;
4296 unsigned NumOps = N->getNumOperands();
4298 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4299 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4300 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4301 // respectively. Since there is no constraint to explicitly specify a
4302 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4303 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4304 // them into a GPRPair.
4306 SDLoc dl(N);
4307 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4308 : SDValue(nullptr,0);
4310 SmallVector<bool, 8> OpChanged;
4311 // Glue node will be appended late.
4312 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4313 SDValue op = N->getOperand(i);
4314 AsmNodeOperands.push_back(op);
4316 if (i < InlineAsm::Op_FirstOperand)
4317 continue;
4319 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4320 Flag = C->getZExtValue();
4321 Kind = InlineAsm::getKind(Flag);
4323 else
4324 continue;
4326 // Immediate operands to inline asm in the SelectionDAG are modeled with
4327 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4328 // the second is a constant with the value of the immediate. If we get here
4329 // and we have a Kind_Imm, skip the next operand, and continue.
4330 if (Kind == InlineAsm::Kind_Imm) {
4331 SDValue op = N->getOperand(++i);
4332 AsmNodeOperands.push_back(op);
4333 continue;
4336 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4337 if (NumRegs)
4338 OpChanged.push_back(false);
4340 unsigned DefIdx = 0;
4341 bool IsTiedToChangedOp = false;
4342 // If it's a use that is tied with a previous def, it has no
4343 // reg class constraint.
4344 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4345 IsTiedToChangedOp = OpChanged[DefIdx];
4347 // Memory operands to inline asm in the SelectionDAG are modeled with two
4348 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4349 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4350 // it doesn't get misinterpreted), and continue. We do this here because
4351 // it's important to update the OpChanged array correctly before moving on.
4352 if (Kind == InlineAsm::Kind_Mem) {
4353 SDValue op = N->getOperand(++i);
4354 AsmNodeOperands.push_back(op);
4355 continue;
4358 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4359 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4360 continue;
4362 unsigned RC;
4363 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4364 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4365 || NumRegs != 2)
4366 continue;
4368 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4369 SDValue V0 = N->getOperand(i+1);
4370 SDValue V1 = N->getOperand(i+2);
4371 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4372 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4373 SDValue PairedReg;
4374 MachineRegisterInfo &MRI = MF->getRegInfo();
4376 if (Kind == InlineAsm::Kind_RegDef ||
4377 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4378 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4379 // the original GPRs.
4381 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4382 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4383 SDValue Chain = SDValue(N,0);
4385 SDNode *GU = N->getGluedUser();
4386 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4387 Chain.getValue(1));
4389 // Extract values from a GPRPair reg and copy to the original GPR reg.
4390 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4391 RegCopy);
4392 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4393 RegCopy);
4394 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4395 RegCopy.getValue(1));
4396 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4398 // Update the original glue user.
4399 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4400 Ops.push_back(T1.getValue(1));
4401 CurDAG->UpdateNodeOperands(GU, Ops);
4403 else {
4404 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4405 // GPRPair and then pass the GPRPair to the inline asm.
4406 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4408 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4409 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4410 Chain.getValue(1));
4411 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4412 T0.getValue(1));
4413 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4415 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4416 // i32 VRs of inline asm with it.
4417 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4418 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4419 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4421 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4422 Glue = Chain.getValue(1);
4425 Changed = true;
4427 if(PairedReg.getNode()) {
4428 OpChanged[OpChanged.size() -1 ] = true;
4429 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4430 if (IsTiedToChangedOp)
4431 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4432 else
4433 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4434 // Replace the current flag.
4435 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4436 Flag, dl, MVT::i32);
4437 // Add the new register node and skip the original two GPRs.
4438 AsmNodeOperands.push_back(PairedReg);
4439 // Skip the next two GPRs.
4440 i += 2;
4444 if (Glue.getNode())
4445 AsmNodeOperands.push_back(Glue);
4446 if (!Changed)
4447 return false;
4449 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4450 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4451 New->setNodeId(-1);
4452 ReplaceNode(N, New.getNode());
4453 return true;
4457 bool ARMDAGToDAGISel::
4458 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4459 std::vector<SDValue> &OutOps) {
4460 switch(ConstraintID) {
4461 default:
4462 llvm_unreachable("Unexpected asm memory constraint");
4463 case InlineAsm::Constraint_i:
4464 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4465 // be an immediate and not a memory constraint.
4466 LLVM_FALLTHROUGH;
4467 case InlineAsm::Constraint_m:
4468 case InlineAsm::Constraint_o:
4469 case InlineAsm::Constraint_Q:
4470 case InlineAsm::Constraint_Um:
4471 case InlineAsm::Constraint_Un:
4472 case InlineAsm::Constraint_Uq:
4473 case InlineAsm::Constraint_Us:
4474 case InlineAsm::Constraint_Ut:
4475 case InlineAsm::Constraint_Uv:
4476 case InlineAsm::Constraint_Uy:
4477 // Require the address to be in a register. That is safe for all ARM
4478 // variants and it is hard to do anything much smarter without knowing
4479 // how the operand is used.
4480 OutOps.push_back(Op);
4481 return false;
4483 return true;
4486 /// createARMISelDag - This pass converts a legalized DAG into a
4487 /// ARM-specific DAG, ready for instruction scheduling.
4489 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4490 CodeGenOpt::Level OptLevel) {
4491 return new ARMDAGToDAGISel(TM, OptLevel);