[ARM] DLS/LE low-overhead loop code generation
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

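// For illustration (not part of the original source): with Scale = 4 and the
// half-open range [0, 256), a constant of 1020 yields ScaledConstant = 255
// and succeeds, 1022 fails the divisibility check, and 1024 (scaled to 256)
// falls just outside the range.
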
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, a left-shift amount of 1 or 2 is free but other amounts are
      // not. e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

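// Worked instance of the PreprocessISelDAG rewrite above (illustrative, not
// part of the original source): with c1 = 16 and c2 = 1020 (tz = 2),
//   (add X1, (and (srl X2, 16), 1020))
// becomes
//   (add X1, (shl (and (srl X2, 18), 255), 2))
// where the srl/and pair selects to a UBFX of 8 bits starting at bit 18, and
// the shl folds into the add as an LSL #2 shifter operand.
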
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                                     // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                         // MOVW
         ARM_AM::getT2SOImmVal(Val) != -1 ||                      // MOV
         ARM_AM::getT2SOImmVal(~Val) != -1))                      // MVN
      return 1;
    if (Val <= 510) return 2;                                     // MOV + ADDi8
    if (~Val <= 255) return 2;                                    // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;              // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;                 // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;                // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1;       // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;                 // two instrs
  }
  if (Subtarget->useMovt()) return 2;                             // MOVW + MOVT
  return 3;                                                       // Literal pool load
}

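// Illustrative costs under this model (not part of the original source): in
// ARM mode, 0xFF00 is a single MOV (an 8-bit value rotated by 8); a constant
// like 0x12345678 costs 2 when MOVW/MOVT are available and otherwise falls
// back to a literal pool load with cost 3; on Thumb1, 510 costs 2 (MOV #255
// followed by ADDS #255).
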
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}

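// For illustration (not part of the original source): on Thumb1, (mul x, 510)
// passes this check with PowerOfTwo = 1 and NewMulConst = 255, since
// materializing 255 takes one instruction while 510 takes two; the extracted
// shift-by-1 can then ride along for free as an LSL shifter operand.
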
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

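// Illustrative note (not part of the original source): given
//   (or (shl x, 4), 3)
// the low four bits of (shl x, 4) are known zero, so the OR shares no set
// bits with 3 and is equivalent to an ADD; selecting it as one unlocks
// add-style immediates and addressing-mode folds.
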
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

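// For illustration (not part of the original source): a C-level access such
// as p[2] for an int pointer produces (add p, 8), which this routine matches
// as Base = p, OffImm = 8, so a load of that address can select to something
// like "ldr r0, [r1, #8]". An offset of 5000 falls outside the signed 12-bit
// window (-4095..4095) and is rejected, leaving the address to be
// materialized with a separate add.
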
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

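// Illustrative note (not part of the original source): for a Thumb1 access to
// p - 4, the negative offset is not encodable in the immediate load forms, so
// the add of -4 is kept as a separate instruction (which becomes a SUBS) and
// the load selects with a zero offset, roughly "subs r0, #4" followed by
// "ldr r1, [r0]".
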
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // For LHS+RHS to result in an offset that's a multiple of 4 the object
      // indexed by the LHS must be 4-byte aligned.
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlignment(FI) < 4)
        MFI.setObjectAlignment(FI, 4);
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

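// Illustrative note (not part of the original source): (add r0, (shl r1, 2))
// matches here as Base = r0, OffReg = r1, ShImm = 2, so a load of that
// address can select to something like "ldr.w r2, [r0, r1, lsl #2]". Shift
// amounts of 4 or more do not fit the T2 register-offset encoding and the
// shift is left unfolded.
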
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
             ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
             : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

1529 /// Form a GPRPair pseudo register from a pair of GPR regs.
1530 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1531 SDLoc dl(V0.getNode());
1532 SDValue RegClass =
1533 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1534 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1535 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1536 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1537 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1540 /// Form a D register from a pair of S registers.
1541 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1542 SDLoc dl(V0.getNode());
1543 SDValue RegClass =
1544 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1545 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1546 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1547 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1548 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1551 /// Form a quad register from a pair of D registers.
1552 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1553 SDLoc dl(V0.getNode());
1554 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1555 MVT::i32);
1556 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1557 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1558 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1559 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1562 /// Form 4 consecutive D registers from a pair of Q registers.
1563 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1564 SDLoc dl(V0.getNode());
1565 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1566 MVT::i32);
1567 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1568 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1569 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1570 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1573 /// Form 4 consecutive S registers.
1574 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1575 SDValue V2, SDValue V3) {
1576 SDLoc dl(V0.getNode());
1577 SDValue RegClass =
1578 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1579 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1580 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1581 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1582 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1583 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1584 V2, SubReg2, V3, SubReg3 };
1585 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1588 /// Form 4 consecutive D registers.
1589 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1590 SDValue V2, SDValue V3) {
1591 SDLoc dl(V0.getNode());
1592 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1593 MVT::i32);
1594 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1595 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1596 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1597 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1598 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1599 V2, SubReg2, V3, SubReg3 };
1600 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1603 /// Form 4 consecutive Q registers.
1604 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1605 SDValue V2, SDValue V3) {
1606 SDLoc dl(V0.getNode());
1607 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1608 MVT::i32);
1609 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1610 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1611 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1612 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1613 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1614 V2, SubReg2, V3, SubReg3 };
1615 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1616 }
1618 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1619 /// of a NEON VLD or VST instruction. The supported values depend on the
1620 /// number of registers being loaded.
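/// For example, a load of two Q registers (NumRegs == 4) from a 64-byte
/// aligned address is emitted with the maximum encodable 32-byte (256-bit)
/// alignment, while a single D register (NumRegs == 1) at 16-byte alignment
/// is clamped down to the 8-byte encoding.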
1621 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1622 unsigned NumVecs, bool is64BitVector) {
1623 unsigned NumRegs = NumVecs;
1624 if (!is64BitVector && NumVecs < 3)
1625 NumRegs *= 2;
1627 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1628 if (Alignment >= 32 && NumRegs == 4)
1629 Alignment = 32;
1630 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1631 Alignment = 16;
1632 else if (Alignment >= 8)
1633 Alignment = 8;
1634 else
1635 Alignment = 0;
1637 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1638 }
1640 static bool isVLDfixed(unsigned Opc)
1641 {
1642 switch (Opc) {
1643 default: return false;
1644 case ARM::VLD1d8wb_fixed : return true;
1645 case ARM::VLD1d16wb_fixed : return true;
1646 case ARM::VLD1d64Qwb_fixed : return true;
1647 case ARM::VLD1d32wb_fixed : return true;
1648 case ARM::VLD1d64wb_fixed : return true;
1649 case ARM::VLD1d64TPseudoWB_fixed : return true;
1650 case ARM::VLD1d64QPseudoWB_fixed : return true;
1651 case ARM::VLD1q8wb_fixed : return true;
1652 case ARM::VLD1q16wb_fixed : return true;
1653 case ARM::VLD1q32wb_fixed : return true;
1654 case ARM::VLD1q64wb_fixed : return true;
1655 case ARM::VLD1DUPd8wb_fixed : return true;
1656 case ARM::VLD1DUPd16wb_fixed : return true;
1657 case ARM::VLD1DUPd32wb_fixed : return true;
1658 case ARM::VLD1DUPq8wb_fixed : return true;
1659 case ARM::VLD1DUPq16wb_fixed : return true;
1660 case ARM::VLD1DUPq32wb_fixed : return true;
1661 case ARM::VLD2d8wb_fixed : return true;
1662 case ARM::VLD2d16wb_fixed : return true;
1663 case ARM::VLD2d32wb_fixed : return true;
1664 case ARM::VLD2q8PseudoWB_fixed : return true;
1665 case ARM::VLD2q16PseudoWB_fixed : return true;
1666 case ARM::VLD2q32PseudoWB_fixed : return true;
1667 case ARM::VLD2DUPd8wb_fixed : return true;
1668 case ARM::VLD2DUPd16wb_fixed : return true;
1669 case ARM::VLD2DUPd32wb_fixed : return true;
1670 }
1671 }
1673 static bool isVSTfixed(unsigned Opc)
1674 {
1675 switch (Opc) {
1676 default: return false;
1677 case ARM::VST1d8wb_fixed : return true;
1678 case ARM::VST1d16wb_fixed : return true;
1679 case ARM::VST1d32wb_fixed : return true;
1680 case ARM::VST1d64wb_fixed : return true;
1681 case ARM::VST1q8wb_fixed : return true;
1682 case ARM::VST1q16wb_fixed : return true;
1683 case ARM::VST1q32wb_fixed : return true;
1684 case ARM::VST1q64wb_fixed : return true;
1685 case ARM::VST1d64TPseudoWB_fixed : return true;
1686 case ARM::VST1d64QPseudoWB_fixed : return true;
1687 case ARM::VST2d8wb_fixed : return true;
1688 case ARM::VST2d16wb_fixed : return true;
1689 case ARM::VST2d32wb_fixed : return true;
1690 case ARM::VST2q8PseudoWB_fixed : return true;
1691 case ARM::VST2q16PseudoWB_fixed : return true;
1692 case ARM::VST2q32PseudoWB_fixed : return true;
1693 }
1694 }
1696 // Get the register stride update opcode of a VLD/VST instruction that
1697 // is otherwise equivalent to the given fixed stride updating instruction.
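// For example, VLD1d8wb_fixed ("vld1.8 {d0}, [r0]!", base advanced by the
// access size) maps to VLD1d8wb_register ("vld1.8 {d0}, [r0], r1", base
// advanced by a general-purpose register).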
1698 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1699 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1700 && "Incorrect fixed stride updating instruction.");
1701 switch (Opc) {
1702 default: break;
1703 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1704 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1705 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1706 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1707 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1708 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1709 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1710 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1711 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1712 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1713 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1714 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1715 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1716 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1717 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1718 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1719 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1720 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1722 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1723 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1724 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1725 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1726 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1727 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1728 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1729 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1730 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1731 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1733 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1734 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1735 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1736 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1737 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1738 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1740 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1741 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1742 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1743 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1744 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1745 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1747 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1748 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1749 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1750 }
1751 return Opc; // If not one we handle, return it unchanged.
1752 }
1754 /// Returns true if the given increment is a Constant known to be equal to the
1755 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1756 /// be used.
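/// For example, a vld2 of two 64-bit vectors accesses 2 * 8 == 16 bytes, so
/// only a constant increment of exactly 16 allows the post-increment form.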
1757 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1758 auto C = dyn_cast<ConstantSDNode>(Inc);
1759 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1760 }
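/// SelectVLD - Select a NEON structured load (vld1-vld4). As a rough sketch,
/// a non-updating vld2 intrinsic on two v4i16 vectors becomes:
///   %vld:qpr = VLD2d16 %addr, %align, pred, %noreg, %chain
///   %r0 = EXTRACT_SUBREG %vld, dsub_0   ; first result vector
///   %r1 = EXTRACT_SUBREG %vld, dsub_1   ; second result vector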
1762 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1763 const uint16_t *DOpcodes,
1764 const uint16_t *QOpcodes0,
1765 const uint16_t *QOpcodes1) {
1766 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1767 SDLoc dl(N);
1769 SDValue MemAddr, Align;
1770 bool IsIntrinsic = !isUpdating; // By coincidence, none of the supported
1771 // updating nodes are intrinsics.
1772 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1773 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1774 return;
1776 SDValue Chain = N->getOperand(0);
1777 EVT VT = N->getValueType(0);
1778 bool is64BitVector = VT.is64BitVector();
1779 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1781 unsigned OpcodeIndex;
1782 switch (VT.getSimpleVT().SimpleTy) {
1783 default: llvm_unreachable("unhandled vld type");
1784 // Double-register operations:
1785 case MVT::v8i8: OpcodeIndex = 0; break;
1786 case MVT::v4f16:
1787 case MVT::v4i16: OpcodeIndex = 1; break;
1788 case MVT::v2f32:
1789 case MVT::v2i32: OpcodeIndex = 2; break;
1790 case MVT::v1i64: OpcodeIndex = 3; break;
1791 // Quad-register operations:
1792 case MVT::v16i8: OpcodeIndex = 0; break;
1793 case MVT::v8f16:
1794 case MVT::v8i16: OpcodeIndex = 1; break;
1795 case MVT::v4f32:
1796 case MVT::v4i32: OpcodeIndex = 2; break;
1797 case MVT::v2f64:
1798 case MVT::v2i64: OpcodeIndex = 3; break;
1801 EVT ResTy;
1802 if (NumVecs == 1)
1803 ResTy = VT;
1804 else {
1805 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1806 if (!is64BitVector)
1807 ResTyElts *= 2;
1808 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1810 std::vector<EVT> ResTys;
1811 ResTys.push_back(ResTy);
1812 if (isUpdating)
1813 ResTys.push_back(MVT::i32);
1814 ResTys.push_back(MVT::Other);
1816 SDValue Pred = getAL(CurDAG, dl);
1817 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1818 SDNode *VLd;
1819 SmallVector<SDValue, 7> Ops;
1821 // Double registers and VLD1/VLD2 quad registers are directly supported.
1822 if (is64BitVector || NumVecs <= 2) {
1823 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1824 QOpcodes0[OpcodeIndex]);
1825 Ops.push_back(MemAddr);
1826 Ops.push_back(Align);
1827 if (isUpdating) {
1828 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1829 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1830 if (!IsImmUpdate) {
1831 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1832 // check for the opcode rather than the number of vector elements.
1833 if (isVLDfixed(Opc))
1834 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1835 Ops.push_back(Inc);
1836 // Fixed-increment VLD1/VLD2 do not take the Reg0 operand, so only
1837 // include it for the other opcodes.
1838 } else if (!isVLDfixed(Opc))
1839 Ops.push_back(Reg0);
1841 Ops.push_back(Pred);
1842 Ops.push_back(Reg0);
1843 Ops.push_back(Chain);
1844 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1846 } else {
1847 // Otherwise, quad registers are loaded with two separate instructions,
1848 // where one loads the even registers and the other loads the odd registers.
1849 EVT AddrTy = MemAddr.getValueType();
1851 // Load the even subregs. This is always an updating load, so that it
1852 // provides the address to the second load for the odd subregs.
1853 SDValue ImplDef =
1854 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1855 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1856 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1857 ResTy, AddrTy, MVT::Other, OpsA);
1858 Chain = SDValue(VLdA, 2);
1860 // Load the odd subregs.
1861 Ops.push_back(SDValue(VLdA, 1));
1862 Ops.push_back(Align);
1863 if (isUpdating) {
1864 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1865 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1866 "only constant post-increment update allowed for VLD3/4");
1867 (void)Inc;
1868 Ops.push_back(Reg0);
1870 Ops.push_back(SDValue(VLdA, 0));
1871 Ops.push_back(Pred);
1872 Ops.push_back(Reg0);
1873 Ops.push_back(Chain);
1874 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1877 // Transfer memoperands.
1878 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1879 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
1881 if (NumVecs == 1) {
1882 ReplaceNode(N, VLd);
1883 return;
1886 // Extract out the subregisters.
1887 SDValue SuperReg = SDValue(VLd, 0);
1888 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1889 ARM::qsub_3 == ARM::qsub_0 + 3,
1890 "Unexpected subreg numbering");
1891 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1892 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1893 ReplaceUses(SDValue(N, Vec),
1894 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1895 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1896 if (isUpdating)
1897 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1898 CurDAG->RemoveDeadNode(N);
1899 }
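/// SelectVST - Select a NEON structured store (vst1-vst4). The source vectors
/// are first glued together with the REG_SEQUENCE helpers above; e.g. a vst2
/// of two D registers stores a single v2i64 pair register through one VST2
/// machine node.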
1901 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1902 const uint16_t *DOpcodes,
1903 const uint16_t *QOpcodes0,
1904 const uint16_t *QOpcodes1) {
1905 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1906 SDLoc dl(N);
1908 SDValue MemAddr, Align;
1909 bool IsIntrinsic = !isUpdating; // By coincidence, none of the supported
1910 // updating nodes are intrinsics.
1911 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1912 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1913 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1914 return;
1916 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1918 SDValue Chain = N->getOperand(0);
1919 EVT VT = N->getOperand(Vec0Idx).getValueType();
1920 bool is64BitVector = VT.is64BitVector();
1921 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1923 unsigned OpcodeIndex;
1924 switch (VT.getSimpleVT().SimpleTy) {
1925 default: llvm_unreachable("unhandled vst type");
1926 // Double-register operations:
1927 case MVT::v8i8: OpcodeIndex = 0; break;
1928 case MVT::v4f16:
1929 case MVT::v4i16: OpcodeIndex = 1; break;
1930 case MVT::v2f32:
1931 case MVT::v2i32: OpcodeIndex = 2; break;
1932 case MVT::v1i64: OpcodeIndex = 3; break;
1933 // Quad-register operations:
1934 case MVT::v16i8: OpcodeIndex = 0; break;
1935 case MVT::v8f16:
1936 case MVT::v8i16: OpcodeIndex = 1; break;
1937 case MVT::v4f32:
1938 case MVT::v4i32: OpcodeIndex = 2; break;
1939 case MVT::v2f64:
1940 case MVT::v2i64: OpcodeIndex = 3; break;
1943 std::vector<EVT> ResTys;
1944 if (isUpdating)
1945 ResTys.push_back(MVT::i32);
1946 ResTys.push_back(MVT::Other);
1948 SDValue Pred = getAL(CurDAG, dl);
1949 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1950 SmallVector<SDValue, 7> Ops;
1952 // Double registers and VST1/VST2 quad registers are directly supported.
1953 if (is64BitVector || NumVecs <= 2) {
1954 SDValue SrcReg;
1955 if (NumVecs == 1) {
1956 SrcReg = N->getOperand(Vec0Idx);
1957 } else if (is64BitVector) {
1958 // Form a REG_SEQUENCE to force register allocation.
1959 SDValue V0 = N->getOperand(Vec0Idx + 0);
1960 SDValue V1 = N->getOperand(Vec0Idx + 1);
1961 if (NumVecs == 2)
1962 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1963 else {
1964 SDValue V2 = N->getOperand(Vec0Idx + 2);
1965 // If it's a vst3, form a quad D-register and leave the last part as
1966 // an undef.
1967 SDValue V3 = (NumVecs == 3)
1968 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1969 : N->getOperand(Vec0Idx + 3);
1970 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
1972 } else {
1973 // Form a QQ register.
1974 SDValue Q0 = N->getOperand(Vec0Idx);
1975 SDValue Q1 = N->getOperand(Vec0Idx + 1);
1976 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
1979 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1980 QOpcodes0[OpcodeIndex]);
1981 Ops.push_back(MemAddr);
1982 Ops.push_back(Align);
1983 if (isUpdating) {
1984 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1985 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1986 if (!IsImmUpdate) {
1987 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
1988 // check for the opcode rather than the number of vector elements.
1989 if (isVSTfixed(Opc))
1990 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1991 Ops.push_back(Inc);
1992 }
1993 // Fixed-increment VST1/VST2 do not take the Reg0 operand, so only
1994 // include it for the other opcodes.
1995 else if (!isVSTfixed(Opc))
1996 Ops.push_back(Reg0);
1997 }
1998 Ops.push_back(SrcReg);
1999 Ops.push_back(Pred);
2000 Ops.push_back(Reg0);
2001 Ops.push_back(Chain);
2002 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2004 // Transfer memoperands.
2005 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2007 ReplaceNode(N, VSt);
2008 return;
2011 // Otherwise, quad registers are stored with two separate instructions,
2012 // where one stores the even registers and the other stores the odd registers.
2014 // Form the QQQQ REG_SEQUENCE.
2015 SDValue V0 = N->getOperand(Vec0Idx + 0);
2016 SDValue V1 = N->getOperand(Vec0Idx + 1);
2017 SDValue V2 = N->getOperand(Vec0Idx + 2);
2018 SDValue V3 = (NumVecs == 3)
2019 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2020 : N->getOperand(Vec0Idx + 3);
2021 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2023 // Store the even D registers. This is always an updating store, so that it
2024 // provides the address to the second store for the odd subregs.
2025 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2026 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2027 MemAddr.getValueType(),
2028 MVT::Other, OpsA);
2029 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2030 Chain = SDValue(VStA, 1);
2032 // Store the odd D registers.
2033 Ops.push_back(SDValue(VStA, 0));
2034 Ops.push_back(Align);
2035 if (isUpdating) {
2036 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2037 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2038 "only constant post-increment update allowed for VST3/4");
2039 (void)Inc;
2040 Ops.push_back(Reg0);
2042 Ops.push_back(RegSeq);
2043 Ops.push_back(Pred);
2044 Ops.push_back(Reg0);
2045 Ops.push_back(Chain);
2046 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2047 Ops);
2048 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2049 ReplaceNode(N, VStB);
2050 }
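/// SelectVLDSTLane - Select a NEON per-lane load or store (vld2lane,
/// vst4lane, etc.), e.g. "vld2.16 {d0[2], d1[2]}, [r0]". The vector operands
/// are bundled into one super-register and the lane number is passed as an
/// explicit immediate operand.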
2052 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2053 unsigned NumVecs,
2054 const uint16_t *DOpcodes,
2055 const uint16_t *QOpcodes) {
2056 assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2057 SDLoc dl(N);
2059 SDValue MemAddr, Align;
2060 bool IsIntrinsic = !isUpdating; // By coincidence, none of the supported
2061 // updating nodes are intrinsics.
2062 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2063 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2064 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2065 return;
2067 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2069 SDValue Chain = N->getOperand(0);
2070 unsigned Lane =
2071 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2072 EVT VT = N->getOperand(Vec0Idx).getValueType();
2073 bool is64BitVector = VT.is64BitVector();
2075 unsigned Alignment = 0;
2076 if (NumVecs != 3) {
2077 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2078 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2079 if (Alignment > NumBytes)
2080 Alignment = NumBytes;
2081 if (Alignment < 8 && Alignment < NumBytes)
2082 Alignment = 0;
2083 // Alignment must be a power of two; make sure of that.
2084 Alignment = (Alignment & -Alignment);
2085 if (Alignment == 1)
2086 Alignment = 0;
2088 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2090 unsigned OpcodeIndex;
2091 switch (VT.getSimpleVT().SimpleTy) {
2092 default: llvm_unreachable("unhandled vld/vst lane type");
2093 // Double-register operations:
2094 case MVT::v8i8: OpcodeIndex = 0; break;
2095 case MVT::v4f16:
2096 case MVT::v4i16: OpcodeIndex = 1; break;
2097 case MVT::v2f32:
2098 case MVT::v2i32: OpcodeIndex = 2; break;
2099 // Quad-register operations:
2100 case MVT::v8f16:
2101 case MVT::v8i16: OpcodeIndex = 0; break;
2102 case MVT::v4f32:
2103 case MVT::v4i32: OpcodeIndex = 1; break;
2106 std::vector<EVT> ResTys;
2107 if (IsLoad) {
2108 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2109 if (!is64BitVector)
2110 ResTyElts *= 2;
2111 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2112 MVT::i64, ResTyElts));
2114 if (isUpdating)
2115 ResTys.push_back(MVT::i32);
2116 ResTys.push_back(MVT::Other);
2118 SDValue Pred = getAL(CurDAG, dl);
2119 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2121 SmallVector<SDValue, 8> Ops;
2122 Ops.push_back(MemAddr);
2123 Ops.push_back(Align);
2124 if (isUpdating) {
2125 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2126 bool IsImmUpdate =
2127 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2128 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2131 SDValue SuperReg;
2132 SDValue V0 = N->getOperand(Vec0Idx + 0);
2133 SDValue V1 = N->getOperand(Vec0Idx + 1);
2134 if (NumVecs == 2) {
2135 if (is64BitVector)
2136 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2137 else
2138 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2139 } else {
2140 SDValue V2 = N->getOperand(Vec0Idx + 2);
2141 SDValue V3 = (NumVecs == 3)
2142 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2143 : N->getOperand(Vec0Idx + 3);
2144 if (is64BitVector)
2145 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2146 else
2147 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2149 Ops.push_back(SuperReg);
2150 Ops.push_back(getI32Imm(Lane, dl));
2151 Ops.push_back(Pred);
2152 Ops.push_back(Reg0);
2153 Ops.push_back(Chain);
2155 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2156 QOpcodes[OpcodeIndex]);
2157 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2158 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2159 if (!IsLoad) {
2160 ReplaceNode(N, VLdLn);
2161 return;
2164 // Extract the subregisters.
2165 SuperReg = SDValue(VLdLn, 0);
2166 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2167 ARM::qsub_3 == ARM::qsub_0 + 3,
2168 "Unexpected subreg numbering");
2169 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2170 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2171 ReplaceUses(SDValue(N, Vec),
2172 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2173 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2174 if (isUpdating)
2175 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2176 CurDAG->RemoveDeadNode(N);
2177 }
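/// SelectVLDDup - Select a NEON load-and-duplicate (vld1dup-vld4dup), e.g.
/// "vld1.32 {d0[]}, [r0]": one element is loaded and replicated into every
/// lane of the destination register(s).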
2179 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2180 bool isUpdating, unsigned NumVecs,
2181 const uint16_t *DOpcodes,
2182 const uint16_t *QOpcodes0,
2183 const uint16_t *QOpcodes1) {
2184 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2185 SDLoc dl(N);
2187 SDValue MemAddr, Align;
2188 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2189 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2190 return;
2192 SDValue Chain = N->getOperand(0);
2193 EVT VT = N->getValueType(0);
2194 bool is64BitVector = VT.is64BitVector();
2196 unsigned Alignment = 0;
2197 if (NumVecs != 3) {
2198 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2199 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2200 if (Alignment > NumBytes)
2201 Alignment = NumBytes;
2202 if (Alignment < 8 && Alignment < NumBytes)
2203 Alignment = 0;
2204 // Alignment must be a power of two; make sure of that.
2205 Alignment = (Alignment & -Alignment);
2206 if (Alignment == 1)
2207 Alignment = 0;
2209 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2211 unsigned OpcodeIndex;
2212 switch (VT.getSimpleVT().SimpleTy) {
2213 default: llvm_unreachable("unhandled vld-dup type");
2214 case MVT::v8i8:
2215 case MVT::v16i8: OpcodeIndex = 0; break;
2216 case MVT::v4i16:
2217 case MVT::v8i16:
2218 case MVT::v4f16:
2219 case MVT::v8f16:
2220 OpcodeIndex = 1; break;
2221 case MVT::v2f32:
2222 case MVT::v2i32:
2223 case MVT::v4f32:
2224 case MVT::v4i32: OpcodeIndex = 2; break;
2225 case MVT::v1f64:
2226 case MVT::v1i64: OpcodeIndex = 3; break;
2229 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2230 if (!is64BitVector)
2231 ResTyElts *= 2;
2232 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2234 std::vector<EVT> ResTys;
2235 ResTys.push_back(ResTy);
2236 if (isUpdating)
2237 ResTys.push_back(MVT::i32);
2238 ResTys.push_back(MVT::Other);
2240 SDValue Pred = getAL(CurDAG, dl);
2241 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2243 SDNode *VLdDup;
2244 if (is64BitVector || NumVecs == 1) {
2245 SmallVector<SDValue, 6> Ops;
2246 Ops.push_back(MemAddr);
2247 Ops.push_back(Align);
2248 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2249 QOpcodes0[OpcodeIndex];
2250 if (isUpdating) {
2251 // Fixed-stride update instructions don't have an explicit writeback
2252 // operand; it's implicit in the opcode itself.
2253 SDValue Inc = N->getOperand(2);
2254 bool IsImmUpdate =
2255 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2256 if (NumVecs <= 2 && !IsImmUpdate)
2257 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2258 if (!IsImmUpdate)
2259 Ops.push_back(Inc);
2260 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2261 else if (NumVecs > 2)
2262 Ops.push_back(Reg0);
2264 Ops.push_back(Pred);
2265 Ops.push_back(Reg0);
2266 Ops.push_back(Chain);
2267 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2268 } else if (NumVecs == 2) {
2269 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2270 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2271 dl, ResTys, OpsA);
2273 Chain = SDValue(VLdA, 1);
2274 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2275 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2276 } else {
2277 SDValue ImplDef =
2278 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2279 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2280 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2281 dl, ResTys, OpsA);
2283 SDValue SuperReg = SDValue(VLdA, 0);
2284 Chain = SDValue(VLdA, 1);
2285 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2286 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2289 // Transfer memoperands.
2290 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2291 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2293 // Extract the subregisters.
2294 if (NumVecs == 1) {
2295 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2296 } else {
2297 SDValue SuperReg = SDValue(VLdDup, 0);
2298 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2299 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2300 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2301 ReplaceUses(SDValue(N, Vec),
2302 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2305 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2306 if (isUpdating)
2307 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2308 CurDAG->RemoveDeadNode(N);
2309 }
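/// tryV6T2BitfieldExtractOp - Try to match a UBFX/SBFX bitfield extract,
/// available from ARMv6T2 up. For example, (and (srl x, 8), 0xff) has
/// lsb == 8 and width == 8 and selects to "ubfx rd, rn, #8, #8".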
2311 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2312 if (!Subtarget->hasV6T2Ops())
2313 return false;
2315 unsigned Opc = isSigned
2316 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2317 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2318 SDLoc dl(N);
2320 // For unsigned extracts, check for a shift right and mask
2321 unsigned And_imm = 0;
2322 if (N->getOpcode() == ISD::AND) {
2323 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2325 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2326 if (And_imm & (And_imm + 1))
2327 return false;
2329 unsigned Srl_imm = 0;
2330 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2331 Srl_imm)) {
2332 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2334 // Mask off the unnecessary bits of the AND immediate; normally
2335 // DAGCombine will do this, but that might not happen if
2336 // targetShrinkDemandedConstant chooses a different immediate.
2337 And_imm &= -1U >> Srl_imm;
2339 // Note: The width operand is encoded as width-1.
2340 unsigned Width = countTrailingOnes(And_imm) - 1;
2341 unsigned LSB = Srl_imm;
2343 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2345 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2346 // It's cheaper to use a right shift to extract the top bits.
2347 if (Subtarget->isThumb()) {
2348 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2349 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2350 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2351 getAL(CurDAG, dl), Reg0, Reg0 };
2352 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2353 return true;
2356 // ARM models shift instructions as MOVsi with shifter operand.
2357 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2358 SDValue ShOpc =
2359 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2360 MVT::i32);
2361 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2362 getAL(CurDAG, dl), Reg0, Reg0 };
2363 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2364 return true;
2367 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2368 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2369 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2370 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2371 getAL(CurDAG, dl), Reg0 };
2372 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2373 return true;
2376 return false;
2379 // Otherwise, we're looking for a shift of a shift
2380 unsigned Shl_imm = 0;
2381 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2382 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2383 unsigned Srl_imm = 0;
2384 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2385 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2386 // Note: The width operand is encoded as width-1.
2387 unsigned Width = 32 - Srl_imm - 1;
2388 int LSB = Srl_imm - Shl_imm;
2389 if (LSB < 0)
2390 return false;
2391 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2392 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2393 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2394 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2395 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2396 getAL(CurDAG, dl), Reg0 };
2397 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2398 return true;
2402 // Or we are looking for a shift of an AND with a shifted-mask operand
2403 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2404 isShiftedMask_32(And_imm)) {
2405 unsigned Srl_imm = 0;
2406 unsigned LSB = countTrailingZeros(And_imm);
2407 // The shift amount must equal the LSB of the AND mask
2408 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2409 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2410 unsigned MSB = 31 - countLeadingZeros(And_imm);
2411 // Note: The width operand is encoded as width-1.
2412 unsigned Width = MSB - LSB;
2413 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2414 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2415 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2416 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2417 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2418 getAL(CurDAG, dl), Reg0 };
2419 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2420 return true;
2424 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2425 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2426 unsigned LSB = 0;
2427 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2428 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2429 return false;
2431 if (LSB + Width > 32)
2432 return false;
2434 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2435 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2436 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2437 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2438 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2439 getAL(CurDAG, dl), Reg0 };
2440 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2441 return true;
2444 return false;
2445 }
2447 /// Target-specific DAG combining for ISD::XOR.
2448 /// Target-independent combining lowers SELECT_CC nodes of the form
2449 /// select_cc setg[ge] X, 0, X, -X
2450 /// select_cc setgt X, -1, X, -X
2451 /// select_cc setl[te] X, 0, -X, X
2452 /// select_cc setlt X, 1, -X, X
2453 /// which represent Integer ABS into:
2454 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2455 /// ARM instruction selection detects the latter and matches it to
2456 /// ARM::ABS or ARM::t2ABS machine node.
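/// For a 32-bit X the matched DAG is therefore:
///   %sra = sra %x, 31
///   %add = add %x, %sra
///   %n   = xor %add, %sra
/// and the three nodes collapse into a single ABS/t2ABS pseudo.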
2457 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2458 SDValue XORSrc0 = N->getOperand(0);
2459 SDValue XORSrc1 = N->getOperand(1);
2460 EVT VT = N->getValueType(0);
2462 if (Subtarget->isThumb1Only())
2463 return false;
2465 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2466 return false;
2468 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2469 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2470 SDValue SRASrc0 = XORSrc1.getOperand(0);
2471 SDValue SRASrc1 = XORSrc1.getOperand(1);
2472 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2473 EVT XType = SRASrc0.getValueType();
2474 unsigned Size = XType.getSizeInBits() - 1;
2476 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2477 XType.isInteger() && SRAConstant != nullptr &&
2478 Size == SRAConstant->getZExtValue()) {
2479 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2480 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2481 return true;
2484 return false;
2485 }
2487 /// Atomic compare-and-swap nodes select to dedicated CMP_SWAP pseudo-instructions.
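/// For example, an i32 cmpxchg becomes CMP_SWAP_32 with operands (addr,
/// expected, new, chain); the pseudo is later expanded into an LDREX/STREX
/// loop. Its second i32 result is an internal temporary that is unused here.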
2488 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2489 unsigned Opcode;
2490 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2491 if (MemTy == MVT::i8)
2492 Opcode = ARM::CMP_SWAP_8;
2493 else if (MemTy == MVT::i16)
2494 Opcode = ARM::CMP_SWAP_16;
2495 else if (MemTy == MVT::i32)
2496 Opcode = ARM::CMP_SWAP_32;
2497 else
2498 llvm_unreachable("Unknown AtomicCmpSwap type");
2500 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2501 N->getOperand(0)};
2502 SDNode *CmpSwap = CurDAG->getMachineNode(
2503 Opcode, SDLoc(N),
2504 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2506 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2507 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2509 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2510 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2511 CurDAG->RemoveDeadNode(N);
2512 }
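// Return the (most significant, least significant) bit positions of a single
// contiguous run of set bits, or None otherwise. For example, 0x0ff0 yields
// (11, 4), while 0x0f0f contains two runs and yields None.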
2514 static Optional<std::pair<unsigned, unsigned>>
2515 getContiguousRangeOfSetBits(const APInt &A) {
2516 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2517 unsigned LastOne = A.countTrailingZeros();
2518 if (A.countPopulation() != (FirstOne - LastOne + 1))
2519 return Optional<std::pair<unsigned,unsigned>>();
2520 return std::make_pair(FirstOne, LastOne);
2521 }
2523 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2524 assert(N->getOpcode() == ARMISD::CMPZ);
2525 SwitchEQNEToPLMI = false;
2527 if (!Subtarget->isThumb())
2528 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2529 // LSR don't exist as standalone instructions - they need the barrel shifter.
2530 return;
2532 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
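// For example, with C == 0x00ff0000 (a run from bit 23 down to bit 16) on a
// Thumb target without v6T2, case 4 below emits "lsls rX, #8" followed by
// "lsrs rX, #24", setting the flags for the EQ/NE test without ever
// materializing the constant.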
2533 SDValue And = N->getOperand(0);
2534 if (!And->hasOneUse())
2535 return;
2537 SDValue Zero = N->getOperand(1);
2538 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2539 And->getOpcode() != ISD::AND)
2540 return;
2541 SDValue X = And.getOperand(0);
2542 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2544 if (!C)
2545 return;
2546 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2547 if (!Range)
2548 return;
2550 // There are several ways to lower this:
2551 SDNode *NewN;
2552 SDLoc dl(N);
2554 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2555 if (Subtarget->isThumb2()) {
2556 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2557 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2558 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2559 CurDAG->getRegister(0, MVT::i32) };
2560 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2561 } else {
2562 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2563 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2564 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2565 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2566 }
2567 };
2569 if (Range->second == 0) {
2570 // 1. Mask includes the LSB -> Simply shift the top N bits off
2571 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2572 ReplaceNode(And.getNode(), NewN);
2573 } else if (Range->first == 31) {
2574 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2575 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2576 ReplaceNode(And.getNode(), NewN);
2577 } else if (Range->first == Range->second) {
2578 // 3. Only one bit is set. We can shift this into the sign bit and use a
2579 // PL/MI comparison.
2580 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2581 ReplaceNode(And.getNode(), NewN);
2583 SwitchEQNEToPLMI = true;
2584 } else if (!Subtarget->hasV6T2Ops()) {
2585 // 4. Do a double shift to clear bottom and top bits, but only in
2586 // thumb-1 mode as in thumb-2 we can use UBFX.
2587 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2588 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2589 Range->second + (31 - Range->first));
2590 ReplaceNode(And.getNode(), NewN);
2591 }
2592 }
2595 void ARMDAGToDAGISel::Select(SDNode *N) {
2596 SDLoc dl(N);
2598 if (N->isMachineOpcode()) {
2599 N->setNodeId(-1);
2600 return; // Already selected.
2603 switch (N->getOpcode()) {
2604 default: break;
2605 case ISD::STORE: {
2606 // For Thumb1, match an sp-relative store in C++. This is a little
2607 // unfortunate, but I don't think I can make the chain check work
2608 // otherwise. (The chain of the store has to be the same as the chain
2609 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2610 // a direct reference to "SP".)
2612 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2613 // a different addressing mode from other four-byte stores.
2615 // This pattern usually comes up with call arguments.
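// For example, "str r0, [sp, #8]" selects directly to tSTRspi with a scaled
// offset operand of 2 (the immediate is in words; see Scale=4 below).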
2616 StoreSDNode *ST = cast<StoreSDNode>(N);
2617 SDValue Ptr = ST->getBasePtr();
2618 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2619 int RHSC = 0;
2620 if (Ptr.getOpcode() == ISD::ADD &&
2621 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2622 Ptr = Ptr.getOperand(0);
2624 if (Ptr.getOpcode() == ISD::CopyFromReg &&
2625 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2626 Ptr.getOperand(0) == ST->getChain()) {
2627 SDValue Ops[] = {ST->getValue(),
2628 CurDAG->getRegister(ARM::SP, MVT::i32),
2629 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2630 getAL(CurDAG, dl),
2631 CurDAG->getRegister(0, MVT::i32),
2632 ST->getChain()};
2633 MachineSDNode *ResNode =
2634 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2635 MachineMemOperand *MemOp = ST->getMemOperand();
2636 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2637 ReplaceNode(N, ResNode);
2638 return;
2641 break;
2643 case ISD::WRITE_REGISTER:
2644 if (tryWriteRegister(N))
2645 return;
2646 break;
2647 case ISD::READ_REGISTER:
2648 if (tryReadRegister(N))
2649 return;
2650 break;
2651 case ISD::INLINEASM:
2652 case ISD::INLINEASM_BR:
2653 if (tryInlineAsm(N))
2654 return;
2655 break;
2656 case ISD::XOR:
2657 // Select special operations if XOR node forms integer ABS pattern
2658 if (tryABSOp(N))
2659 return;
2660 // Other cases are autogenerated.
2661 break;
2662 case ISD::Constant: {
2663 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2664 // If we can't materialize the constant we need to use a literal pool
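// For example, on a target without MOVW/MOVT, an arbitrary value such as
// 0x12345678 is cheaper to load from the constant pool (tLDRpci/LDRcp below)
// than to build with a long mov/orr sequence.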
2665 if (ConstantMaterializationCost(Val) > 2) {
2666 SDValue CPIdx = CurDAG->getTargetConstantPool(
2667 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2668 TLI->getPointerTy(CurDAG->getDataLayout()));
2670 SDNode *ResNode;
2671 if (Subtarget->isThumb()) {
2672 SDValue Ops[] = {
2673 CPIdx,
2674 getAL(CurDAG, dl),
2675 CurDAG->getRegister(0, MVT::i32),
2676 CurDAG->getEntryNode()
2678 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2679 Ops);
2680 } else {
2681 SDValue Ops[] = {
2682 CPIdx,
2683 CurDAG->getTargetConstant(0, dl, MVT::i32),
2684 getAL(CurDAG, dl),
2685 CurDAG->getRegister(0, MVT::i32),
2686 CurDAG->getEntryNode()
2688 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2689 Ops);
2691 // Annotate the Node with memory operand information so that MachineInstr
2692 // queries work properly. This e.g. gives the register allocation the
2693 // required information for rematerialization.
2694 MachineFunction& MF = CurDAG->getMachineFunction();
2695 MachineMemOperand *MemOp =
2696 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2697 MachineMemOperand::MOLoad, 4, 4);
2699 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2701 ReplaceNode(N, ResNode);
2702 return;
2705 // Other cases are autogenerated.
2706 break;
2708 case ISD::FrameIndex: {
2709 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2710 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2711 SDValue TFI = CurDAG->getTargetFrameIndex(
2712 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2713 if (Subtarget->isThumb1Only()) {
2714 // Set the alignment of the frame object to 4, to avoid having to generate
2715 // more than one ADD
2716 MachineFrameInfo &MFI = MF->getFrameInfo();
2717 if (MFI.getObjectAlignment(FI) < 4)
2718 MFI.setObjectAlignment(FI, 4);
2719 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2720 CurDAG->getTargetConstant(0, dl, MVT::i32));
2721 return;
2722 } else {
2723 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2724 ARM::t2ADDri : ARM::ADDri);
2725 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2726 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2727 CurDAG->getRegister(0, MVT::i32) };
2728 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2729 return;
2732 case ISD::SRL:
2733 if (tryV6T2BitfieldExtractOp(N, false))
2734 return;
2735 break;
2736 case ISD::SIGN_EXTEND_INREG:
2737 case ISD::SRA:
2738 if (tryV6T2BitfieldExtractOp(N, true))
2739 return;
2740 break;
2741 case ISD::MUL:
2742 if (Subtarget->isThumb1Only())
2743 break;
2744 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2745 unsigned RHSV = C->getZExtValue();
2746 if (!RHSV) break;
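// Multiplies by (2^n + 1) or (2^n - 1) fold into a shifted add or a shifted
// reverse-subtract, e.g. x * 9 -> "add r0, r0, r0, lsl #3" and
// x * 7 -> "rsb r0, r0, r0, lsl #3".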
2747 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2748 unsigned ShImm = Log2_32(RHSV-1);
2749 if (ShImm >= 32)
2750 break;
2751 SDValue V = N->getOperand(0);
2752 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2753 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2754 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2755 if (Subtarget->isThumb()) {
2756 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2757 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2758 return;
2759 } else {
2760 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2761 Reg0 };
2762 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2763 return;
2766 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2767 unsigned ShImm = Log2_32(RHSV+1);
2768 if (ShImm >= 32)
2769 break;
2770 SDValue V = N->getOperand(0);
2771 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2772 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2773 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2774 if (Subtarget->isThumb()) {
2775 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2776 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2777 return;
2778 } else {
2779 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2780 Reg0 };
2781 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2782 return;
2786 break;
2787 case ISD::AND: {
2788 // Check for unsigned bitfield extract
2789 if (tryV6T2BitfieldExtractOp(N, false))
2790 return;
2792 // If an immediate is used in an AND node, it is possible that the immediate
2793 // can be more optimally materialized when negated. If this is the case we
2794 // can negate the immediate and use a BIC instead.
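// For example, 0xffffff00 costs more to materialize than its complement 0xff,
// which fits a single MOVS; so (and x, 0xffffff00) is better selected as a
// BIC against a register holding 0xff.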
2795 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2796 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2797 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2799 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2800 // immediate can be negated and fit in the immediate operand of
2801 // a t2BIC, don't do any manual transform here as this can be
2802 // handled by the generic ISel machinery.
2803 bool PreferImmediateEncoding =
2804 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2805 if (!PreferImmediateEncoding &&
2806 ConstantMaterializationCost(Imm) >
2807 ConstantMaterializationCost(~Imm)) {
2808 // The current immediate costs more to materialize than a negated
2809 // immediate, so negate the immediate and use a BIC.
2810 SDValue NewImm =
2811 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2812 // If the new constant didn't exist before, reposition it in the topological
2813 // ordering so it is just before N. Otherwise, don't touch its location.
2814 if (NewImm->getNodeId() == -1)
2815 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2817 if (!Subtarget->hasThumb2()) {
2818 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2819 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2820 CurDAG->getRegister(0, MVT::i32)};
2821 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2822 return;
2823 } else {
2824 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2825 CurDAG->getRegister(0, MVT::i32),
2826 CurDAG->getRegister(0, MVT::i32)};
2827 ReplaceNode(N,
2828 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2829 return;
2834 // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower
2835 // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the
2836 // top 16 bits of the result are entirely contributed by c2 and the lower
2837 // 16 bits entirely by x. That equals (or (and x, 0xffff), (and c2, 0xffff0000)).
2838 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
2839 EVT VT = N->getValueType(0);
2840 if (VT != MVT::i32)
2841 break;
2842 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2843 ? ARM::t2MOVTi16
2844 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2845 if (!Opc)
2846 break;
2847 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2848 N1C = dyn_cast<ConstantSDNode>(N1);
2849 if (!N1C)
2850 break;
2851 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2852 SDValue N2 = N0.getOperand(1);
2853 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2854 if (!N2C)
2855 break;
2856 unsigned N1CVal = N1C->getZExtValue();
2857 unsigned N2CVal = N2C->getZExtValue();
2858 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2859 (N1CVal & 0xffffU) == 0xffffU &&
2860 (N2CVal & 0xffffU) == 0x0U) {
2861 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2862 dl, MVT::i32);
2863 SDValue Ops[] = { N0.getOperand(0), Imm16,
2864 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2865 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2866 return;
2870 break;
2872 case ARMISD::UMAAL: {
2873 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2874 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2875 N->getOperand(2), N->getOperand(3),
2876 getAL(CurDAG, dl),
2877 CurDAG->getRegister(0, MVT::i32) };
2878 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2879 return;
2881 case ARMISD::UMLAL: {
2882 if (Subtarget->isThumb()) {
2883 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2884 N->getOperand(3), getAL(CurDAG, dl),
2885 CurDAG->getRegister(0, MVT::i32)};
2886 ReplaceNode(
2887 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2888 return;
2889 } else {
2890 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2891 N->getOperand(3), getAL(CurDAG, dl),
2892 CurDAG->getRegister(0, MVT::i32),
2893 CurDAG->getRegister(0, MVT::i32) };
2894 ReplaceNode(N, CurDAG->getMachineNode(
2895 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2896 MVT::i32, MVT::i32, Ops));
2897 return;
2900 case ARMISD::SMLAL: {
2901 if (Subtarget->isThumb()) {
2902 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2903 N->getOperand(3), getAL(CurDAG, dl),
2904 CurDAG->getRegister(0, MVT::i32)};
2905 ReplaceNode(
2906 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2907 return;
2908 } else {
2909 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2910 N->getOperand(3), getAL(CurDAG, dl),
2911 CurDAG->getRegister(0, MVT::i32),
2912 CurDAG->getRegister(0, MVT::i32) };
2913 ReplaceNode(N, CurDAG->getMachineNode(
2914 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2915 MVT::i32, MVT::i32, Ops));
2916 return;
2919 case ARMISD::SUBE: {
2920 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2921 break;
2922 // Look for a pattern to match SMMLS
2923 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi a, b)))
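// i.e. SMMLS computes rd = ((ra << 32) - rn * rm) >> 32; the subc of the
// product's low half models the borrow into the high word.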
2924 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2925 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2926 !SDValue(N, 1).use_empty())
2927 break;
2929 if (Subtarget->isThumb())
2930 assert(Subtarget->hasThumb2() &&
2931 "This pattern should not be generated for Thumb");
2933 SDValue SmulLoHi = N->getOperand(1);
2934 SDValue Subc = N->getOperand(2);
2935 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2937 if (!Zero || Zero->getZExtValue() != 0 ||
2938 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2939 N->getOperand(1) != SmulLoHi.getValue(1) ||
2940 N->getOperand(2) != Subc.getValue(1))
2941 break;
2943 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2944 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2945 N->getOperand(0), getAL(CurDAG, dl),
2946 CurDAG->getRegister(0, MVT::i32) };
2947 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2948 return;
2950 case ISD::LOAD: {
2951 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2952 if (tryT2IndexedLoad(N))
2953 return;
2954 } else if (Subtarget->isThumb()) {
2955 if (tryT1IndexedLoad(N))
2956 return;
2957 } else if (tryARMIndexedLoad(N))
2958 return;
2959 // Other cases are autogenerated.
2960 break;
2962 case ARMISD::BRCOND: {
2963 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2964 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2965 // Pattern complexity = 6 cost = 1 size = 0
2967 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2968 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2969 // Pattern complexity = 6 cost = 1 size = 0
2971 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2972 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2973 // Pattern complexity = 6 cost = 1 size = 0
2975 unsigned Opc = Subtarget->isThumb() ?
2976 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2977 SDValue Chain = N->getOperand(0);
2978 SDValue N1 = N->getOperand(1);
2979 SDValue N2 = N->getOperand(2);
2980 SDValue N3 = N->getOperand(3);
2981 SDValue InFlag = N->getOperand(4);
2982 assert(N1.getOpcode() == ISD::BasicBlock);
2983 assert(N2.getOpcode() == ISD::Constant);
2984 assert(N3.getOpcode() == ISD::Register);
2986 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2988 if (InFlag.getOpcode() == ARMISD::CMPZ) {
2989 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
2990 SDValue Int = InFlag.getOperand(0);
2991 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
2993 // Handle low-overhead loops.
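// The decrement and the branch select to a t2LoopDec/t2LoopEnd pseudo pair;
// the low-overhead-loop pass later rewrites these into the real LE (and DLS)
// instructions, or reverts to a SUBS/Bcc sequence when the loop turns out to
// be unsuitable.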
2994 if (ID == Intrinsic::loop_decrement_reg) {
2995 SDValue Elements = Int.getOperand(2);
2996 SDValue Size = CurDAG->getTargetConstant(
2997 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
2998 MVT::i32);
3000 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3001 SDNode *LoopDec =
3002 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3003 CurDAG->getVTList(MVT::i32, MVT::Other),
3004 Args);
3005 ReplaceUses(Int.getNode(), LoopDec);
3007 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3008 SDNode *LoopEnd =
3009 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3011 ReplaceUses(N, LoopEnd);
3012 CurDAG->RemoveDeadNode(N);
3013 CurDAG->RemoveDeadNode(InFlag.getNode());
3014 CurDAG->RemoveDeadNode(Int.getNode());
3015 return;
3019 bool SwitchEQNEToPLMI;
3020 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3021 InFlag = N->getOperand(4);
3023 if (SwitchEQNEToPLMI) {
3024 switch ((ARMCC::CondCodes)CC) {
3025 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3026 case ARMCC::NE:
3027 CC = (unsigned)ARMCC::MI;
3028 break;
3029 case ARMCC::EQ:
3030 CC = (unsigned)ARMCC::PL;
3031 break;
3036 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3037 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3038 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3039 MVT::Glue, Ops);
3040 Chain = SDValue(ResNode, 0);
3041 if (N->getNumValues() == 2) {
3042 InFlag = SDValue(ResNode, 1);
3043 ReplaceUses(SDValue(N, 1), InFlag);
3044 }
3045 ReplaceUses(SDValue(N, 0),
3046 SDValue(Chain.getNode(), Chain.getResNo()));
3047 CurDAG->RemoveDeadNode(N);
3048 return;
3051 case ARMISD::CMPZ: {
3052 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3053 // This allows us to avoid materializing the expensive negative constant.
3054 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3055 // for its glue output.
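// For example, (CMPZ x, #-42) becomes (CMPZ (ADDS x, #42), #0): "adds" sets
// the same flags the compare needs, and 42 fits an 8-bit immediate where -42
// would be expensive to materialize.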
3056 SDValue X = N->getOperand(0);
3057 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3058 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3059 int64_t Addend = -C->getSExtValue();
3061 SDNode *Add = nullptr;
3062 // ADDS can be better than CMN if the immediate fits in a
3063 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3064 // Outside that range we can just use a CMN which is 32-bit but has a
3065 // 12-bit immediate range.
3066 if (Addend < 1<<8) {
3067 if (Subtarget->isThumb2()) {
3068 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3069 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3070 CurDAG->getRegister(0, MVT::i32) };
3071 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3072 } else {
3073 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3074 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3075 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3076 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3077 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3080 if (Add) {
3081 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3082 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3085 // Other cases are autogenerated.
3086 break;
3089 case ARMISD::CMOV: {
3090 SDValue InFlag = N->getOperand(4);
3092 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3093 bool SwitchEQNEToPLMI;
3094 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3096 if (SwitchEQNEToPLMI) {
3097 SDValue ARMcc = N->getOperand(2);
3098 ARMCC::CondCodes CC =
3099 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3101 switch (CC) {
3102 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3103 case ARMCC::NE:
3104 CC = ARMCC::MI;
3105 break;
3106 case ARMCC::EQ:
3107 CC = ARMCC::PL;
3108 break;
3110 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3111 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3112 N->getOperand(3), N->getOperand(4)};
3113 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3117 // Other cases are autogenerated.
3118 break;
  case ARMISD::VZIP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8: Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8: Opc = ARM::VUZPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VUZPd16; break;
    case MVT::v2f32:
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VUZPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VUZPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VUZPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8: Opc = ARM::VTRNd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VTRNd16; break;
    case MVT::v2f32:
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VTRNq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VTRNq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VTRNq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
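    // These BUILD_VECTORs gather scalar FP values into one vector register.
    // The createDRegPairNode/createSRegPairNode/createQuadSRegsNode helpers
    // used below build REG_SEQUENCE nodes that place each scalar operand in
    // the appropriate sub-register.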
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }

  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                        ARM::VLD2DUPd16wb_fixed,
                                        ARM::VLD2DUPd32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                        ARM::VLD3DUPd16Pseudo_UPD,
                                        ARM::VLD3DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                        ARM::VLD4DUPd16Pseudo_UPD,
                                        ARM::VLD4DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
                                         ARM::VLD2d16wb_fixed,
                                         ARM::VLD2d32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
                                         ARM::VLD2q16PseudoWB_fixed,
                                         ARM::VLD2q32PseudoWB_fixed };
    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
                                          ARM::VLD4q16oddPseudo_UPD,
                                          ARM::VLD4q32oddPseudo_UPD };
    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
                                         ARM::VST2d16wb_fixed,
                                         ARM::VST2d32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
                                         ARM::VST2q16PseudoWB_fixed,
                                         ARM::VST2q32PseudoWB_fixed };
    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
                                         ARM::VST4d16Pseudo_UPD,
                                         ARM::VST4d32Pseudo_UPD,
                                         ARM::VST1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                          ARM::VST4q16Pseudo_UPD,
                                          ARM::VST4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
                                          ARM::VST4q16oddPseudo_UPD,
                                          ARM::VST4q32oddPseudo_UPD };
    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */

      // The ARM-mode mrrc2 instruction does not allow predicates: the top
      // four bits of the encoding are always '1111'. Assembly language does
      // permit writing an AL predicate on mrrc2, but it makes no difference
      // to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // A store-exclusive double returns an i32 value which is the return
      // status of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit), extract the integer operands from
// its fields, and append these operands to the provided vector.
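// For example, given the 32-bit form above, the string "cp15:0:c13:c0:3"
// splits into five fields and produces the operands {15, 0, 13, 0, 3}.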
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
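// For example, banked-register strings such as "r8_usr" or "spsr_fiq" are
// looked up by name and their encodings returned.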
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps an MClass special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
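// For example, M-class system register names such as "msp", "psp" or
// "basepri" are looked up by name here, subject to the subtarget's feature
// bits.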
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand encodes the special register in bit 4 (the R bit):
  // 1 if the register is spsr, 0 for cpsr/apsr. Bits 3-0 contain the fields
  // to be accessed in the special register, set by the flags provided with
  // the register.
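  // Worked example of the logic below: for "spsr_fc", the flags 'f' and 'c'
  // contribute 0x8 and 0x1, and spsr sets the R bit, giving
  // 0x10 | 0x8 | 0x1 = 0x19.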
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
      break;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to construct as operands for the node.
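// For example, a read_register string of the "cp<...>" field form lowers to
// MRC/MRRC below, "apsr" lowers to an MRS variant, and an M-class system
// register name lowers to t2MRS_M.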
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target is M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by H, Q, R modifiers, so we
  // still pack them into a GPRPair.
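  // An illustrative (hypothetical) inline-asm use that exercises this path:
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val) : "r"(Addr));
  // where the i64 output %0 must land in an even/odd GPR pair.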

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    // be an immediate and not a memory constraint.
    LLVM_FALLTHROUGH;
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}