// lib/Target/ARM/ARMISelDAGToDAG.cpp (from llvm-complete.git)
// Snapshot context: revert of r354244 "[DAGCombiner] Eliminate dead stores to stack."
// blob f765334577db7b9f961dd69a238095f357f0499b
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
37 using namespace llvm;
39 #define DEBUG_TYPE "arm-isel"
41 static cl::opt<bool>
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
44 cl::init(false));
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
52 class ARMDAGToDAGISel : public SelectionDAGISel {
53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54 /// make the right decision when generating code for different targets.
55 const ARMSubtarget *Subtarget;
57 public:
58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
59 : SelectionDAGISel(tm, OptLevel) {}
61 bool runOnMachineFunction(MachineFunction &MF) override {
62 // Reset the subtarget each time through.
63 Subtarget = &MF.getSubtarget<ARMSubtarget>();
64 SelectionDAGISel::runOnMachineFunction(MF);
65 return true;
68 StringRef getPassName() const override { return "ARM Instruction Selection"; }
70 void PreprocessISelDAG() override;
72 /// getI32Imm - Return a target constant of type i32 with the specified
73 /// value.
74 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
75 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 void Select(SDNode *N) override;
80 bool hasNoVMLxHazardUse(SDNode *N) const;
81 bool isShifterOpProfitable(const SDValue &Shift,
82 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
83 bool SelectRegShifterOperand(SDValue N, SDValue &A,
84 SDValue &B, SDValue &C,
85 bool CheckProfitability = true);
86 bool SelectImmShifterOperand(SDValue N, SDValue &A,
87 SDValue &B, bool CheckProfitability = true);
88 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C) {
90 // Don't apply the profitability check
91 return SelectRegShifterOperand(N, A, B, C, false);
93 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
94 SDValue &B) {
95 // Don't apply the profitability check
96 return SelectImmShifterOperand(N, A, B, false);
99 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
101 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
102 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
104 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
105 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
106 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
107 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
108 return true;
111 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
112 SDValue &Offset, SDValue &Opc);
113 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
114 SDValue &Offset, SDValue &Opc);
115 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
116 SDValue &Offset, SDValue &Opc);
117 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
118 bool SelectAddrMode3(SDValue N, SDValue &Base,
119 SDValue &Offset, SDValue &Opc);
120 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
121 SDValue &Offset, SDValue &Opc);
122 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
123 int Lwb, int Upb, bool FP16);
124 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
125 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
126 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
127 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
129 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
131 // Thumb Addressing Modes:
132 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
133 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
134 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
135 SDValue &OffImm);
136 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
137 SDValue &OffImm);
138 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
139 SDValue &OffImm);
140 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
141 SDValue &OffImm);
142 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
144 // Thumb 2 Addressing Modes:
145 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
146 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
147 SDValue &OffImm);
148 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
149 SDValue &OffImm);
150 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
151 SDValue &OffReg, SDValue &ShImm);
152 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
154 inline bool is_so_imm(unsigned Imm) const {
155 return ARM_AM::getSOImmVal(Imm) != -1;
158 inline bool is_so_imm_not(unsigned Imm) const {
159 return ARM_AM::getSOImmVal(~Imm) != -1;
162 inline bool is_t2_so_imm(unsigned Imm) const {
163 return ARM_AM::getT2SOImmVal(Imm) != -1;
166 inline bool is_t2_so_imm_not(unsigned Imm) const {
167 return ARM_AM::getT2SOImmVal(~Imm) != -1;
170 // Include the pieces autogenerated from the target description.
171 #include "ARMGenDAGISel.inc"
173 private:
174 void transferMemOperands(SDNode *Src, SDNode *Dst);
176 /// Indexed (pre/post inc/dec) load matching code for ARM.
177 bool tryARMIndexedLoad(SDNode *N);
178 bool tryT1IndexedLoad(SDNode *N);
179 bool tryT2IndexedLoad(SDNode *N);
181 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
182 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
183 /// loads of D registers and even subregs and odd subregs of Q registers.
184 /// For NumVecs <= 2, QOpcodes1 is not used.
185 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
186 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
187 const uint16_t *QOpcodes1);
189 /// SelectVST - Select NEON store intrinsics. NumVecs should
190 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
191 /// stores of D registers and even subregs and odd subregs of Q registers.
192 /// For NumVecs <= 2, QOpcodes1 is not used.
193 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
194 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
195 const uint16_t *QOpcodes1);
197 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
198 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
199 /// load/store of D registers and Q registers.
200 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
201 unsigned NumVecs, const uint16_t *DOpcodes,
202 const uint16_t *QOpcodes);
204 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
205 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
206 /// for loading D registers.
207 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
208 unsigned NumVecs, const uint16_t *DOpcodes,
209 const uint16_t *QOpcodes0 = nullptr,
210 const uint16_t *QOpcodes1 = nullptr);
212 /// Try to select SBFX/UBFX instructions for ARM.
213 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
215 // Select special operations if node forms integer ABS pattern
216 bool tryABSOp(SDNode *N);
218 bool tryReadRegister(SDNode *N);
219 bool tryWriteRegister(SDNode *N);
221 bool tryInlineAsm(SDNode *N);
223 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
225 void SelectCMP_SWAP(SDNode *N);
227 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
228 /// inline asm expressions.
229 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
230 std::vector<SDValue> &OutOps) override;
232 // Form pairs of consecutive R, S, D, or Q registers.
233 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
234 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
235 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
236 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
238 // Form sequences of 4 consecutive S, D, or Q registers.
239 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
240 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
241 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
243 // Get the alignment operand for a NEON VLD or VST instruction.
244 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
245 bool is64BitVector);
247 /// Returns the number of instructions required to materialize the given
248 /// constant in a register, or 3 if a literal pool load is needed.
249 unsigned ConstantMaterializationCost(unsigned Val) const;
251 /// Checks if N is a multiplication by a constant where we can extract out a
252 /// power of two from the constant so that it can be used in a shift, but only
253 /// if it simplifies the materialization of the constant. Returns true if it
254 /// is, and assigns to PowerOfTwo the power of two that should be extracted
255 /// out and to NewMulConst the new constant to be multiplied by.
256 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
257 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
259 /// Replace N with M in CurDAG, in a way that also ensures that M gets
260 /// selected when N would have been selected.
261 void replaceDAGValue(const SDValue &N, SDValue M);
265 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
266 /// operand. If so Imm will receive the 32-bit value.
267 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
268 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
269 Imm = cast<ConstantSDNode>(N)->getZExtValue();
270 return true;
272 return false;
275 // isInt32Immediate - This method tests to see if a constant operand.
276 // If so Imm will receive the 32 bit value.
277 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
278 return isInt32Immediate(N.getNode(), Imm);
281 // isOpcWithIntImmediate - This method tests to see if the node is a specific
282 // opcode and that it has a immediate integer right operand.
283 // If so Imm will receive the 32 bit value.
284 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
285 return N->getOpcode() == Opc &&
286 isInt32Immediate(N->getOperand(1).getNode(), Imm);
289 /// Check whether a particular node is a constant value representable as
290 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
292 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
293 static bool isScaledConstantInRange(SDValue Node, int Scale,
294 int RangeMin, int RangeMax,
295 int &ScaledConstant) {
296 assert(Scale > 0 && "Invalid scale!");
298 // Check that this is a constant.
299 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
300 if (!C)
301 return false;
303 ScaledConstant = (int) C->getZExtValue();
304 if ((ScaledConstant % Scale) != 0)
305 return false;
307 ScaledConstant /= Scale;
308 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
311 void ARMDAGToDAGISel::PreprocessISelDAG() {
312 if (!Subtarget->hasV6T2Ops())
313 return;
315 bool isThumb2 = Subtarget->isThumb();
316 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
317 E = CurDAG->allnodes_end(); I != E; ) {
318 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
320 if (N->getOpcode() != ISD::ADD)
321 continue;
323 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
324 // leading zeros, followed by consecutive set bits, followed by 1 or 2
325 // trailing zeros, e.g. 1020.
326 // Transform the expression to
327 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
328 // of trailing zeros of c2. The left shift would be folded as an shifter
329 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
330 // node (UBFX).
332 SDValue N0 = N->getOperand(0);
333 SDValue N1 = N->getOperand(1);
334 unsigned And_imm = 0;
335 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
336 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
337 std::swap(N0, N1);
339 if (!And_imm)
340 continue;
342 // Check if the AND mask is an immediate of the form: 000.....1111111100
343 unsigned TZ = countTrailingZeros(And_imm);
344 if (TZ != 1 && TZ != 2)
345 // Be conservative here. Shifter operands aren't always free. e.g. On
346 // Swift, left shifter operand of 1 / 2 for free but others are not.
347 // e.g.
348 // ubfx r3, r1, #16, #8
349 // ldr.w r3, [r0, r3, lsl #2]
350 // vs.
351 // mov.w r9, #1020
352 // and.w r2, r9, r1, lsr #14
353 // ldr r2, [r0, r2]
354 continue;
355 And_imm >>= TZ;
356 if (And_imm & (And_imm + 1))
357 continue;
359 // Look for (and (srl X, c1), c2).
360 SDValue Srl = N1.getOperand(0);
361 unsigned Srl_imm = 0;
362 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
363 (Srl_imm <= 2))
364 continue;
366 // Make sure first operand is not a shifter operand which would prevent
367 // folding of the left shift.
368 SDValue CPTmp0;
369 SDValue CPTmp1;
370 SDValue CPTmp2;
371 if (isThumb2) {
372 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
373 continue;
374 } else {
375 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
376 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
377 continue;
380 // Now make the transformation.
381 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
382 Srl.getOperand(0),
383 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
384 MVT::i32));
385 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
386 Srl,
387 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
388 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
389 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
390 CurDAG->UpdateNodeOperands(N, N0, N1);
394 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
395 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
396 /// least on current ARM implementations) which should be avoidded.
397 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
398 if (OptLevel == CodeGenOpt::None)
399 return true;
401 if (!Subtarget->hasVMLxHazards())
402 return true;
404 if (!N->hasOneUse())
405 return false;
407 SDNode *Use = *N->use_begin();
408 if (Use->getOpcode() == ISD::CopyToReg)
409 return true;
410 if (Use->isMachineOpcode()) {
411 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
412 CurDAG->getSubtarget().getInstrInfo());
414 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
415 if (MCID.mayStore())
416 return true;
417 unsigned Opcode = MCID.getOpcode();
418 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
419 return true;
420 // vmlx feeding into another vmlx. We actually want to unfold
421 // the use later in the MLxExpansion pass. e.g.
422 // vmla
423 // vmla (stall 8 cycles)
425 // vmul (5 cycles)
426 // vadd (5 cycles)
427 // vmla
428 // This adds up to about 18 - 19 cycles.
430 // vmla
431 // vmul (stall 4 cycles)
432 // vadd adds up to about 14 cycles.
433 return TII->isFpMLxInstruction(Opcode);
436 return false;
439 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
440 ARM_AM::ShiftOpc ShOpcVal,
441 unsigned ShAmt) {
442 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
443 return true;
444 if (Shift.hasOneUse())
445 return true;
446 // R << 2 is free.
447 return ShOpcVal == ARM_AM::lsl &&
448 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
451 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
452 if (Subtarget->isThumb()) {
453 if (Val <= 255) return 1; // MOV
454 if (Subtarget->hasV6T2Ops() &&
455 (Val <= 0xffff || // MOV
456 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
457 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
458 return 1;
459 if (Val <= 510) return 2; // MOV + ADDi8
460 if (~Val <= 255) return 2; // MOV + MVN
461 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
462 } else {
463 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
464 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
465 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
466 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
468 if (Subtarget->useMovt()) return 2; // MOVW + MOVT
469 return 3; // Literal pool load
472 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
473 unsigned MaxShift,
474 unsigned &PowerOfTwo,
475 SDValue &NewMulConst) const {
476 assert(N.getOpcode() == ISD::MUL);
477 assert(MaxShift > 0);
479 // If the multiply is used in more than one place then changing the constant
480 // will make other uses incorrect, so don't.
481 if (!N.hasOneUse()) return false;
482 // Check if the multiply is by a constant
483 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
484 if (!MulConst) return false;
485 // If the constant is used in more than one place then modifying it will mean
486 // we need to materialize two constants instead of one, which is a bad idea.
487 if (!MulConst->hasOneUse()) return false;
488 unsigned MulConstVal = MulConst->getZExtValue();
489 if (MulConstVal == 0) return false;
491 // Find the largest power of 2 that MulConstVal is a multiple of
492 PowerOfTwo = MaxShift;
493 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
494 --PowerOfTwo;
495 if (PowerOfTwo == 0) return false;
498 // Only optimise if the new cost is better
499 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
500 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
501 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
502 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
503 return NewCost < OldCost;
506 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
507 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
508 ReplaceUses(N, M);
511 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
512 SDValue &BaseReg,
513 SDValue &Opc,
514 bool CheckProfitability) {
515 if (DisableShifterOp)
516 return false;
518 // If N is a multiply-by-constant and it's profitable to extract a shift and
519 // use it in a shifted operand do so.
520 if (N.getOpcode() == ISD::MUL) {
521 unsigned PowerOfTwo = 0;
522 SDValue NewMulConst;
523 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
524 HandleSDNode Handle(N);
525 SDLoc Loc(N);
526 replaceDAGValue(N.getOperand(1), NewMulConst);
527 BaseReg = Handle.getValue();
528 Opc = CurDAG->getTargetConstant(
529 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
530 return true;
534 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
536 // Don't match base register only case. That is matched to a separate
537 // lower complexity pattern with explicit register operand.
538 if (ShOpcVal == ARM_AM::no_shift) return false;
540 BaseReg = N.getOperand(0);
541 unsigned ShImmVal = 0;
542 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
543 if (!RHS) return false;
544 ShImmVal = RHS->getZExtValue() & 31;
545 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
546 SDLoc(N), MVT::i32);
547 return true;
550 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
551 SDValue &BaseReg,
552 SDValue &ShReg,
553 SDValue &Opc,
554 bool CheckProfitability) {
555 if (DisableShifterOp)
556 return false;
558 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
560 // Don't match base register only case. That is matched to a separate
561 // lower complexity pattern with explicit register operand.
562 if (ShOpcVal == ARM_AM::no_shift) return false;
564 BaseReg = N.getOperand(0);
565 unsigned ShImmVal = 0;
566 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
567 if (RHS) return false;
569 ShReg = N.getOperand(1);
570 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
571 return false;
572 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
573 SDLoc(N), MVT::i32);
574 return true;
577 // Determine whether an ISD::OR's operands are suitable to turn the operation
578 // into an addition, which often has more compact encodings.
579 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
580 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
581 Out = N;
582 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
586 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
587 SDValue &Base,
588 SDValue &OffImm) {
589 // Match simple R + imm12 operands.
591 // Base only.
592 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
593 !CurDAG->isBaseWithConstantOffset(N)) {
594 if (N.getOpcode() == ISD::FrameIndex) {
595 // Match frame index.
596 int FI = cast<FrameIndexSDNode>(N)->getIndex();
597 Base = CurDAG->getTargetFrameIndex(
598 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
599 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
600 return true;
603 if (N.getOpcode() == ARMISD::Wrapper &&
604 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
605 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
606 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
607 Base = N.getOperand(0);
608 } else
609 Base = N;
610 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
611 return true;
614 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
615 int RHSC = (int)RHS->getSExtValue();
616 if (N.getOpcode() == ISD::SUB)
617 RHSC = -RHSC;
619 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
620 Base = N.getOperand(0);
621 if (Base.getOpcode() == ISD::FrameIndex) {
622 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
623 Base = CurDAG->getTargetFrameIndex(
624 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
626 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
627 return true;
631 // Base only.
632 Base = N;
633 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
634 return true;
639 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
640 SDValue &Opc) {
641 if (N.getOpcode() == ISD::MUL &&
642 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
643 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
644 // X * [3,5,9] -> X + X * [2,4,8] etc.
645 int RHSC = (int)RHS->getZExtValue();
646 if (RHSC & 1) {
647 RHSC = RHSC & ~1;
648 ARM_AM::AddrOpc AddSub = ARM_AM::add;
649 if (RHSC < 0) {
650 AddSub = ARM_AM::sub;
651 RHSC = - RHSC;
653 if (isPowerOf2_32(RHSC)) {
654 unsigned ShAmt = Log2_32(RHSC);
655 Base = Offset = N.getOperand(0);
656 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
657 ARM_AM::lsl),
658 SDLoc(N), MVT::i32);
659 return true;
665 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
666 // ISD::OR that is equivalent to an ISD::ADD.
667 !CurDAG->isBaseWithConstantOffset(N))
668 return false;
670 // Leave simple R +/- imm12 operands for LDRi12
671 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
672 int RHSC;
673 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
674 -0x1000+1, 0x1000, RHSC)) // 12 bits.
675 return false;
678 // Otherwise this is R +/- [possibly shifted] R.
679 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
680 ARM_AM::ShiftOpc ShOpcVal =
681 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
682 unsigned ShAmt = 0;
684 Base = N.getOperand(0);
685 Offset = N.getOperand(1);
687 if (ShOpcVal != ARM_AM::no_shift) {
688 // Check to see if the RHS of the shift is a constant, if not, we can't fold
689 // it.
690 if (ConstantSDNode *Sh =
691 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
692 ShAmt = Sh->getZExtValue();
693 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
694 Offset = N.getOperand(1).getOperand(0);
695 else {
696 ShAmt = 0;
697 ShOpcVal = ARM_AM::no_shift;
699 } else {
700 ShOpcVal = ARM_AM::no_shift;
704 // Try matching (R shl C) + (R).
705 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
706 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
707 N.getOperand(0).hasOneUse())) {
708 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
709 if (ShOpcVal != ARM_AM::no_shift) {
710 // Check to see if the RHS of the shift is a constant, if not, we can't
711 // fold it.
712 if (ConstantSDNode *Sh =
713 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
714 ShAmt = Sh->getZExtValue();
715 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
716 Offset = N.getOperand(0).getOperand(0);
717 Base = N.getOperand(1);
718 } else {
719 ShAmt = 0;
720 ShOpcVal = ARM_AM::no_shift;
722 } else {
723 ShOpcVal = ARM_AM::no_shift;
728 // If Offset is a multiply-by-constant and it's profitable to extract a shift
729 // and use it in a shifted operand do so.
730 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
731 unsigned PowerOfTwo = 0;
732 SDValue NewMulConst;
733 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
734 HandleSDNode Handle(Offset);
735 replaceDAGValue(Offset.getOperand(1), NewMulConst);
736 Offset = Handle.getValue();
737 ShAmt = PowerOfTwo;
738 ShOpcVal = ARM_AM::lsl;
742 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
743 SDLoc(N), MVT::i32);
744 return true;
747 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
748 SDValue &Offset, SDValue &Opc) {
749 unsigned Opcode = Op->getOpcode();
750 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
751 ? cast<LoadSDNode>(Op)->getAddressingMode()
752 : cast<StoreSDNode>(Op)->getAddressingMode();
753 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
754 ? ARM_AM::add : ARM_AM::sub;
755 int Val;
756 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
757 return false;
759 Offset = N;
760 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
761 unsigned ShAmt = 0;
762 if (ShOpcVal != ARM_AM::no_shift) {
763 // Check to see if the RHS of the shift is a constant, if not, we can't fold
764 // it.
765 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
766 ShAmt = Sh->getZExtValue();
767 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
768 Offset = N.getOperand(0);
769 else {
770 ShAmt = 0;
771 ShOpcVal = ARM_AM::no_shift;
773 } else {
774 ShOpcVal = ARM_AM::no_shift;
778 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
779 SDLoc(N), MVT::i32);
780 return true;
783 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
784 SDValue &Offset, SDValue &Opc) {
785 unsigned Opcode = Op->getOpcode();
786 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
787 ? cast<LoadSDNode>(Op)->getAddressingMode()
788 : cast<StoreSDNode>(Op)->getAddressingMode();
789 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
790 ? ARM_AM::add : ARM_AM::sub;
791 int Val;
792 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
793 if (AddSub == ARM_AM::sub) Val *= -1;
794 Offset = CurDAG->getRegister(0, MVT::i32);
795 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
796 return true;
799 return false;
803 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
804 SDValue &Offset, SDValue &Opc) {
805 unsigned Opcode = Op->getOpcode();
806 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
807 ? cast<LoadSDNode>(Op)->getAddressingMode()
808 : cast<StoreSDNode>(Op)->getAddressingMode();
809 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
810 ? ARM_AM::add : ARM_AM::sub;
811 int Val;
812 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
813 Offset = CurDAG->getRegister(0, MVT::i32);
814 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
815 ARM_AM::no_shift),
816 SDLoc(Op), MVT::i32);
817 return true;
820 return false;
823 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
824 Base = N;
825 return true;
828 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
829 SDValue &Base, SDValue &Offset,
830 SDValue &Opc) {
831 if (N.getOpcode() == ISD::SUB) {
832 // X - C is canonicalize to X + -C, no need to handle it here.
833 Base = N.getOperand(0);
834 Offset = N.getOperand(1);
835 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
836 MVT::i32);
837 return true;
840 if (!CurDAG->isBaseWithConstantOffset(N)) {
841 Base = N;
842 if (N.getOpcode() == ISD::FrameIndex) {
843 int FI = cast<FrameIndexSDNode>(N)->getIndex();
844 Base = CurDAG->getTargetFrameIndex(
845 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
847 Offset = CurDAG->getRegister(0, MVT::i32);
848 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
849 MVT::i32);
850 return true;
853 // If the RHS is +/- imm8, fold into addr mode.
854 int RHSC;
855 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
856 -256 + 1, 256, RHSC)) { // 8 bits.
857 Base = N.getOperand(0);
858 if (Base.getOpcode() == ISD::FrameIndex) {
859 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
860 Base = CurDAG->getTargetFrameIndex(
861 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
863 Offset = CurDAG->getRegister(0, MVT::i32);
865 ARM_AM::AddrOpc AddSub = ARM_AM::add;
866 if (RHSC < 0) {
867 AddSub = ARM_AM::sub;
868 RHSC = -RHSC;
870 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
871 MVT::i32);
872 return true;
875 Base = N.getOperand(0);
876 Offset = N.getOperand(1);
877 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
878 MVT::i32);
879 return true;
882 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
883 SDValue &Offset, SDValue &Opc) {
884 unsigned Opcode = Op->getOpcode();
885 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
886 ? cast<LoadSDNode>(Op)->getAddressingMode()
887 : cast<StoreSDNode>(Op)->getAddressingMode();
888 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
889 ? ARM_AM::add : ARM_AM::sub;
890 int Val;
891 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
892 Offset = CurDAG->getRegister(0, MVT::i32);
893 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
894 MVT::i32);
895 return true;
898 Offset = N;
899 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
900 MVT::i32);
901 return true;
/// IsAddressingMode5 - Shared matcher for the VFP addrmode5 forms (base +
/// scaled imm8). \p Lwb / \p Upb bound the scaled-down immediate, and \p FP16
/// selects the half-precision variant (scale 2 and the FP16 opcode encoding
/// instead of scale 4).
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        int Lwb, int Upb, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      // Use the target frame index so a later frame lowering can fold it.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, except around targets that must stay wrapped.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // The encoding stores magnitude plus an add/sub bit.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant offset out of range: fall back to base-only with a zero offset.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
963 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
964 SDValue &Base, SDValue &Offset) {
965 int Lwb = -256 + 1;
966 int Upb = 256;
967 return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
970 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
971 SDValue &Base, SDValue &Offset) {
972 int Lwb = -512 + 1;
973 int Upb = 512;
974 return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
/// SelectAddrMode6 - Select the address and alignment operands for a NEON
/// VLD/VST (addrmode6). The address is taken as-is; this only computes the
/// alignment immediate that accompanies it.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1006 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1007 SDValue &Offset) {
1008 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1009 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1010 if (AM != ISD::POST_INC)
1011 return false;
1012 Offset = N;
1013 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1014 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1015 Offset = CurDAG->getRegister(0, MVT::i32);
1017 return true;
1020 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1021 SDValue &Offset, SDValue &Label) {
1022 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1023 Offset = N.getOperand(0);
1024 SDValue N1 = N.getOperand(1);
1025 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1026 SDLoc(N), MVT::i32);
1027 return true;
1030 return false;
1034 //===----------------------------------------------------------------------===//
1035 // Thumb Addressing Modes
1036 //===----------------------------------------------------------------------===//
1038 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1039 // Negative numbers are difficult to materialise in thumb1. If we are
1040 // selecting the add of a negative, instead try to select ri with a zero
1041 // offset, so create the add node directly which will become a sub.
1042 if (N.getOpcode() != ISD::ADD)
1043 return false;
1045 // Look for an imm which is not legal for ld/st, but is legal for sub.
1046 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1047 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1049 return false;
/// SelectThumbAddrModeRRSext - Select a Thumb1 [reg, reg] address for the
/// sign-extending loads (ldrsb/ldrsh), which have no immediate-offset form.
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    // A materialised constant zero can serve as both base and offset.
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}
1068 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1069 SDValue &Offset) {
1070 if (shouldUseZeroOffsetLdSt(N))
1071 return false; // Select ri instead
1072 return SelectThumbAddrModeRRSext(N, Base, Offset);
/// SelectThumbAddrModeImm5S - Match a Thumb1 base + imm5*Scale address.
/// Returns the base in Base and the scaled-down immediate in OffImm, or
/// false so that a register-offset form is selected instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Negative immediate: select base-only with zero offset; the add node
    // itself will then be selected as a sub.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper except around targets that must stay wrapped.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1113 bool
1114 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1115 SDValue &OffImm) {
1116 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1119 bool
1120 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1121 SDValue &OffImm) {
1122 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1125 bool
1126 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1127 SDValue &OffImm) {
1128 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
/// SelectThumbAddrModeSP - Match a Thumb1 SP- or frame-index-relative
/// address (tLDRspi/tSTRspi): base + imm8 scaled by 4. May raise the frame
/// object's alignment to 4 to keep the offset a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  // The base must be SP or a frame index for this addressing mode.
  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1175 //===----------------------------------------------------------------------===//
1176 // Thumb 2 Addressing Modes
1177 //===----------------------------------------------------------------------===//
/// SelectT2AddrModeImm12 - Match a Thumb2 base + unsigned imm12 address
/// (t2LDRi12/t2STRi12). Defers (R - imm8) to t2LDRi8 and constant pools to
/// t2LDRpci.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
/// SelectT2AddrModeImm8 - Match a Thumb2 base with a *negative* imm8 offset
/// (t2LDRi8/t2STRi8). Non-negative offsets are left for the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
/// SelectT2AddrModeImm8Offset - Select the imm8 offset operand for a Thumb2
/// pre/post-indexed load or store. The sign of the emitted immediate follows
/// the increment/decrement direction of the addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  // Op is the indexed memory node; inc vs. dec decides the immediate's sign.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
/// SelectT2AddrModeSoReg - Match a Thumb2 register + shifted-register address
/// (R + R or R + (R << [1,2,3])). Immediate-offset forms are deliberately
/// rejected so the imm12/imm8 patterns get first pick.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 0-3 are encodable; otherwise fall back to ShAmt 0.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
/// SelectT2AddrModeExclusive - Select the base + imm8*4 address for ldrex and
/// strex. Always succeeds (falling back to base + 0) because these
/// instructions have no alternative addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // The offset must be a multiple of 4 in [0, 1020] (imm8 scaled by 4).
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The instruction encodes the offset divided by 4.
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1369 //===--------------------------------------------------------------------===//
1371 /// getAL - Returns a ARMCC::AL immediate node.
1372 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1373 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1376 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1377 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1378 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
/// tryARMIndexedLoad - Try to select an indexed (pre/post inc/dec) load as an
/// ARM-mode LDR/LDRB/LDRH/LDRSB/LDRSH variant. The match order below is
/// significant: immediate forms are preferred over register forms.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3 for both extension kinds.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Signed byte loads use addrmode3; unsigned byte loads use addrmode2.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms take no separate offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
/// tryT1IndexedLoad - Try to select a Thumb1 post-incremented i32 load.
/// Only post-inc by exactly 4 of a non-extending i32 load can be matched,
/// since it maps onto a single-register LDM.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  // LDM always advances by the transfer size, so the increment must be 4.
  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
/// tryT2IndexedLoad - Try to select an indexed load as a Thumb2 pre/post
/// indexed LDR variant, picking the opcode from the memory type and the
/// extension kind.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1538 /// Form a GPRPair pseudo register from a pair of GPR regs.
1539 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1540 SDLoc dl(V0.getNode());
1541 SDValue RegClass =
1542 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1543 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1544 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1545 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1546 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1549 /// Form a D register from a pair of S registers.
1550 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1551 SDLoc dl(V0.getNode());
1552 SDValue RegClass =
1553 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1554 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1555 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1556 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1557 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1560 /// Form a quad register from a pair of D registers.
1561 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1562 SDLoc dl(V0.getNode());
1563 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1564 MVT::i32);
1565 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1566 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1567 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1568 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1571 /// Form 4 consecutive D registers from a pair of Q registers.
1572 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1573 SDLoc dl(V0.getNode());
1574 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1575 MVT::i32);
1576 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1577 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1578 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1579 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1582 /// Form 4 consecutive S registers.
1583 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1584 SDValue V2, SDValue V3) {
1585 SDLoc dl(V0.getNode());
1586 SDValue RegClass =
1587 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1588 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1589 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1590 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1591 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1592 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1593 V2, SubReg2, V3, SubReg3 };
1594 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1597 /// Form 4 consecutive D registers.
1598 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1599 SDValue V2, SDValue V3) {
1600 SDLoc dl(V0.getNode());
1601 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1602 MVT::i32);
1603 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1604 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1605 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1606 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1607 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1608 V2, SubReg2, V3, SubReg3 };
1609 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1612 /// Form 4 consecutive Q registers.
1613 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1614 SDValue V2, SDValue V3) {
1615 SDLoc dl(V0.getNode());
1616 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1617 MVT::i32);
1618 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1619 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1620 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1621 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1622 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1623 V2, SubReg2, V3, SubReg3 };
1624 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1627 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1628 /// of a NEON VLD or VST instruction. The supported values depend on the
1629 /// number of registers being loaded.
1630 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1631 unsigned NumVecs, bool is64BitVector) {
1632 unsigned NumRegs = NumVecs;
1633 if (!is64BitVector && NumVecs < 3)
1634 NumRegs *= 2;
1636 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1637 if (Alignment >= 32 && NumRegs == 4)
1638 Alignment = 32;
1639 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1640 Alignment = 16;
1641 else if (Alignment >= 8)
1642 Alignment = 8;
1643 else
1644 Alignment = 0;
1646 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1649 static bool isVLDfixed(unsigned Opc)
1651 switch (Opc) {
1652 default: return false;
1653 case ARM::VLD1d8wb_fixed : return true;
1654 case ARM::VLD1d16wb_fixed : return true;
1655 case ARM::VLD1d64Qwb_fixed : return true;
1656 case ARM::VLD1d32wb_fixed : return true;
1657 case ARM::VLD1d64wb_fixed : return true;
1658 case ARM::VLD1d64TPseudoWB_fixed : return true;
1659 case ARM::VLD1d64QPseudoWB_fixed : return true;
1660 case ARM::VLD1q8wb_fixed : return true;
1661 case ARM::VLD1q16wb_fixed : return true;
1662 case ARM::VLD1q32wb_fixed : return true;
1663 case ARM::VLD1q64wb_fixed : return true;
1664 case ARM::VLD1DUPd8wb_fixed : return true;
1665 case ARM::VLD1DUPd16wb_fixed : return true;
1666 case ARM::VLD1DUPd32wb_fixed : return true;
1667 case ARM::VLD1DUPq8wb_fixed : return true;
1668 case ARM::VLD1DUPq16wb_fixed : return true;
1669 case ARM::VLD1DUPq32wb_fixed : return true;
1670 case ARM::VLD2d8wb_fixed : return true;
1671 case ARM::VLD2d16wb_fixed : return true;
1672 case ARM::VLD2d32wb_fixed : return true;
1673 case ARM::VLD2q8PseudoWB_fixed : return true;
1674 case ARM::VLD2q16PseudoWB_fixed : return true;
1675 case ARM::VLD2q32PseudoWB_fixed : return true;
1676 case ARM::VLD2DUPd8wb_fixed : return true;
1677 case ARM::VLD2DUPd16wb_fixed : return true;
1678 case ARM::VLD2DUPd32wb_fixed : return true;
1682 static bool isVSTfixed(unsigned Opc)
1684 switch (Opc) {
1685 default: return false;
1686 case ARM::VST1d8wb_fixed : return true;
1687 case ARM::VST1d16wb_fixed : return true;
1688 case ARM::VST1d32wb_fixed : return true;
1689 case ARM::VST1d64wb_fixed : return true;
1690 case ARM::VST1q8wb_fixed : return true;
1691 case ARM::VST1q16wb_fixed : return true;
1692 case ARM::VST1q32wb_fixed : return true;
1693 case ARM::VST1q64wb_fixed : return true;
1694 case ARM::VST1d64TPseudoWB_fixed : return true;
1695 case ARM::VST1d64QPseudoWB_fixed : return true;
1696 case ARM::VST2d8wb_fixed : return true;
1697 case ARM::VST2d16wb_fixed : return true;
1698 case ARM::VST2d32wb_fixed : return true;
1699 case ARM::VST2q8PseudoWB_fixed : return true;
1700 case ARM::VST2q16PseudoWB_fixed : return true;
1701 case ARM::VST2q32PseudoWB_fixed : return true;
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1763 /// Returns true if the given increment is a Constant known to be equal to the
1764 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1765 /// be used.
1766 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1767 auto C = dyn_cast<ConstantSDNode>(Inc);
1768 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
/// Select a NEON VLD1/VLD2/VLD3/VLD4 node (NumVecs = 1..4), replacing \p N
/// with machine node(s) and extract_subreg nodes for the individual vectors.
///
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors loaded (1-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) element results,
///                    indexed by element size (see OpcodeIndex below).
/// \param QOpcodes0   opcodes for 128-bit (Q-register) results; for quad
///                    VLD3/VLD4 this is the "even registers" first half.
/// \param QOpcodes1   second-half opcodes, used only for quad VLD3/VLD4
///                    which are emitted as two instructions.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  // Intrinsic nodes carry the intrinsic ID in operand 1, so the address is
  // one operand further along.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector results come back as one wide i64-element super-register;
  // a 3-vector result is padded out to 4 registers.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list: super-register, optional i32 writeback value, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs. Result 1 of VLdA is the post-incremented address.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Chain, then (for updating forms) the writeback value.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
/// Select a NEON VST1/VST2/VST3/VST4 node (NumVecs = 1..4), replacing \p N
/// with the matching machine node(s).
///
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors stored (1-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) element sources.
/// \param QOpcodes0   opcodes for 128-bit (Q-register) sources; for quad
///                    VST3/VST4 this is the "even registers" first half.
/// \param QOpcodes1   second-half opcodes, used only for quad VST3/VST4
///                    which are emitted as two instructions.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The stored value type comes from the first vector operand.
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Result list: optional i32 writeback value, then the chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers. Result 0 of VStA is the incremented address.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
/// Select a NEON single-lane load or store (vld2/3/4lane, vst2/3/4lane),
/// replacing \p N with the matching machine node and, for loads, with
/// extract_subreg nodes for the individual result vectors.
///
/// \param IsLoad      true for vldNlane, false for vstNlane.
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors accessed (2-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) vectors.
/// \param QOpcodes    opcodes for 128-bit (Q-register) vectors.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand following the vectors.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to what the instruction can actually encode: at most
  // the number of bytes accessed, at least 8-byte if below the access size,
  // and always a power of two (1 is encoded as "no alignment").
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result list: (loads only) the wide super-register, then an optional i32
  // writeback value, then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // The lane access size is that of a single element per vector.
    bool IsImmUpdate =
      isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Gather the input vectors into one super-register via REG_SEQUENCE.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    // For the 3-vector forms the fourth slot is left undef.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
/// Select a NEON load-and-duplicate node (vld1dup/vld2dup/vld3dup/vld4dup),
/// replacing \p N with the matching machine node(s) and extract_subreg nodes
/// for the individual result vectors.
///
/// \param IsIntrinsic true when \p N is an intrinsic node (address at
///                    operand 2 instead of operand 1).
/// \param isUpdating  true for the post-incrementing (writeback) forms.
/// \param NumVecs     number of vectors loaded (1-4).
/// \param DOpcodes    opcodes for 64-bit (D-register) results.
/// \param QOpcodes0   first-instruction opcodes for Q-register results.
/// \param QOpcodes1   second-instruction opcodes for Q-register results
///                    (quad forms are emitted as two instructions).
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to what the instruction can encode (see the same
  // logic in SelectVLDSTLane): capped at the access size, power of two,
  // with 1 meaning "no alignment".
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The result is one wide i64-element super-register; a 3-vector result is
  // padded out to 4 registers.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  // Result list: super-register, optional i32 writeback value, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // Quad vld2dup: two instructions chained together.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // Quad vld3/4dup: the second instruction takes the first's result as a
    // tied input, seeded with an IMPLICIT_DEF.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
/// Try to select an ARMv6T2 bitfield extract (UBFX/SBFX, or a plain shift
/// when the field reaches the top bit) for \p N, which is expected to be one
/// of: and(srl X, c1), c2; srl(shl X, c1), c2; srl(and X, mask), c;
/// or sign_extend_inreg(srl/sra X, c).
///
/// \param isSigned selects SBFX (and ASR for the top-bits case) instead of
///                 UBFX/LSR.
/// \returns true if \p N was replaced with a machine node.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB means the field would start below bit 0 - not an
      // extract.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
2451 /// Target-specific DAG combining for ISD::XOR.
2452 /// Target-independent combining lowers SELECT_CC nodes of the form
2453 /// select_cc setg[ge] X, 0, X, -X
2454 /// select_cc setgt X, -1, X, -X
2455 /// select_cc setl[te] X, 0, -X, X
2456 /// select_cc setlt X, 1, -X, X
2457 /// which represent Integer ABS into:
2458 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2459 /// ARM instruction selection detects the latter and matches it to
2460 /// ARM::ABS or ARM::t2ABS machine node.
2461 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2462 SDValue XORSrc0 = N->getOperand(0);
2463 SDValue XORSrc1 = N->getOperand(1);
2464 EVT VT = N->getValueType(0);
2466 if (Subtarget->isThumb1Only())
2467 return false;
2469 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2470 return false;
2472 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2473 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2474 SDValue SRASrc0 = XORSrc1.getOperand(0);
2475 SDValue SRASrc1 = XORSrc1.getOperand(1);
2476 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2477 EVT XType = SRASrc0.getValueType();
2478 unsigned Size = XType.getSizeInBits() - 1;
2480 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2481 XType.isInteger() && SRAConstant != nullptr &&
2482 Size == SRAConstant->getZExtValue()) {
2483 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2484 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2485 return true;
2488 return false;
2491 /// We've got special pseudo-instructions for these
2492 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2493 unsigned Opcode;
2494 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2495 if (MemTy == MVT::i8)
2496 Opcode = ARM::CMP_SWAP_8;
2497 else if (MemTy == MVT::i16)
2498 Opcode = ARM::CMP_SWAP_16;
2499 else if (MemTy == MVT::i32)
2500 Opcode = ARM::CMP_SWAP_32;
2501 else
2502 llvm_unreachable("Unknown AtomicCmpSwap type");
2504 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2505 N->getOperand(0)};
2506 SDNode *CmpSwap = CurDAG->getMachineNode(
2507 Opcode, SDLoc(N),
2508 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2510 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2511 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2513 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2514 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2515 CurDAG->RemoveDeadNode(N);
2518 static Optional<std::pair<unsigned, unsigned>>
2519 getContiguousRangeOfSetBits(const APInt &A) {
2520 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2521 unsigned LastOne = A.countTrailingZeros();
2522 if (A.countPopulation() != (FirstOne - LastOne + 1))
2523 return Optional<std::pair<unsigned,unsigned>>();
2524 return std::make_pair(FirstOne, LastOne);
/// Try to turn the operand of a CMPZ-against-zero into flag-setting shifts.
/// On Thumb targets, (cmpz (and X, C), #0) with a contiguous mask C can be
/// implemented as LSLS/LSRS (or both), which set the flags directly.
///
/// \param[out] SwitchEQNEToPLMI set to true when the caller must rewrite an
///             EQ/NE condition to PL/MI (single-bit masks are tested via the
///             sign bit after shifting the bit to position 31).
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Only contiguous masks can be implemented with (at most two) shifts.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting immediate shift: Thumb2 uses t2LSLri/t2LSRri with the
  // optional-def CPSR operand; Thumb1 shift opcodes take CPSR explicitly.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  // Range->first is the mask's highest set bit, Range->second its lowest.
  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
2599 void ARMDAGToDAGISel::Select(SDNode *N) {
2600 SDLoc dl(N);
2602 if (N->isMachineOpcode()) {
2603 N->setNodeId(-1);
2604 return; // Already selected.
2607 switch (N->getOpcode()) {
2608 default: break;
2609 case ISD::WRITE_REGISTER:
2610 if (tryWriteRegister(N))
2611 return;
2612 break;
2613 case ISD::READ_REGISTER:
2614 if (tryReadRegister(N))
2615 return;
2616 break;
2617 case ISD::INLINEASM:
2618 case ISD::INLINEASM_BR:
2619 if (tryInlineAsm(N))
2620 return;
2621 break;
2622 case ISD::XOR:
2623 // Select special operations if XOR node forms integer ABS pattern
2624 if (tryABSOp(N))
2625 return;
2626 // Other cases are autogenerated.
2627 break;
2628 case ISD::Constant: {
2629 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2630 // If we can't materialize the constant we need to use a literal pool
2631 if (ConstantMaterializationCost(Val) > 2) {
2632 SDValue CPIdx = CurDAG->getTargetConstantPool(
2633 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2634 TLI->getPointerTy(CurDAG->getDataLayout()));
2636 SDNode *ResNode;
2637 if (Subtarget->isThumb()) {
2638 SDValue Ops[] = {
2639 CPIdx,
2640 getAL(CurDAG, dl),
2641 CurDAG->getRegister(0, MVT::i32),
2642 CurDAG->getEntryNode()
2644 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2645 Ops);
2646 } else {
2647 SDValue Ops[] = {
2648 CPIdx,
2649 CurDAG->getTargetConstant(0, dl, MVT::i32),
2650 getAL(CurDAG, dl),
2651 CurDAG->getRegister(0, MVT::i32),
2652 CurDAG->getEntryNode()
2654 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2655 Ops);
2657 // Annotate the Node with memory operand information so that MachineInstr
2658 // queries work properly. This e.g. gives the register allocation the
2659 // required information for rematerialization.
2660 MachineFunction& MF = CurDAG->getMachineFunction();
2661 MachineMemOperand *MemOp =
2662 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2663 MachineMemOperand::MOLoad, 4, 4);
2665 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2667 ReplaceNode(N, ResNode);
2668 return;
2671 // Other cases are autogenerated.
2672 break;
2674 case ISD::FrameIndex: {
2675 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2676 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2677 SDValue TFI = CurDAG->getTargetFrameIndex(
2678 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2679 if (Subtarget->isThumb1Only()) {
2680 // Set the alignment of the frame object to 4, to avoid having to generate
2681 // more than one ADD
2682 MachineFrameInfo &MFI = MF->getFrameInfo();
2683 if (MFI.getObjectAlignment(FI) < 4)
2684 MFI.setObjectAlignment(FI, 4);
2685 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2686 CurDAG->getTargetConstant(0, dl, MVT::i32));
2687 return;
2688 } else {
2689 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2690 ARM::t2ADDri : ARM::ADDri);
2691 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2692 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2693 CurDAG->getRegister(0, MVT::i32) };
2694 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2695 return;
2698 case ISD::SRL:
2699 if (tryV6T2BitfieldExtractOp(N, false))
2700 return;
2701 break;
2702 case ISD::SIGN_EXTEND_INREG:
2703 case ISD::SRA:
2704 if (tryV6T2BitfieldExtractOp(N, true))
2705 return;
2706 break;
2707 case ISD::MUL:
2708 if (Subtarget->isThumb1Only())
2709 break;
2710 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2711 unsigned RHSV = C->getZExtValue();
2712 if (!RHSV) break;
2713 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2714 unsigned ShImm = Log2_32(RHSV-1);
2715 if (ShImm >= 32)
2716 break;
2717 SDValue V = N->getOperand(0);
2718 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2719 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2720 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2721 if (Subtarget->isThumb()) {
2722 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2723 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2724 return;
2725 } else {
2726 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2727 Reg0 };
2728 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2729 return;
2732 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2733 unsigned ShImm = Log2_32(RHSV+1);
2734 if (ShImm >= 32)
2735 break;
2736 SDValue V = N->getOperand(0);
2737 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2738 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2739 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2740 if (Subtarget->isThumb()) {
2741 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2742 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2743 return;
2744 } else {
2745 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2746 Reg0 };
2747 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2748 return;
2752 break;
2753 case ISD::AND: {
2754 // Check for unsigned bitfield extract
2755 if (tryV6T2BitfieldExtractOp(N, false))
2756 return;
2758 // If an immediate is used in an AND node, it is possible that the immediate
2759 // can be more optimally materialized when negated. If this is the case we
2760 // can negate the immediate and use a BIC instead.
2761 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2762 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2763 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2765 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2766 // immediate can be negated and fit in the immediate operand of
2767 // a t2BIC, don't do any manual transform here as this can be
2768 // handled by the generic ISel machinery.
2769 bool PreferImmediateEncoding =
2770 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2771 if (!PreferImmediateEncoding &&
2772 ConstantMaterializationCost(Imm) >
2773 ConstantMaterializationCost(~Imm)) {
2774 // The current immediate costs more to materialize than a negated
2775 // immediate, so negate the immediate and use a BIC.
2776 SDValue NewImm =
2777 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2778 // If the new constant didn't exist before, reposition it in the topological
2779 // ordering so it is just before N. Otherwise, don't touch its location.
2780 if (NewImm->getNodeId() == -1)
2781 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2783 if (!Subtarget->hasThumb2()) {
2784 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2785 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2786 CurDAG->getRegister(0, MVT::i32)};
2787 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2788 return;
2789 } else {
2790 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2791 CurDAG->getRegister(0, MVT::i32),
2792 CurDAG->getRegister(0, MVT::i32)};
2793 ReplaceNode(N,
2794 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2795 return;
2800 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2801 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2802 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2803 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2804 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2805 EVT VT = N->getValueType(0);
2806 if (VT != MVT::i32)
2807 break;
2808 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2809 ? ARM::t2MOVTi16
2810 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2811 if (!Opc)
2812 break;
2813 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2814 N1C = dyn_cast<ConstantSDNode>(N1);
2815 if (!N1C)
2816 break;
2817 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2818 SDValue N2 = N0.getOperand(1);
2819 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2820 if (!N2C)
2821 break;
2822 unsigned N1CVal = N1C->getZExtValue();
2823 unsigned N2CVal = N2C->getZExtValue();
2824 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2825 (N1CVal & 0xffffU) == 0xffffU &&
2826 (N2CVal & 0xffffU) == 0x0U) {
2827 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2828 dl, MVT::i32);
2829 SDValue Ops[] = { N0.getOperand(0), Imm16,
2830 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2831 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2832 return;
2836 break;
2838 case ARMISD::UMAAL: {
2839 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2840 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2841 N->getOperand(2), N->getOperand(3),
2842 getAL(CurDAG, dl),
2843 CurDAG->getRegister(0, MVT::i32) };
2844 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2845 return;
2847 case ARMISD::UMLAL:{
2848 if (Subtarget->isThumb()) {
2849 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2850 N->getOperand(3), getAL(CurDAG, dl),
2851 CurDAG->getRegister(0, MVT::i32)};
2852 ReplaceNode(
2853 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2854 return;
2855 }else{
2856 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2857 N->getOperand(3), getAL(CurDAG, dl),
2858 CurDAG->getRegister(0, MVT::i32),
2859 CurDAG->getRegister(0, MVT::i32) };
2860 ReplaceNode(N, CurDAG->getMachineNode(
2861 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2862 MVT::i32, MVT::i32, Ops));
2863 return;
2866 case ARMISD::SMLAL:{
2867 if (Subtarget->isThumb()) {
2868 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2869 N->getOperand(3), getAL(CurDAG, dl),
2870 CurDAG->getRegister(0, MVT::i32)};
2871 ReplaceNode(
2872 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2873 return;
2874 }else{
2875 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2876 N->getOperand(3), getAL(CurDAG, dl),
2877 CurDAG->getRegister(0, MVT::i32),
2878 CurDAG->getRegister(0, MVT::i32) };
2879 ReplaceNode(N, CurDAG->getMachineNode(
2880 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2881 MVT::i32, MVT::i32, Ops));
2882 return;
2885 case ARMISD::SUBE: {
2886 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2887 break;
2888 // Look for a pattern to match SMMLS
2889 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2890 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2891 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2892 !SDValue(N, 1).use_empty())
2893 break;
2895 if (Subtarget->isThumb())
2896 assert(Subtarget->hasThumb2() &&
2897 "This pattern should not be generated for Thumb");
2899 SDValue SmulLoHi = N->getOperand(1);
2900 SDValue Subc = N->getOperand(2);
2901 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2903 if (!Zero || Zero->getZExtValue() != 0 ||
2904 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2905 N->getOperand(1) != SmulLoHi.getValue(1) ||
2906 N->getOperand(2) != Subc.getValue(1))
2907 break;
2909 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2910 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2911 N->getOperand(0), getAL(CurDAG, dl),
2912 CurDAG->getRegister(0, MVT::i32) };
2913 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2914 return;
2916 case ISD::LOAD: {
2917 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2918 if (tryT2IndexedLoad(N))
2919 return;
2920 } else if (Subtarget->isThumb()) {
2921 if (tryT1IndexedLoad(N))
2922 return;
2923 } else if (tryARMIndexedLoad(N))
2924 return;
2925 // Other cases are autogenerated.
2926 break;
2928 case ARMISD::BRCOND: {
2929 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2930 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2931 // Pattern complexity = 6 cost = 1 size = 0
2933 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2934 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2935 // Pattern complexity = 6 cost = 1 size = 0
2937 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2938 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2939 // Pattern complexity = 6 cost = 1 size = 0
2941 unsigned Opc = Subtarget->isThumb() ?
2942 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2943 SDValue Chain = N->getOperand(0);
2944 SDValue N1 = N->getOperand(1);
2945 SDValue N2 = N->getOperand(2);
2946 SDValue N3 = N->getOperand(3);
2947 SDValue InFlag = N->getOperand(4);
2948 assert(N1.getOpcode() == ISD::BasicBlock);
2949 assert(N2.getOpcode() == ISD::Constant);
2950 assert(N3.getOpcode() == ISD::Register);
2952 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2954 if (InFlag.getOpcode() == ARMISD::CMPZ) {
2955 bool SwitchEQNEToPLMI;
2956 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2957 InFlag = N->getOperand(4);
2959 if (SwitchEQNEToPLMI) {
2960 switch ((ARMCC::CondCodes)CC) {
2961 default: llvm_unreachable("CMPZ must be either NE or EQ!");
2962 case ARMCC::NE:
2963 CC = (unsigned)ARMCC::MI;
2964 break;
2965 case ARMCC::EQ:
2966 CC = (unsigned)ARMCC::PL;
2967 break;
2972 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
2973 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2974 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2975 MVT::Glue, Ops);
2976 Chain = SDValue(ResNode, 0);
2977 if (N->getNumValues() == 2) {
2978 InFlag = SDValue(ResNode, 1);
2979 ReplaceUses(SDValue(N, 1), InFlag);
2981 ReplaceUses(SDValue(N, 0),
2982 SDValue(Chain.getNode(), Chain.getResNo()));
2983 CurDAG->RemoveDeadNode(N);
2984 return;
2987 case ARMISD::CMPZ: {
2988 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
2989 // This allows us to avoid materializing the expensive negative constant.
2990 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
2991 // for its glue output.
2992 SDValue X = N->getOperand(0);
2993 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
2994 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
2995 int64_t Addend = -C->getSExtValue();
2997 SDNode *Add = nullptr;
2998 // ADDS can be better than CMN if the immediate fits in a
2999 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3000 // Outside that range we can just use a CMN which is 32-bit but has a
3001 // 12-bit immediate range.
3002 if (Addend < 1<<8) {
3003 if (Subtarget->isThumb2()) {
3004 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3005 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3006 CurDAG->getRegister(0, MVT::i32) };
3007 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3008 } else {
3009 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3010 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3011 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3012 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3013 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3016 if (Add) {
3017 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3018 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3021 // Other cases are autogenerated.
3022 break;
3025 case ARMISD::CMOV: {
3026 SDValue InFlag = N->getOperand(4);
3028 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3029 bool SwitchEQNEToPLMI;
3030 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3032 if (SwitchEQNEToPLMI) {
3033 SDValue ARMcc = N->getOperand(2);
3034 ARMCC::CondCodes CC =
3035 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3037 switch (CC) {
3038 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3039 case ARMCC::NE:
3040 CC = ARMCC::MI;
3041 break;
3042 case ARMCC::EQ:
3043 CC = ARMCC::PL;
3044 break;
3046 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3047 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3048 N->getOperand(3), N->getOperand(4)};
3049 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3053 // Other cases are autogenerated.
3054 break;
3057 case ARMISD::VZIP: {
3058 unsigned Opc = 0;
3059 EVT VT = N->getValueType(0);
3060 switch (VT.getSimpleVT().SimpleTy) {
3061 default: return;
3062 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3063 case MVT::v4f16:
3064 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3065 case MVT::v2f32:
3066 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3067 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3068 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3069 case MVT::v8f16:
3070 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3071 case MVT::v4f32:
3072 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3074 SDValue Pred = getAL(CurDAG, dl);
3075 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3076 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3077 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3078 return;
3080 case ARMISD::VUZP: {
3081 unsigned Opc = 0;
3082 EVT VT = N->getValueType(0);
3083 switch (VT.getSimpleVT().SimpleTy) {
3084 default: return;
3085 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3086 case MVT::v4f16:
3087 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3088 case MVT::v2f32:
3089 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3090 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3091 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3092 case MVT::v8f16:
3093 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3094 case MVT::v4f32:
3095 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3097 SDValue Pred = getAL(CurDAG, dl);
3098 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3099 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3100 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3101 return;
3103 case ARMISD::VTRN: {
3104 unsigned Opc = 0;
3105 EVT VT = N->getValueType(0);
3106 switch (VT.getSimpleVT().SimpleTy) {
3107 default: return;
3108 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3109 case MVT::v4f16:
3110 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3111 case MVT::v2f32:
3112 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3113 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3114 case MVT::v8f16:
3115 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3116 case MVT::v4f32:
3117 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3119 SDValue Pred = getAL(CurDAG, dl);
3120 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3121 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3122 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3123 return;
3125 case ARMISD::BUILD_VECTOR: {
3126 EVT VecVT = N->getValueType(0);
3127 EVT EltVT = VecVT.getVectorElementType();
3128 unsigned NumElts = VecVT.getVectorNumElements();
3129 if (EltVT == MVT::f64) {
3130 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3131 ReplaceNode(
3132 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3133 return;
3135 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3136 if (NumElts == 2) {
3137 ReplaceNode(
3138 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3139 return;
3141 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3142 ReplaceNode(N,
3143 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3144 N->getOperand(2), N->getOperand(3)));
3145 return;
3148 case ARMISD::VLD1DUP: {
3149 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3150 ARM::VLD1DUPd32 };
3151 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3152 ARM::VLD1DUPq32 };
3153 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3154 return;
3157 case ARMISD::VLD2DUP: {
3158 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3159 ARM::VLD2DUPd32 };
3160 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3161 return;
3164 case ARMISD::VLD3DUP: {
3165 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3166 ARM::VLD3DUPd16Pseudo,
3167 ARM::VLD3DUPd32Pseudo };
3168 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3169 return;
3172 case ARMISD::VLD4DUP: {
3173 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3174 ARM::VLD4DUPd16Pseudo,
3175 ARM::VLD4DUPd32Pseudo };
3176 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3177 return;
3180 case ARMISD::VLD1DUP_UPD: {
3181 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3182 ARM::VLD1DUPd16wb_fixed,
3183 ARM::VLD1DUPd32wb_fixed };
3184 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3185 ARM::VLD1DUPq16wb_fixed,
3186 ARM::VLD1DUPq32wb_fixed };
3187 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3188 return;
3191 case ARMISD::VLD2DUP_UPD: {
3192 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3193 ARM::VLD2DUPd16wb_fixed,
3194 ARM::VLD2DUPd32wb_fixed };
3195 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3196 return;
3199 case ARMISD::VLD3DUP_UPD: {
3200 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3201 ARM::VLD3DUPd16Pseudo_UPD,
3202 ARM::VLD3DUPd32Pseudo_UPD };
3203 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3204 return;
3207 case ARMISD::VLD4DUP_UPD: {
3208 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3209 ARM::VLD4DUPd16Pseudo_UPD,
3210 ARM::VLD4DUPd32Pseudo_UPD };
3211 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3212 return;
3215 case ARMISD::VLD1_UPD: {
3216 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3217 ARM::VLD1d16wb_fixed,
3218 ARM::VLD1d32wb_fixed,
3219 ARM::VLD1d64wb_fixed };
3220 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3221 ARM::VLD1q16wb_fixed,
3222 ARM::VLD1q32wb_fixed,
3223 ARM::VLD1q64wb_fixed };
3224 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3225 return;
3228 case ARMISD::VLD2_UPD: {
3229 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3230 ARM::VLD2d16wb_fixed,
3231 ARM::VLD2d32wb_fixed,
3232 ARM::VLD1q64wb_fixed};
3233 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3234 ARM::VLD2q16PseudoWB_fixed,
3235 ARM::VLD2q32PseudoWB_fixed };
3236 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3237 return;
3240 case ARMISD::VLD3_UPD: {
3241 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3242 ARM::VLD3d16Pseudo_UPD,
3243 ARM::VLD3d32Pseudo_UPD,
3244 ARM::VLD1d64TPseudoWB_fixed};
3245 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3246 ARM::VLD3q16Pseudo_UPD,
3247 ARM::VLD3q32Pseudo_UPD };
3248 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3249 ARM::VLD3q16oddPseudo_UPD,
3250 ARM::VLD3q32oddPseudo_UPD };
3251 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3252 return;
3255 case ARMISD::VLD4_UPD: {
3256 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3257 ARM::VLD4d16Pseudo_UPD,
3258 ARM::VLD4d32Pseudo_UPD,
3259 ARM::VLD1d64QPseudoWB_fixed};
3260 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3261 ARM::VLD4q16Pseudo_UPD,
3262 ARM::VLD4q32Pseudo_UPD };
3263 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3264 ARM::VLD4q16oddPseudo_UPD,
3265 ARM::VLD4q32oddPseudo_UPD };
3266 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3267 return;
3270 case ARMISD::VLD2LN_UPD: {
3271 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3272 ARM::VLD2LNd16Pseudo_UPD,
3273 ARM::VLD2LNd32Pseudo_UPD };
3274 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3275 ARM::VLD2LNq32Pseudo_UPD };
3276 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3277 return;
3280 case ARMISD::VLD3LN_UPD: {
3281 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3282 ARM::VLD3LNd16Pseudo_UPD,
3283 ARM::VLD3LNd32Pseudo_UPD };
3284 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3285 ARM::VLD3LNq32Pseudo_UPD };
3286 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3287 return;
3290 case ARMISD::VLD4LN_UPD: {
3291 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3292 ARM::VLD4LNd16Pseudo_UPD,
3293 ARM::VLD4LNd32Pseudo_UPD };
3294 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3295 ARM::VLD4LNq32Pseudo_UPD };
3296 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3297 return;
3300 case ARMISD::VST1_UPD: {
3301 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3302 ARM::VST1d16wb_fixed,
3303 ARM::VST1d32wb_fixed,
3304 ARM::VST1d64wb_fixed };
3305 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3306 ARM::VST1q16wb_fixed,
3307 ARM::VST1q32wb_fixed,
3308 ARM::VST1q64wb_fixed };
3309 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3310 return;
3313 case ARMISD::VST2_UPD: {
3314 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3315 ARM::VST2d16wb_fixed,
3316 ARM::VST2d32wb_fixed,
3317 ARM::VST1q64wb_fixed};
3318 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3319 ARM::VST2q16PseudoWB_fixed,
3320 ARM::VST2q32PseudoWB_fixed };
3321 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3322 return;
3325 case ARMISD::VST3_UPD: {
3326 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3327 ARM::VST3d16Pseudo_UPD,
3328 ARM::VST3d32Pseudo_UPD,
3329 ARM::VST1d64TPseudoWB_fixed};
3330 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3331 ARM::VST3q16Pseudo_UPD,
3332 ARM::VST3q32Pseudo_UPD };
3333 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3334 ARM::VST3q16oddPseudo_UPD,
3335 ARM::VST3q32oddPseudo_UPD };
3336 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3337 return;
3340 case ARMISD::VST4_UPD: {
3341 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3342 ARM::VST4d16Pseudo_UPD,
3343 ARM::VST4d32Pseudo_UPD,
3344 ARM::VST1d64QPseudoWB_fixed};
3345 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3346 ARM::VST4q16Pseudo_UPD,
3347 ARM::VST4q32Pseudo_UPD };
3348 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3349 ARM::VST4q16oddPseudo_UPD,
3350 ARM::VST4q32oddPseudo_UPD };
3351 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3352 return;
3355 case ARMISD::VST2LN_UPD: {
3356 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3357 ARM::VST2LNd16Pseudo_UPD,
3358 ARM::VST2LNd32Pseudo_UPD };
3359 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3360 ARM::VST2LNq32Pseudo_UPD };
3361 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3362 return;
3365 case ARMISD::VST3LN_UPD: {
3366 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3367 ARM::VST3LNd16Pseudo_UPD,
3368 ARM::VST3LNd32Pseudo_UPD };
3369 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3370 ARM::VST3LNq32Pseudo_UPD };
3371 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3372 return;
3375 case ARMISD::VST4LN_UPD: {
3376 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3377 ARM::VST4LNd16Pseudo_UPD,
3378 ARM::VST4LNd32Pseudo_UPD };
3379 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3380 ARM::VST4LNq32Pseudo_UPD };
3381 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3382 return;
3385 case ISD::INTRINSIC_VOID:
3386 case ISD::INTRINSIC_W_CHAIN: {
3387 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3388 switch (IntNo) {
3389 default:
3390 break;
3392 case Intrinsic::arm_mrrc:
3393 case Intrinsic::arm_mrrc2: {
3394 SDLoc dl(N);
3395 SDValue Chain = N->getOperand(0);
3396 unsigned Opc;
3398 if (Subtarget->isThumb())
3399 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3400 else
3401 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3403 SmallVector<SDValue, 5> Ops;
3404 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3405 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3406 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3408 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3409 // instruction will always be '1111' but it is possible in assembly language to specify
3410 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3411 if (Opc != ARM::MRRC2) {
3412 Ops.push_back(getAL(CurDAG, dl));
3413 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3416 Ops.push_back(Chain);
3418 // Writes to two registers.
3419 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3421 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3422 return;
3424 case Intrinsic::arm_ldaexd:
3425 case Intrinsic::arm_ldrexd: {
3426 SDLoc dl(N);
3427 SDValue Chain = N->getOperand(0);
3428 SDValue MemAddr = N->getOperand(2);
3429 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3431 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3432 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3433 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3435 // arm_ldrexd returns a i64 value in {i32, i32}
3436 std::vector<EVT> ResTys;
3437 if (isThumb) {
3438 ResTys.push_back(MVT::i32);
3439 ResTys.push_back(MVT::i32);
3440 } else
3441 ResTys.push_back(MVT::Untyped);
3442 ResTys.push_back(MVT::Other);
3444 // Place arguments in the right order.
3445 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3446 CurDAG->getRegister(0, MVT::i32), Chain};
3447 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3448 // Transfer memoperands.
3449 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3450 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3452 // Remap uses.
3453 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3454 if (!SDValue(N, 0).use_empty()) {
3455 SDValue Result;
3456 if (isThumb)
3457 Result = SDValue(Ld, 0);
3458 else {
3459 SDValue SubRegIdx =
3460 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3461 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3462 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3463 Result = SDValue(ResNode,0);
3465 ReplaceUses(SDValue(N, 0), Result);
3467 if (!SDValue(N, 1).use_empty()) {
3468 SDValue Result;
3469 if (isThumb)
3470 Result = SDValue(Ld, 1);
3471 else {
3472 SDValue SubRegIdx =
3473 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3474 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3475 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3476 Result = SDValue(ResNode,0);
3478 ReplaceUses(SDValue(N, 1), Result);
3480 ReplaceUses(SDValue(N, 2), OutChain);
3481 CurDAG->RemoveDeadNode(N);
3482 return;
3484 case Intrinsic::arm_stlexd:
3485 case Intrinsic::arm_strexd: {
3486 SDLoc dl(N);
3487 SDValue Chain = N->getOperand(0);
3488 SDValue Val0 = N->getOperand(2);
3489 SDValue Val1 = N->getOperand(3);
3490 SDValue MemAddr = N->getOperand(4);
3492 // Store exclusive double return a i32 value which is the return status
3493 // of the issued store.
3494 const EVT ResTys[] = {MVT::i32, MVT::Other};
3496 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3497 // Place arguments in the right order.
3498 SmallVector<SDValue, 7> Ops;
3499 if (isThumb) {
3500 Ops.push_back(Val0);
3501 Ops.push_back(Val1);
3502 } else
3503 // arm_strexd uses GPRPair.
3504 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3505 Ops.push_back(MemAddr);
3506 Ops.push_back(getAL(CurDAG, dl));
3507 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3508 Ops.push_back(Chain);
3510 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3511 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3512 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3514 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3515 // Transfer memoperands.
3516 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3517 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3519 ReplaceNode(N, St);
3520 return;
3523 case Intrinsic::arm_neon_vld1: {
3524 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3525 ARM::VLD1d32, ARM::VLD1d64 };
3526 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3527 ARM::VLD1q32, ARM::VLD1q64};
3528 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3529 return;
3532 case Intrinsic::arm_neon_vld1x2: {
3533 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3534 ARM::VLD1q32, ARM::VLD1q64 };
3535 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3536 ARM::VLD1d16QPseudo,
3537 ARM::VLD1d32QPseudo,
3538 ARM::VLD1d64QPseudo };
3539 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3540 return;
3543 case Intrinsic::arm_neon_vld1x3: {
3544 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3545 ARM::VLD1d16TPseudo,
3546 ARM::VLD1d32TPseudo,
3547 ARM::VLD1d64TPseudo };
3548 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3549 ARM::VLD1q16LowTPseudo_UPD,
3550 ARM::VLD1q32LowTPseudo_UPD,
3551 ARM::VLD1q64LowTPseudo_UPD };
3552 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3553 ARM::VLD1q16HighTPseudo,
3554 ARM::VLD1q32HighTPseudo,
3555 ARM::VLD1q64HighTPseudo };
3556 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3557 return;
3560 case Intrinsic::arm_neon_vld1x4: {
3561 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3562 ARM::VLD1d16QPseudo,
3563 ARM::VLD1d32QPseudo,
3564 ARM::VLD1d64QPseudo };
3565 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3566 ARM::VLD1q16LowQPseudo_UPD,
3567 ARM::VLD1q32LowQPseudo_UPD,
3568 ARM::VLD1q64LowQPseudo_UPD };
3569 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3570 ARM::VLD1q16HighQPseudo,
3571 ARM::VLD1q32HighQPseudo,
3572 ARM::VLD1q64HighQPseudo };
3573 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3574 return;
3577 case Intrinsic::arm_neon_vld2: {
3578 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3579 ARM::VLD2d32, ARM::VLD1q64 };
3580 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3581 ARM::VLD2q32Pseudo };
3582 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3583 return;
3586 case Intrinsic::arm_neon_vld3: {
3587 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3588 ARM::VLD3d16Pseudo,
3589 ARM::VLD3d32Pseudo,
3590 ARM::VLD1d64TPseudo };
3591 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3592 ARM::VLD3q16Pseudo_UPD,
3593 ARM::VLD3q32Pseudo_UPD };
3594 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3595 ARM::VLD3q16oddPseudo,
3596 ARM::VLD3q32oddPseudo };
3597 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3598 return;
3601 case Intrinsic::arm_neon_vld4: {
3602 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3603 ARM::VLD4d16Pseudo,
3604 ARM::VLD4d32Pseudo,
3605 ARM::VLD1d64QPseudo };
3606 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3607 ARM::VLD4q16Pseudo_UPD,
3608 ARM::VLD4q32Pseudo_UPD };
3609 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3610 ARM::VLD4q16oddPseudo,
3611 ARM::VLD4q32oddPseudo };
3612 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3613 return;
3616 case Intrinsic::arm_neon_vld2dup: {
3617 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3618 ARM::VLD2DUPd32, ARM::VLD1q64 };
3619 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3620 ARM::VLD2DUPq16EvenPseudo,
3621 ARM::VLD2DUPq32EvenPseudo };
3622 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3623 ARM::VLD2DUPq16OddPseudo,
3624 ARM::VLD2DUPq32OddPseudo };
3625 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3626 DOpcodes, QOpcodes0, QOpcodes1);
3627 return;
3630 case Intrinsic::arm_neon_vld3dup: {
3631 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3632 ARM::VLD3DUPd16Pseudo,
3633 ARM::VLD3DUPd32Pseudo,
3634 ARM::VLD1d64TPseudo };
3635 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3636 ARM::VLD3DUPq16EvenPseudo,
3637 ARM::VLD3DUPq32EvenPseudo };
3638 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3639 ARM::VLD3DUPq16OddPseudo,
3640 ARM::VLD3DUPq32OddPseudo };
3641 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3642 DOpcodes, QOpcodes0, QOpcodes1);
3643 return;
3646 case Intrinsic::arm_neon_vld4dup: {
3647 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3648 ARM::VLD4DUPd16Pseudo,
3649 ARM::VLD4DUPd32Pseudo,
3650 ARM::VLD1d64QPseudo };
3651 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3652 ARM::VLD4DUPq16EvenPseudo,
3653 ARM::VLD4DUPq32EvenPseudo };
3654 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3655 ARM::VLD4DUPq16OddPseudo,
3656 ARM::VLD4DUPq32OddPseudo };
3657 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3658 DOpcodes, QOpcodes0, QOpcodes1);
3659 return;
3662 case Intrinsic::arm_neon_vld2lane: {
3663 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3664 ARM::VLD2LNd16Pseudo,
3665 ARM::VLD2LNd32Pseudo };
3666 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3667 ARM::VLD2LNq32Pseudo };
3668 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3669 return;
3672 case Intrinsic::arm_neon_vld3lane: {
3673 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3674 ARM::VLD3LNd16Pseudo,
3675 ARM::VLD3LNd32Pseudo };
3676 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3677 ARM::VLD3LNq32Pseudo };
3678 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3679 return;
3682 case Intrinsic::arm_neon_vld4lane: {
3683 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3684 ARM::VLD4LNd16Pseudo,
3685 ARM::VLD4LNd32Pseudo };
3686 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3687 ARM::VLD4LNq32Pseudo };
3688 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3689 return;
3692 case Intrinsic::arm_neon_vst1: {
3693 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3694 ARM::VST1d32, ARM::VST1d64 };
3695 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3696 ARM::VST1q32, ARM::VST1q64 };
3697 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3698 return;
3701 case Intrinsic::arm_neon_vst1x2: {
3702 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3703 ARM::VST1q32, ARM::VST1q64 };
3704 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3705 ARM::VST1d16QPseudo,
3706 ARM::VST1d32QPseudo,
3707 ARM::VST1d64QPseudo };
3708 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3709 return;
3712 case Intrinsic::arm_neon_vst1x3: {
3713 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3714 ARM::VST1d16TPseudo,
3715 ARM::VST1d32TPseudo,
3716 ARM::VST1d64TPseudo };
3717 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3718 ARM::VST1q16LowTPseudo_UPD,
3719 ARM::VST1q32LowTPseudo_UPD,
3720 ARM::VST1q64LowTPseudo_UPD };
3721 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3722 ARM::VST1q16HighTPseudo,
3723 ARM::VST1q32HighTPseudo,
3724 ARM::VST1q64HighTPseudo };
3725 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3726 return;
3729 case Intrinsic::arm_neon_vst1x4: {
3730 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3731 ARM::VST1d16QPseudo,
3732 ARM::VST1d32QPseudo,
3733 ARM::VST1d64QPseudo };
3734 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3735 ARM::VST1q16LowQPseudo_UPD,
3736 ARM::VST1q32LowQPseudo_UPD,
3737 ARM::VST1q64LowQPseudo_UPD };
3738 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3739 ARM::VST1q16HighQPseudo,
3740 ARM::VST1q32HighQPseudo,
3741 ARM::VST1q64HighQPseudo };
3742 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3743 return;
3746 case Intrinsic::arm_neon_vst2: {
3747 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3748 ARM::VST2d32, ARM::VST1q64 };
3749 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3750 ARM::VST2q32Pseudo };
3751 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3752 return;
3755 case Intrinsic::arm_neon_vst3: {
3756 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3757 ARM::VST3d16Pseudo,
3758 ARM::VST3d32Pseudo,
3759 ARM::VST1d64TPseudo };
3760 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3761 ARM::VST3q16Pseudo_UPD,
3762 ARM::VST3q32Pseudo_UPD };
3763 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3764 ARM::VST3q16oddPseudo,
3765 ARM::VST3q32oddPseudo };
3766 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3767 return;
3770 case Intrinsic::arm_neon_vst4: {
3771 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3772 ARM::VST4d16Pseudo,
3773 ARM::VST4d32Pseudo,
3774 ARM::VST1d64QPseudo };
3775 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3776 ARM::VST4q16Pseudo_UPD,
3777 ARM::VST4q32Pseudo_UPD };
3778 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3779 ARM::VST4q16oddPseudo,
3780 ARM::VST4q32oddPseudo };
3781 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3782 return;
3785 case Intrinsic::arm_neon_vst2lane: {
3786 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3787 ARM::VST2LNd16Pseudo,
3788 ARM::VST2LNd32Pseudo };
3789 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3790 ARM::VST2LNq32Pseudo };
3791 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3792 return;
3795 case Intrinsic::arm_neon_vst3lane: {
3796 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3797 ARM::VST3LNd16Pseudo,
3798 ARM::VST3LNd32Pseudo };
3799 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3800 ARM::VST3LNq32Pseudo };
3801 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3802 return;
3805 case Intrinsic::arm_neon_vst4lane: {
3806 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3807 ARM::VST4LNd16Pseudo,
3808 ARM::VST4LNd32Pseudo };
3809 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3810 ARM::VST4LNq32Pseudo };
3811 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3812 return;
3815 break;
3818 case ISD::ATOMIC_CMP_SWAP:
3819 SelectCMP_SWAP(N);
3820 return;
3823 SelectCode(N);
3826 // Inspect a register string of the form
3827 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3828 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3829 // and obtain the integer operands from them, adding these operands to the
3830 // provided vector.
3831 static void getIntOperandsFromRegisterString(StringRef RegString,
3832 SelectionDAG *CurDAG,
3833 const SDLoc &DL,
3834 std::vector<SDValue> &Ops) {
3835 SmallVector<StringRef, 5> Fields;
3836 RegString.split(Fields, ':');
3838 if (Fields.size() > 1) {
3839 bool AllIntFields = true;
3841 for (StringRef Field : Fields) {
3842 // Need to trim out leading 'cp' characters and get the integer field.
3843 unsigned IntField;
3844 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3845 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3848 assert(AllIntFields &&
3849 "Unexpected non-integer value in special register string.");
3853 // Maps a Banked Register string to its mask value. The mask value returned is
3854 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3855 // mask operand, which expresses which register is to be used, e.g. r8, and in
3856 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3857 // was invalid.
3858 static inline int getBankedRegisterMask(StringRef RegString) {
3859 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3860 if (!TheReg)
3861 return -1;
3862 return TheReg->Encoding;
3865 // The flags here are common to those allowed for apsr in the A class cores and
3866 // those allowed for the special registers in the M class cores. Returns a
3867 // value representing which flags were present, -1 if invalid.
3868 static inline int getMClassFlagsMask(StringRef Flags) {
3869 return StringSwitch<int>(Flags)
3870 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3871 // correct when flags are not permitted
3872 .Case("g", 0x1)
3873 .Case("nzcvq", 0x2)
3874 .Case("nzcvqg", 0x3)
3875 .Default(-1);
3878 // Maps MClass special registers string to its value for use in the
3879 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3880 // Returns -1 to signify that the string was invalid.
3881 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
3882 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
3883 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
3884 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
3885 return -1;
3886 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
3889 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3890 // The mask operand contains the special register (R Bit) in bit 4, whether
3891 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3892 // bits 3-0 contains the fields to be accessed in the special register, set by
3893 // the flags provided with the register.
3894 int Mask = 0;
3895 if (Reg == "apsr") {
3896 // The flags permitted for apsr are the same flags that are allowed in
3897 // M class registers. We get the flag value and then shift the flags into
3898 // the correct place to combine with the mask.
3899 Mask = getMClassFlagsMask(Flags);
3900 if (Mask == -1)
3901 return -1;
3902 return Mask << 2;
3905 if (Reg != "cpsr" && Reg != "spsr") {
3906 return -1;
3909 // This is the same as if the flags were "fc"
3910 if (Flags.empty() || Flags == "all")
3911 return Mask | 0x9;
3913 // Inspect the supplied flags string and set the bits in the mask for
3914 // the relevant and valid flags allowed for cpsr and spsr.
3915 for (char Flag : Flags) {
3916 int FlagVal;
3917 switch (Flag) {
3918 case 'c':
3919 FlagVal = 0x1;
3920 break;
3921 case 'x':
3922 FlagVal = 0x2;
3923 break;
3924 case 's':
3925 FlagVal = 0x4;
3926 break;
3927 case 'f':
3928 FlagVal = 0x8;
3929 break;
3930 default:
3931 FlagVal = 0;
3934 // This avoids allowing strings where the same flag bit appears twice.
3935 if (!FlagVal || (Mask & FlagVal))
3936 return -1;
3937 Mask |= FlagVal;
3940 // If the register is spsr then we need to set the R bit.
3941 if (Reg == "spsr")
3942 Mask |= 0x10;
3944 return Mask;
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced, false if the register string could
// not be matched (selection then falls through to the generic path).
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 0 is the chain; operand 1 carries the register name as metadata.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the always-execute predicate (AL + no CC register) and the
    // incoming chain before building the machine node.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MRSbanked with an encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // Reject registers the subtarget's FP unit does not provide.
    if (!Subtarget->hasVFP2())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: let generic selection handle (or report) it.
  return false;
}
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
// Returns true if the node was replaced, false if the register string could
// not be matched.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 0 is the chain, operand 1 the register-name metadata, operand 2
  // (and 3 for 64-bit writes) the value(s) to be written.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the single 32-bit write value after coprocessor and opc1.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: two i32 halves are inserted as separate operands.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the always-execute predicate and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MSRbanked with an encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    // VFP writes require at least VFP2 support on the subtarget.
    if (!Subtarget->hasVFP2())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split "<reg>_<flags>" (e.g. "cpsr_fc") into the register name and its
  // flag suffix for the A/R-class mask computation below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string.
  return false;
}
// Rewrite an inline-asm node so that any two-register GPR operand pair
// (an i64 value split over two GPRs) is replaced by a single GPRPair
// virtual register, inserting the copies needed to move values between the
// pair and the original registers. Returns true if the node was rewritten
// and replaced, false if no operand needed changing.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
  // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // The glue operand, if present, is always last and is re-appended at the
  // end after the rewrite.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the fixed leading operands (chain, asm string, etc.).
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only constant flag words describe the operands that follow them.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register uses/defs/early-clobbers are candidates for pairing.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Candidates are exactly two GPR-class registers (an i64 split across a
    // pair), or a use tied to an already-rewritten def.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Rebuild the flag word for a single GPRPair register operand.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the inline-asm node with the rewritten operand list.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4331 bool ARMDAGToDAGISel::
4332 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4333 std::vector<SDValue> &OutOps) {
4334 switch(ConstraintID) {
4335 default:
4336 llvm_unreachable("Unexpected asm memory constraint");
4337 case InlineAsm::Constraint_i:
4338 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4339 // be an immediate and not a memory constraint.
4340 LLVM_FALLTHROUGH;
4341 case InlineAsm::Constraint_m:
4342 case InlineAsm::Constraint_o:
4343 case InlineAsm::Constraint_Q:
4344 case InlineAsm::Constraint_Um:
4345 case InlineAsm::Constraint_Un:
4346 case InlineAsm::Constraint_Uq:
4347 case InlineAsm::Constraint_Us:
4348 case InlineAsm::Constraint_Ut:
4349 case InlineAsm::Constraint_Uv:
4350 case InlineAsm::Constraint_Uy:
4351 // Require the address to be in a register. That is safe for all ARM
4352 // variants and it is hard to do anything much smarter without knowing
4353 // how the operand is used.
4354 OutOps.push_back(Op);
4355 return false;
4357 return true;
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
/// \param TM the ARM target machine the pass selects for.
/// \param OptLevel optimization level forwarded to the selector.
/// \returns a heap-allocated pass; ownership passes to the caller
///          (normally the pass manager).
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}