1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines an instruction selector for the ARM target.
11 //===----------------------------------------------------------------------===//
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/APSInt.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGISel.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/IR/CallingConv.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsARM.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Target/TargetOptions.h"
41 #define DEBUG_TYPE "arm-isel"
44 DisableShifterOp("disable-shifter-op", cl::Hidden
,
45 cl::desc("Disable isel of shifter-op"),
48 //===--------------------------------------------------------------------===//
49 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
50 /// instructions for SelectionDAG operations.
54 class ARMDAGToDAGISel
: public SelectionDAGISel
{
55 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
56 /// make the right decision when generating code for different targets.
57 const ARMSubtarget
*Subtarget
;
60 explicit ARMDAGToDAGISel(ARMBaseTargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
61 : SelectionDAGISel(tm
, OptLevel
) {}
63 bool runOnMachineFunction(MachineFunction
&MF
) override
{
64 // Reset the subtarget each time through.
65 Subtarget
= &MF
.getSubtarget
<ARMSubtarget
>();
66 SelectionDAGISel::runOnMachineFunction(MF
);
70 StringRef
getPassName() const override
{ return "ARM Instruction Selection"; }
72 void PreprocessISelDAG() override
;
74 /// getI32Imm - Return a target constant of type i32 with the specified
76 inline SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
77 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
80 void Select(SDNode
*N
) override
;
82 /// Return true, as some complex patterns (like those that call
83 /// canExtractShiftFromMul) can modify the DAG in place.
84 bool ComplexPatternFuncMutatesDAG() const override
{ return true; }
86 bool hasNoVMLxHazardUse(SDNode
*N
) const;
87 bool isShifterOpProfitable(const SDValue
&Shift
,
88 ARM_AM::ShiftOpc ShOpcVal
, unsigned ShAmt
);
89 bool SelectRegShifterOperand(SDValue N
, SDValue
&A
,
90 SDValue
&B
, SDValue
&C
,
91 bool CheckProfitability
= true);
92 bool SelectImmShifterOperand(SDValue N
, SDValue
&A
,
93 SDValue
&B
, bool CheckProfitability
= true);
94 bool SelectShiftRegShifterOperand(SDValue N
, SDValue
&A
, SDValue
&B
,
96 // Don't apply the profitability check
97 return SelectRegShifterOperand(N
, A
, B
, C
, false);
99 bool SelectShiftImmShifterOperand(SDValue N
, SDValue
&A
, SDValue
&B
) {
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N
, A
, B
, false);
103 bool SelectShiftImmShifterOperandOneUse(SDValue N
, SDValue
&A
, SDValue
&B
) {
106 return SelectImmShifterOperand(N
, A
, B
, false);
109 bool SelectAddLikeOr(SDNode
*Parent
, SDValue N
, SDValue
&Out
);
111 bool SelectAddrModeImm12(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
112 bool SelectLdStSOReg(SDValue N
, SDValue
&Base
, SDValue
&Offset
, SDValue
&Opc
);
114 bool SelectCMOVPred(SDValue N
, SDValue
&Pred
, SDValue
&Reg
) {
115 const ConstantSDNode
*CN
= cast
<ConstantSDNode
>(N
);
116 Pred
= CurDAG
->getTargetConstant(CN
->getZExtValue(), SDLoc(N
), MVT::i32
);
117 Reg
= CurDAG
->getRegister(ARM::CPSR
, MVT::i32
);
121 bool SelectAddrMode2OffsetReg(SDNode
*Op
, SDValue N
,
122 SDValue
&Offset
, SDValue
&Opc
);
123 bool SelectAddrMode2OffsetImm(SDNode
*Op
, SDValue N
,
124 SDValue
&Offset
, SDValue
&Opc
);
125 bool SelectAddrMode2OffsetImmPre(SDNode
*Op
, SDValue N
,
126 SDValue
&Offset
, SDValue
&Opc
);
127 bool SelectAddrOffsetNone(SDValue N
, SDValue
&Base
);
128 bool SelectAddrMode3(SDValue N
, SDValue
&Base
,
129 SDValue
&Offset
, SDValue
&Opc
);
130 bool SelectAddrMode3Offset(SDNode
*Op
, SDValue N
,
131 SDValue
&Offset
, SDValue
&Opc
);
132 bool IsAddressingMode5(SDValue N
, SDValue
&Base
, SDValue
&Offset
, bool FP16
);
133 bool SelectAddrMode5(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
134 bool SelectAddrMode5FP16(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
135 bool SelectAddrMode6(SDNode
*Parent
, SDValue N
, SDValue
&Addr
,SDValue
&Align
);
136 bool SelectAddrMode6Offset(SDNode
*Op
, SDValue N
, SDValue
&Offset
);
138 bool SelectAddrModePC(SDValue N
, SDValue
&Offset
, SDValue
&Label
);
140 // Thumb Addressing Modes:
141 bool SelectThumbAddrModeRR(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
142 bool SelectThumbAddrModeRRSext(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
143 bool SelectThumbAddrModeImm5S(SDValue N
, unsigned Scale
, SDValue
&Base
,
145 bool SelectThumbAddrModeImm5S1(SDValue N
, SDValue
&Base
,
147 bool SelectThumbAddrModeImm5S2(SDValue N
, SDValue
&Base
,
149 bool SelectThumbAddrModeImm5S4(SDValue N
, SDValue
&Base
,
151 bool SelectThumbAddrModeSP(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
152 template <unsigned Shift
>
153 bool SelectTAddrModeImm7(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
155 // Thumb 2 Addressing Modes:
156 bool SelectT2AddrModeImm12(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
157 template <unsigned Shift
>
158 bool SelectT2AddrModeImm8(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
159 bool SelectT2AddrModeImm8(SDValue N
, SDValue
&Base
,
161 bool SelectT2AddrModeImm8Offset(SDNode
*Op
, SDValue N
,
163 template <unsigned Shift
>
164 bool SelectT2AddrModeImm7Offset(SDNode
*Op
, SDValue N
, SDValue
&OffImm
);
165 bool SelectT2AddrModeImm7Offset(SDNode
*Op
, SDValue N
, SDValue
&OffImm
,
167 template <unsigned Shift
>
168 bool SelectT2AddrModeImm7(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
169 bool SelectT2AddrModeSoReg(SDValue N
, SDValue
&Base
,
170 SDValue
&OffReg
, SDValue
&ShImm
);
171 bool SelectT2AddrModeExclusive(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
173 template<int Min
, int Max
>
174 bool SelectImmediateInRange(SDValue N
, SDValue
&OffImm
);
176 inline bool is_so_imm(unsigned Imm
) const {
177 return ARM_AM::getSOImmVal(Imm
) != -1;
180 inline bool is_so_imm_not(unsigned Imm
) const {
181 return ARM_AM::getSOImmVal(~Imm
) != -1;
184 inline bool is_t2_so_imm(unsigned Imm
) const {
185 return ARM_AM::getT2SOImmVal(Imm
) != -1;
188 inline bool is_t2_so_imm_not(unsigned Imm
) const {
189 return ARM_AM::getT2SOImmVal(~Imm
) != -1;
192 // Include the pieces autogenerated from the target description.
193 #include "ARMGenDAGISel.inc"
196 void transferMemOperands(SDNode
*Src
, SDNode
*Dst
);
198 /// Indexed (pre/post inc/dec) load matching code for ARM.
199 bool tryARMIndexedLoad(SDNode
*N
);
200 bool tryT1IndexedLoad(SDNode
*N
);
201 bool tryT2IndexedLoad(SDNode
*N
);
202 bool tryMVEIndexedLoad(SDNode
*N
);
203 bool tryFMULFixed(SDNode
*N
, SDLoc dl
);
204 bool tryFP_TO_INT(SDNode
*N
, SDLoc dl
);
205 bool transformFixedFloatingPointConversion(SDNode
*N
, SDNode
*FMul
,
209 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
210 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
211 /// loads of D registers and even subregs and odd subregs of Q registers.
212 /// For NumVecs <= 2, QOpcodes1 is not used.
213 void SelectVLD(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
214 const uint16_t *DOpcodes
, const uint16_t *QOpcodes0
,
215 const uint16_t *QOpcodes1
);
217 /// SelectVST - Select NEON store intrinsics. NumVecs should
218 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
219 /// stores of D registers and even subregs and odd subregs of Q registers.
220 /// For NumVecs <= 2, QOpcodes1 is not used.
221 void SelectVST(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
222 const uint16_t *DOpcodes
, const uint16_t *QOpcodes0
,
223 const uint16_t *QOpcodes1
);
225 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
226 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
227 /// load/store of D registers and Q registers.
228 void SelectVLDSTLane(SDNode
*N
, bool IsLoad
, bool isUpdating
,
229 unsigned NumVecs
, const uint16_t *DOpcodes
,
230 const uint16_t *QOpcodes
);
232 /// Helper functions for setting up clusters of MVE predication operands.
233 template <typename SDValueVector
>
234 void AddMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
,
235 SDValue PredicateMask
);
236 template <typename SDValueVector
>
237 void AddMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
,
238 SDValue PredicateMask
, SDValue Inactive
);
240 template <typename SDValueVector
>
241 void AddEmptyMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
);
242 template <typename SDValueVector
>
243 void AddEmptyMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
, EVT InactiveTy
);
245 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
246 void SelectMVE_WB(SDNode
*N
, const uint16_t *Opcodes
, bool Predicated
);
248 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
249 void SelectMVE_LongShift(SDNode
*N
, uint16_t Opcode
, bool Immediate
,
250 bool HasSaturationOperand
);
252 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
253 void SelectMVE_VADCSBC(SDNode
*N
, uint16_t OpcodeWithCarry
,
254 uint16_t OpcodeWithNoCarry
, bool Add
, bool Predicated
);
256 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
258 void SelectMVE_VSHLC(SDNode
*N
, bool Predicated
);
260 /// Select long MVE vector reductions with two vector operands
261 /// Stride is the number of vector element widths the instruction can operate
263 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
264 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
265 /// Stride is used when addressing the OpcodesS array which contains multiple
266 /// opcodes for each element width.
267 /// TySize is the index into the list of element types listed above
268 void SelectBaseMVE_VMLLDAV(SDNode
*N
, bool Predicated
,
269 const uint16_t *OpcodesS
, const uint16_t *OpcodesU
,
270 size_t Stride
, size_t TySize
);
272 /// Select a 64-bit MVE vector reduction with two vector operands
273 /// int_arm_mve_vmlldava[_predicated]
274 void SelectMVE_VMLLDAV(SDNode
*N
, bool Predicated
, const uint16_t *OpcodesS
,
275 const uint16_t *OpcodesU
);
276 /// Select a 72-bit MVE vector rounding reduction with two vector operands
277 /// int_arm_mve_vrmlldavha[_predicated]
278 void SelectMVE_VRMLLDAVH(SDNode
*N
, bool Predicated
, const uint16_t *OpcodesS
,
279 const uint16_t *OpcodesU
);
281 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
282 /// should be 2 or 4. The opcode array specifies the instructions
283 /// used for 8, 16 and 32-bit lane sizes respectively, and each
284 /// pointer points to a set of NumVecs sub-opcodes used for the
285 /// different stages (e.g. VLD20 versus VLD21) of each load family.
286 void SelectMVE_VLD(SDNode
*N
, unsigned NumVecs
,
287 const uint16_t *const *Opcodes
, bool HasWriteback
);
289 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
290 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
291 void SelectMVE_VxDUP(SDNode
*N
, const uint16_t *Opcodes
,
292 bool Wrapping
, bool Predicated
);
294 /// SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
295 /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
296 /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
297 /// the accumulator and the immediate operand, i.e. 0
298 /// for CX1*, 1 for CX2*, 2 for CX3*
299 /// \arg \c HasAccum whether the instruction has an accumulator operand
300 void SelectCDE_CXxD(SDNode
*N
, uint16_t Opcode
, size_t NumExtraOps
,
303 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
304 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
305 /// for loading D registers.
306 void SelectVLDDup(SDNode
*N
, bool IsIntrinsic
, bool isUpdating
,
307 unsigned NumVecs
, const uint16_t *DOpcodes
,
308 const uint16_t *QOpcodes0
= nullptr,
309 const uint16_t *QOpcodes1
= nullptr);
311 /// Try to select SBFX/UBFX instructions for ARM.
312 bool tryV6T2BitfieldExtractOp(SDNode
*N
, bool isSigned
);
314 bool tryInsertVectorElt(SDNode
*N
);
316 // Select special operations if node forms integer ABS pattern
317 bool tryABSOp(SDNode
*N
);
319 bool tryReadRegister(SDNode
*N
);
320 bool tryWriteRegister(SDNode
*N
);
322 bool tryInlineAsm(SDNode
*N
);
324 void SelectCMPZ(SDNode
*N
, bool &SwitchEQNEToPLMI
);
326 void SelectCMP_SWAP(SDNode
*N
);
328 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
329 /// inline asm expressions.
330 bool SelectInlineAsmMemoryOperand(const SDValue
&Op
, unsigned ConstraintID
,
331 std::vector
<SDValue
> &OutOps
) override
;
333 // Form pairs of consecutive R, S, D, or Q registers.
334 SDNode
*createGPRPairNode(EVT VT
, SDValue V0
, SDValue V1
);
335 SDNode
*createSRegPairNode(EVT VT
, SDValue V0
, SDValue V1
);
336 SDNode
*createDRegPairNode(EVT VT
, SDValue V0
, SDValue V1
);
337 SDNode
*createQRegPairNode(EVT VT
, SDValue V0
, SDValue V1
);
339 // Form sequences of 4 consecutive S, D, or Q registers.
340 SDNode
*createQuadSRegsNode(EVT VT
, SDValue V0
, SDValue V1
, SDValue V2
, SDValue V3
);
341 SDNode
*createQuadDRegsNode(EVT VT
, SDValue V0
, SDValue V1
, SDValue V2
, SDValue V3
);
342 SDNode
*createQuadQRegsNode(EVT VT
, SDValue V0
, SDValue V1
, SDValue V2
, SDValue V3
);
344 // Get the alignment operand for a NEON VLD or VST instruction.
345 SDValue
GetVLDSTAlign(SDValue Align
, const SDLoc
&dl
, unsigned NumVecs
,
348 /// Checks if N is a multiplication by a constant where we can extract out a
349 /// power of two from the constant so that it can be used in a shift, but only
350 /// if it simplifies the materialization of the constant. Returns true if it
351 /// is, and assigns to PowerOfTwo the power of two that should be extracted
352 /// out and to NewMulConst the new constant to be multiplied by.
353 bool canExtractShiftFromMul(const SDValue
&N
, unsigned MaxShift
,
354 unsigned &PowerOfTwo
, SDValue
&NewMulConst
) const;
356 /// Replace N with M in CurDAG, in a way that also ensures that M gets
357 /// selected when N would have been selected.
358 void replaceDAGValue(const SDValue
&N
, SDValue M
);
362 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
363 /// operand. If so Imm will receive the 32-bit value.
364 static bool isInt32Immediate(SDNode
*N
, unsigned &Imm
) {
365 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i32
) {
366 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
372 // isInt32Immediate - This method tests to see if N is a constant operand.
373 // If so Imm will receive the 32 bit value.
374 static bool isInt32Immediate(SDValue N
, unsigned &Imm
) {
375 return isInt32Immediate(N
.getNode(), Imm
);
378 // isOpcWithIntImmediate - This method tests to see if the node is a specific
379 // opcode and that it has an immediate integer right operand.
380 // If so Imm will receive the 32 bit value.
381 static bool isOpcWithIntImmediate(SDNode
*N
, unsigned Opc
, unsigned& Imm
) {
382 return N
->getOpcode() == Opc
&&
383 isInt32Immediate(N
->getOperand(1).getNode(), Imm
);
386 /// Check whether a particular node is a constant value representable as
387 /// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
389 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
390 static bool isScaledConstantInRange(SDValue Node
, int Scale
,
391 int RangeMin
, int RangeMax
,
392 int &ScaledConstant
) {
393 assert(Scale
> 0 && "Invalid scale!");
395 // Check that this is a constant.
396 const ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Node
);
400 ScaledConstant
= (int) C
->getZExtValue();
401 if ((ScaledConstant
% Scale
) != 0)
404 ScaledConstant
/= Scale
;
405 return ScaledConstant
>= RangeMin
&& ScaledConstant
< RangeMax
;
408 void ARMDAGToDAGISel::PreprocessISelDAG() {
409 if (!Subtarget
->hasV6T2Ops())
412 bool isThumb2
= Subtarget
->isThumb();
413 for (SelectionDAG::allnodes_iterator I
= CurDAG
->allnodes_begin(),
414 E
= CurDAG
->allnodes_end(); I
!= E
; ) {
415 SDNode
*N
= &*I
++; // Preincrement iterator to avoid invalidation issues.
417 if (N
->getOpcode() != ISD::ADD
)
420 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
421 // leading zeros, followed by consecutive set bits, followed by 1 or 2
422 // trailing zeros, e.g. 1020.
423 // Transform the expression to
424 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
425 // of trailing zeros of c2. The left shift would be folded as a shifter
426 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
429 SDValue N0
= N
->getOperand(0);
430 SDValue N1
= N
->getOperand(1);
431 unsigned And_imm
= 0;
432 if (!isOpcWithIntImmediate(N1
.getNode(), ISD::AND
, And_imm
)) {
433 if (isOpcWithIntImmediate(N0
.getNode(), ISD::AND
, And_imm
))
439 // Check if the AND mask is an immediate of the form: 000.....1111111100
440 unsigned TZ
= countTrailingZeros(And_imm
);
441 if (TZ
!= 1 && TZ
!= 2)
442 // Be conservative here. Shifter operands aren't always free. e.g. On
443 // Swift, left shifter operands of 1 / 2 are free but others are not.
445 // ubfx r3, r1, #16, #8
446 // ldr.w r3, [r0, r3, lsl #2]
449 // and.w r2, r9, r1, lsr #14
453 if (And_imm
& (And_imm
+ 1))
456 // Look for (and (srl X, c1), c2).
457 SDValue Srl
= N1
.getOperand(0);
458 unsigned Srl_imm
= 0;
459 if (!isOpcWithIntImmediate(Srl
.getNode(), ISD::SRL
, Srl_imm
) ||
463 // Make sure first operand is not a shifter operand which would prevent
464 // folding of the left shift.
469 if (SelectImmShifterOperand(N0
, CPTmp0
, CPTmp1
))
472 if (SelectImmShifterOperand(N0
, CPTmp0
, CPTmp1
) ||
473 SelectRegShifterOperand(N0
, CPTmp0
, CPTmp1
, CPTmp2
))
477 // Now make the transformation.
478 Srl
= CurDAG
->getNode(ISD::SRL
, SDLoc(Srl
), MVT::i32
,
480 CurDAG
->getConstant(Srl_imm
+ TZ
, SDLoc(Srl
),
482 N1
= CurDAG
->getNode(ISD::AND
, SDLoc(N1
), MVT::i32
,
484 CurDAG
->getConstant(And_imm
, SDLoc(Srl
), MVT::i32
));
485 N1
= CurDAG
->getNode(ISD::SHL
, SDLoc(N1
), MVT::i32
,
486 N1
, CurDAG
->getConstant(TZ
, SDLoc(Srl
), MVT::i32
));
487 CurDAG
->UpdateNodeOperands(N
, N0
, N1
);
491 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
492 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
493 /// least on current ARM implementations) which should be avoided.
494 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode
*N
) const {
495 if (OptLevel
== CodeGenOpt::None
)
498 if (!Subtarget
->hasVMLxHazards())
504 SDNode
*Use
= *N
->use_begin();
505 if (Use
->getOpcode() == ISD::CopyToReg
)
507 if (Use
->isMachineOpcode()) {
508 const ARMBaseInstrInfo
*TII
= static_cast<const ARMBaseInstrInfo
*>(
509 CurDAG
->getSubtarget().getInstrInfo());
511 const MCInstrDesc
&MCID
= TII
->get(Use
->getMachineOpcode());
514 unsigned Opcode
= MCID
.getOpcode();
515 if (Opcode
== ARM::VMOVRS
|| Opcode
== ARM::VMOVRRD
)
517 // vmlx feeding into another vmlx. We actually want to unfold
518 // the use later in the MLxExpansion pass. e.g.
520 // vmla (stall 8 cycles)
525 // This adds up to about 18 - 19 cycles.
528 // vmul (stall 4 cycles)
529 // vadd adds up to about 14 cycles.
530 return TII
->isFpMLxInstruction(Opcode
);
536 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue
&Shift
,
537 ARM_AM::ShiftOpc ShOpcVal
,
539 if (!Subtarget
->isLikeA9() && !Subtarget
->isSwift())
541 if (Shift
.hasOneUse())
544 return ShOpcVal
== ARM_AM::lsl
&&
545 (ShAmt
== 2 || (Subtarget
->isSwift() && ShAmt
== 1));
548 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue
&N
,
550 unsigned &PowerOfTwo
,
551 SDValue
&NewMulConst
) const {
552 assert(N
.getOpcode() == ISD::MUL
);
553 assert(MaxShift
> 0);
555 // If the multiply is used in more than one place then changing the constant
556 // will make other uses incorrect, so don't.
557 if (!N
.hasOneUse()) return false;
558 // Check if the multiply is by a constant
559 ConstantSDNode
*MulConst
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
560 if (!MulConst
) return false;
561 // If the constant is used in more than one place then modifying it will mean
562 // we need to materialize two constants instead of one, which is a bad idea.
563 if (!MulConst
->hasOneUse()) return false;
564 unsigned MulConstVal
= MulConst
->getZExtValue();
565 if (MulConstVal
== 0) return false;
567 // Find the largest power of 2 that MulConstVal is a multiple of
568 PowerOfTwo
= MaxShift
;
569 while ((MulConstVal
% (1 << PowerOfTwo
)) != 0) {
571 if (PowerOfTwo
== 0) return false;
574 // Only optimise if the new cost is better
575 unsigned NewMulConstVal
= MulConstVal
/ (1 << PowerOfTwo
);
576 NewMulConst
= CurDAG
->getConstant(NewMulConstVal
, SDLoc(N
), MVT::i32
);
577 unsigned OldCost
= ConstantMaterializationCost(MulConstVal
, Subtarget
);
578 unsigned NewCost
= ConstantMaterializationCost(NewMulConstVal
, Subtarget
);
579 return NewCost
< OldCost
;
582 void ARMDAGToDAGISel::replaceDAGValue(const SDValue
&N
, SDValue M
) {
583 CurDAG
->RepositionNode(N
.getNode()->getIterator(), M
.getNode());
587 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N
,
590 bool CheckProfitability
) {
591 if (DisableShifterOp
)
594 // If N is a multiply-by-constant and it's profitable to extract a shift and
595 // use it in a shifted operand do so.
596 if (N
.getOpcode() == ISD::MUL
) {
597 unsigned PowerOfTwo
= 0;
599 if (canExtractShiftFromMul(N
, 31, PowerOfTwo
, NewMulConst
)) {
600 HandleSDNode
Handle(N
);
602 replaceDAGValue(N
.getOperand(1), NewMulConst
);
603 BaseReg
= Handle
.getValue();
604 Opc
= CurDAG
->getTargetConstant(
605 ARM_AM::getSORegOpc(ARM_AM::lsl
, PowerOfTwo
), Loc
, MVT::i32
);
610 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOpcode());
612 // Don't match base register only case. That is matched to a separate
613 // lower complexity pattern with explicit register operand.
614 if (ShOpcVal
== ARM_AM::no_shift
) return false;
616 BaseReg
= N
.getOperand(0);
617 unsigned ShImmVal
= 0;
618 ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
619 if (!RHS
) return false;
620 ShImmVal
= RHS
->getZExtValue() & 31;
621 Opc
= CurDAG
->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal
, ShImmVal
),
626 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N
,
630 bool CheckProfitability
) {
631 if (DisableShifterOp
)
634 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOpcode());
636 // Don't match base register only case. That is matched to a separate
637 // lower complexity pattern with explicit register operand.
638 if (ShOpcVal
== ARM_AM::no_shift
) return false;
640 BaseReg
= N
.getOperand(0);
641 unsigned ShImmVal
= 0;
642 ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
643 if (RHS
) return false;
645 ShReg
= N
.getOperand(1);
646 if (CheckProfitability
&& !isShifterOpProfitable(N
, ShOpcVal
, ShImmVal
))
648 Opc
= CurDAG
->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal
, ShImmVal
),
653 // Determine whether an ISD::OR's operands are suitable to turn the operation
654 // into an addition, which often has more compact encodings.
655 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode
*Parent
, SDValue N
, SDValue
&Out
) {
656 assert(Parent
->getOpcode() == ISD::OR
&& "unexpected parent");
658 return CurDAG
->haveNoCommonBitsSet(N
, Parent
->getOperand(1));
662 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N
,
665 // Match simple R + imm12 operands.
668 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
669 !CurDAG
->isBaseWithConstantOffset(N
)) {
670 if (N
.getOpcode() == ISD::FrameIndex
) {
671 // Match frame index.
672 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
673 Base
= CurDAG
->getTargetFrameIndex(
674 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
675 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
679 if (N
.getOpcode() == ARMISD::Wrapper
&&
680 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
681 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
682 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
683 Base
= N
.getOperand(0);
686 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
690 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
691 int RHSC
= (int)RHS
->getSExtValue();
692 if (N
.getOpcode() == ISD::SUB
)
695 if (RHSC
> -0x1000 && RHSC
< 0x1000) { // 12 bits
696 Base
= N
.getOperand(0);
697 if (Base
.getOpcode() == ISD::FrameIndex
) {
698 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
699 Base
= CurDAG
->getTargetFrameIndex(
700 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
702 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
709 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
715 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N
, SDValue
&Base
, SDValue
&Offset
,
717 if (N
.getOpcode() == ISD::MUL
&&
718 ((!Subtarget
->isLikeA9() && !Subtarget
->isSwift()) || N
.hasOneUse())) {
719 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
720 // X * [3,5,9] -> X + X * [2,4,8] etc.
721 int RHSC
= (int)RHS
->getZExtValue();
724 ARM_AM::AddrOpc AddSub
= ARM_AM::add
;
726 AddSub
= ARM_AM::sub
;
729 if (isPowerOf2_32(RHSC
)) {
730 unsigned ShAmt
= Log2_32(RHSC
);
731 Base
= Offset
= N
.getOperand(0);
732 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, ShAmt
,
741 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
742 // ISD::OR that is equivalent to an ISD::ADD.
743 !CurDAG
->isBaseWithConstantOffset(N
))
746 // Leave simple R +/- imm12 operands for LDRi12
747 if (N
.getOpcode() == ISD::ADD
|| N
.getOpcode() == ISD::OR
) {
749 if (isScaledConstantInRange(N
.getOperand(1), /*Scale=*/1,
750 -0x1000+1, 0x1000, RHSC
)) // 12 bits.
754 // Otherwise this is R +/- [possibly shifted] R.
755 ARM_AM::AddrOpc AddSub
= N
.getOpcode() == ISD::SUB
? ARM_AM::sub
:ARM_AM::add
;
756 ARM_AM::ShiftOpc ShOpcVal
=
757 ARM_AM::getShiftOpcForNode(N
.getOperand(1).getOpcode());
760 Base
= N
.getOperand(0);
761 Offset
= N
.getOperand(1);
763 if (ShOpcVal
!= ARM_AM::no_shift
) {
764 // Check to see if the RHS of the shift is a constant, if not, we can't fold
766 if (ConstantSDNode
*Sh
=
767 dyn_cast
<ConstantSDNode
>(N
.getOperand(1).getOperand(1))) {
768 ShAmt
= Sh
->getZExtValue();
769 if (isShifterOpProfitable(Offset
, ShOpcVal
, ShAmt
))
770 Offset
= N
.getOperand(1).getOperand(0);
773 ShOpcVal
= ARM_AM::no_shift
;
776 ShOpcVal
= ARM_AM::no_shift
;
780 // Try matching (R shl C) + (R).
781 if (N
.getOpcode() != ISD::SUB
&& ShOpcVal
== ARM_AM::no_shift
&&
782 !(Subtarget
->isLikeA9() || Subtarget
->isSwift() ||
783 N
.getOperand(0).hasOneUse())) {
784 ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOperand(0).getOpcode());
785 if (ShOpcVal
!= ARM_AM::no_shift
) {
786 // Check to see if the RHS of the shift is a constant, if not, we can't
788 if (ConstantSDNode
*Sh
=
789 dyn_cast
<ConstantSDNode
>(N
.getOperand(0).getOperand(1))) {
790 ShAmt
= Sh
->getZExtValue();
791 if (isShifterOpProfitable(N
.getOperand(0), ShOpcVal
, ShAmt
)) {
792 Offset
= N
.getOperand(0).getOperand(0);
793 Base
= N
.getOperand(1);
796 ShOpcVal
= ARM_AM::no_shift
;
799 ShOpcVal
= ARM_AM::no_shift
;
804 // If Offset is a multiply-by-constant and it's profitable to extract a shift
805 // and use it in a shifted operand do so.
806 if (Offset
.getOpcode() == ISD::MUL
&& N
.hasOneUse()) {
807 unsigned PowerOfTwo
= 0;
809 if (canExtractShiftFromMul(Offset
, 31, PowerOfTwo
, NewMulConst
)) {
810 HandleSDNode
Handle(Offset
);
811 replaceDAGValue(Offset
.getOperand(1), NewMulConst
);
812 Offset
= Handle
.getValue();
814 ShOpcVal
= ARM_AM::lsl
;
818 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, ShAmt
, ShOpcVal
),
823 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode
*Op
, SDValue N
,
824 SDValue
&Offset
, SDValue
&Opc
) {
825 unsigned Opcode
= Op
->getOpcode();
826 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
827 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
828 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
829 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
830 ? ARM_AM::add
: ARM_AM::sub
;
832 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x1000, Val
))
836 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOpcode());
838 if (ShOpcVal
!= ARM_AM::no_shift
) {
839 // Check to see if the RHS of the shift is a constant, if not, we can't fold
841 if (ConstantSDNode
*Sh
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
842 ShAmt
= Sh
->getZExtValue();
843 if (isShifterOpProfitable(N
, ShOpcVal
, ShAmt
))
844 Offset
= N
.getOperand(0);
847 ShOpcVal
= ARM_AM::no_shift
;
850 ShOpcVal
= ARM_AM::no_shift
;
854 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, ShAmt
, ShOpcVal
),
859 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode
*Op
, SDValue N
,
860 SDValue
&Offset
, SDValue
&Opc
) {
861 unsigned Opcode
= Op
->getOpcode();
862 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
863 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
864 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
865 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
866 ? ARM_AM::add
: ARM_AM::sub
;
868 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x1000, Val
)) { // 12 bits.
869 if (AddSub
== ARM_AM::sub
) Val
*= -1;
870 Offset
= CurDAG
->getRegister(0, MVT::i32
);
871 Opc
= CurDAG
->getTargetConstant(Val
, SDLoc(Op
), MVT::i32
);
879 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode
*Op
, SDValue N
,
880 SDValue
&Offset
, SDValue
&Opc
) {
881 unsigned Opcode
= Op
->getOpcode();
882 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
883 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
884 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
885 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
886 ? ARM_AM::add
: ARM_AM::sub
;
888 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x1000, Val
)) { // 12 bits.
889 Offset
= CurDAG
->getRegister(0, MVT::i32
);
890 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, Val
,
892 SDLoc(Op
), MVT::i32
);
899 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N
, SDValue
&Base
) {
904 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N
,
905 SDValue
&Base
, SDValue
&Offset
,
907 if (N
.getOpcode() == ISD::SUB
) {
908 // X - C is canonicalized to X + -C, no need to handle it here.
909 Base
= N
.getOperand(0);
910 Offset
= N
.getOperand(1);
911 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub
, 0), SDLoc(N
),
916 if (!CurDAG
->isBaseWithConstantOffset(N
)) {
918 if (N
.getOpcode() == ISD::FrameIndex
) {
919 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
920 Base
= CurDAG
->getTargetFrameIndex(
921 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
923 Offset
= CurDAG
->getRegister(0, MVT::i32
);
924 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add
, 0), SDLoc(N
),
929 // If the RHS is +/- imm8, fold into addr mode.
931 if (isScaledConstantInRange(N
.getOperand(1), /*Scale=*/1,
932 -256 + 1, 256, RHSC
)) { // 8 bits.
933 Base
= N
.getOperand(0);
934 if (Base
.getOpcode() == ISD::FrameIndex
) {
935 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
936 Base
= CurDAG
->getTargetFrameIndex(
937 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
939 Offset
= CurDAG
->getRegister(0, MVT::i32
);
941 ARM_AM::AddrOpc AddSub
= ARM_AM::add
;
943 AddSub
= ARM_AM::sub
;
946 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(AddSub
, RHSC
), SDLoc(N
),
951 Base
= N
.getOperand(0);
952 Offset
= N
.getOperand(1);
953 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add
, 0), SDLoc(N
),
/// Match the offset operand of an indexed addrmode3 load/store.
/// Op is read as a LoadSDNode when its opcode is ISD::LOAD and as a
/// StoreSDNode otherwise; PRE_INC / POST_INC give ARM_AM::add, other modes
/// ARM_AM::sub. A constant N passing the 0..256 range check is folded into
/// Opc with Offset set to register 0; the other visible path encodes the
/// direction with a zero immediate in Opc.
958 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode
*Op
, SDValue N
,
959 SDValue
&Offset
, SDValue
&Opc
) {
960 unsigned Opcode
= Op
->getOpcode();
961 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
962 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
963 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
964 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
965 ? ARM_AM::add
: ARM_AM::sub
;
967 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 256, Val
)) { // 8 bits.
968 Offset
= CurDAG
->getRegister(0, MVT::i32
);
969 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(AddSub
, Val
), SDLoc(Op
),
975 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(AddSub
, 0), SDLoc(Op
),
/// Shared matcher behind SelectAddrMode5 and SelectAddrMode5FP16, which call
/// it with FP16 = false and FP16 = true respectively.
/// The constant offset is scaled by 2 for FP16 and by 4 otherwise, and is
/// encoded with getAM5FP16Opc or getAM5Opc. FrameIndex bases are rewritten to
/// target frame indices. An ARMISD::Wrapper is only unwrapped when it does
/// not wrap a TargetGlobalAddress, TargetExternalSymbol or
/// TargetGlobalTLSAddress.
980 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N
, SDValue
&Base
, SDValue
&Offset
,
982 if (!CurDAG
->isBaseWithConstantOffset(N
)) {
984 if (N
.getOpcode() == ISD::FrameIndex
) {
985 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
986 Base
= CurDAG
->getTargetFrameIndex(
987 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
988 } else if (N
.getOpcode() == ARMISD::Wrapper
&&
989 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
990 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
991 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
992 Base
= N
.getOperand(0);
994 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add
, 0),
999 // If the RHS is +/- imm8, fold into addr mode.
1001 const int Scale
= FP16
? 2 : 4;
1003 if (isScaledConstantInRange(N
.getOperand(1), Scale
, -255, 256, RHSC
)) {
1004 Base
= N
.getOperand(0);
1005 if (Base
.getOpcode() == ISD::FrameIndex
) {
1006 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1007 Base
= CurDAG
->getTargetFrameIndex(
1008 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1011 ARM_AM::AddrOpc AddSub
= ARM_AM::add
;
1013 AddSub
= ARM_AM::sub
;
// Two encodings of the folded immediate: FP16 form, then the regular form.
1018 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub
, RHSC
),
1019 SDLoc(N
), MVT::i32
);
1021 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5Opc(AddSub
, RHSC
),
1022 SDLoc(N
), MVT::i32
);
// Offset could not be folded: encode `add 0` in the matching form.
1030 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add
, 0),
1031 SDLoc(N
), MVT::i32
);
1033 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add
, 0),
1034 SDLoc(N
), MVT::i32
);
/// addrmode5 selector for the non-FP16 case: forwards N, Base and Offset to
/// IsAddressingMode5 with FP16 = false and returns its result.
1039 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N
,
1040 SDValue
&Base
, SDValue
&Offset
) {
1041 return IsAddressingMode5(N
, Base
, Offset
, /*FP16=*/ false);
/// addrmode5 selector for the FP16 case: forwards N, Base and Offset to
/// IsAddressingMode5 with FP16 = true and returns its result.
1044 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N
,
1045 SDValue
&Base
, SDValue
&Offset
) {
1046 return IsAddressingMode5(N
, Base
, Offset
, /*FP16=*/ true);
/// Match an addrmode6 address and compute its alignment operand.
/// Parent must be a MemSDNode (it is cast unconditionally). Align receives
/// the chosen alignment as an i32 target constant; 0 is the initial value and
/// is kept when no alignment is selected.
1049 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode
*Parent
, SDValue N
, SDValue
&Addr
,
1053 unsigned Alignment
= 0;
1055 MemSDNode
*MemN
= cast
<MemSDNode
>(Parent
);
// Plain loads/stores, and VLD1_UPD / VST1_UPD whose last operand is the
// constant 1, take the size-based alignment path.
1057 if (isa
<LSBaseSDNode
>(MemN
) ||
1058 ((MemN
->getOpcode() == ARMISD::VST1_UPD
||
1059 MemN
->getOpcode() == ARMISD::VLD1_UPD
) &&
1060 MemN
->getConstantOperandVal(MemN
->getNumOperands() - 1) == 1)) {
1061 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1062 // The maximum alignment is equal to the memory size being referenced.
1063 unsigned MMOAlign
= MemN
->getAlignment();
1064 unsigned MemSize
= MemN
->getMemoryVT().getSizeInBits() / 8;
// Only record an alignment when the access is wider than one byte and the
// memory operand is aligned to at least the access size.
1065 if (MMOAlign
>= MemSize
&& MemSize
> 1)
1066 Alignment
= MemSize
;
1068 // All other uses of addrmode6 are for intrinsics. For now just record
1069 // the raw alignment value; it will be refined later based on the legal
1070 // alignment operands for the intrinsic.
1071 Alignment
= MemN
->getAlignment();
1074 Align
= CurDAG
->getTargetConstant(Alignment
, SDLoc(N
), MVT::i32
);
/// Match the offset operand of an indexed addrmode6 load/store.
/// Op must be an LSBaseSDNode (it is cast unconditionally) and its
/// addressing mode is tested against POST_INC. When N is a constant whose
/// value in bytes equals the size of the accessed memory type, Offset is set
/// to register 0.
1078 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode
*Op
, SDValue N
,
1080 LSBaseSDNode
*LdSt
= cast
<LSBaseSDNode
>(Op
);
1081 ISD::MemIndexedMode AM
= LdSt
->getAddressingMode();
1082 if (AM
!= ISD::POST_INC
)
1085 if (ConstantSDNode
*NC
= dyn_cast
<ConstantSDNode
>(N
)) {
// Increment (bytes) * 8 == access size in bits.
1086 if (NC
->getZExtValue() * 8 == LdSt
->getMemoryVT().getSizeInBits())
1087 Offset
= CurDAG
->getRegister(0, MVT::i32
);
/// Match a PC-relative address: a single-use ARMISD::PIC_ADD node.
/// Offset receives operand 0; Label receives operand 1 (which must be a
/// ConstantSDNode) re-emitted as an i32 target constant.
1092 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N
,
1093 SDValue
&Offset
, SDValue
&Label
) {
1094 if (N
.getOpcode() == ARMISD::PIC_ADD
&& N
.hasOneUse()) {
1095 Offset
= N
.getOperand(0);
1096 SDValue N1
= N
.getOperand(1);
1097 Label
= CurDAG
->getTargetConstant(cast
<ConstantSDNode
>(N1
)->getZExtValue(),
1098 SDLoc(N
), MVT::i32
);
1106 //===----------------------------------------------------------------------===//
1107 // Thumb Addressing Modes
1108 //===----------------------------------------------------------------------===//
/// Thumb1 helper: tests whether N is an ISD::ADD whose second operand is a
/// constant in the range [-255, -1]. Callers use the answer to prefer a
/// zero-offset load/store over folding the (negative) immediate.
1110 static bool shouldUseZeroOffsetLdSt(SDValue N
) {
1111 // Negative numbers are difficult to materialise in thumb1. If we are
1112 // selecting the add of a negative, instead try to select ri with a zero
1113 // offset, so create the add node directly which will become a sub.
1114 if (N
.getOpcode() != ISD::ADD
)
1117 // Look for an imm which is not legal for ld/st, but is legal for sub.
1118 if (auto C
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1)))
1119 return C
->getSExtValue() < 0 && C
->getSExtValue() >= -255;
/// Thumb register + register address matcher.
/// For an ISD::ADD or base+constant node, Base and Offset are the node's two
/// operands. Any other node is examined as a ConstantSDNode and tested for
/// being the null value.
1124 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N
, SDValue
&Base
,
1126 if (N
.getOpcode() != ISD::ADD
&& !CurDAG
->isBaseWithConstantOffset(N
)) {
1127 ConstantSDNode
*NC
= dyn_cast
<ConstantSDNode
>(N
);
1128 if (!NC
|| !NC
->isNullValue())
1135 Base
= N
.getOperand(0);
1136 Offset
= N
.getOperand(1);
/// Thumb register + register address matcher that first defers to the
/// register + immediate form: it returns false when shouldUseZeroOffsetLdSt(N)
/// holds, and otherwise returns SelectThumbAddrModeRRSext's result.
1140 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N
, SDValue
&Base
,
1142 if (shouldUseZeroOffsetLdSt(N
))
1143 return false; // Select ri instead
1144 return SelectThumbAddrModeRRSext(N
, Base
, Offset
);
/// Thumb register + (imm5 * Scale) address matcher.
/// - If shouldUseZeroOffsetLdSt(N) holds, OffImm is 0.
/// - If N is not base+constant: a plain ISD::ADD is rejected (register offset
///   is preferred); an ARMISD::Wrapper is unwrapped unless it wraps a
///   TargetGlobalAddress, TargetExternalSymbol, TargetConstantPool or
///   TargetGlobalTLSAddress; OffImm is 0.
/// - If the constant passes the scaled 0..32 range check, Base is operand 0
///   and OffImm is the scaled constant RHSC.
1148 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N
, unsigned Scale
,
1149 SDValue
&Base
, SDValue
&OffImm
) {
1150 if (shouldUseZeroOffsetLdSt(N
)) {
1152 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1156 if (!CurDAG
->isBaseWithConstantOffset(N
)) {
1157 if (N
.getOpcode() == ISD::ADD
) {
1158 return false; // We want to select register offset instead
1159 } else if (N
.getOpcode() == ARMISD::Wrapper
&&
1160 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
1161 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
1162 N
.getOperand(0).getOpcode() != ISD::TargetConstantPool
&&
1163 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
1164 Base
= N
.getOperand(0);
1169 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1173 // If the RHS is + imm5 * scale, fold into addr mode.
1175 if (isScaledConstantInRange(N
.getOperand(1), Scale
, 0, 32, RHSC
)) {
1176 Base
= N
.getOperand(0);
1177 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1181 // Offset is too large, so use register offset instead.
/// SelectThumbAddrModeImm5S specialised for a scale of 4.
1186 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N
, SDValue
&Base
,
1188 return SelectThumbAddrModeImm5S(N
, 4, Base
, OffImm
);
/// SelectThumbAddrModeImm5S specialised for a scale of 2.
1192 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N
, SDValue
&Base
,
1194 return SelectThumbAddrModeImm5S(N
, 2, Base
, OffImm
);
/// SelectThumbAddrModeImm5S specialised for a scale of 1.
1198 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N
, SDValue
&Base
,
1200 return SelectThumbAddrModeImm5S(N
, 1, Base
, OffImm
);
/// Thumb frame-index address matcher.
/// Accepts a bare FrameIndex (OffImm = 0) or FrameIndex + constant where the
/// constant passes the scale-4, 0..256 range check and lies inside the frame
/// object. Note the side effect: the frame object's alignment may be raised
/// to 4 via MachineFrameInfo::setObjectAlignment.
1203 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N
,
1204 SDValue
&Base
, SDValue
&OffImm
) {
1205 if (N
.getOpcode() == ISD::FrameIndex
) {
1206 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
1207 // Only multiples of 4 are allowed for the offset, so the frame object
1208 // alignment must be at least 4.
1209 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
1210 if (MFI
.getObjectAlign(FI
) < Align(4))
1211 MFI
.setObjectAlignment(FI
, Align(4));
1212 Base
= CurDAG
->getTargetFrameIndex(
1213 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1214 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1218 if (!CurDAG
->isBaseWithConstantOffset(N
))
1221 if (N
.getOperand(0).getOpcode() == ISD::FrameIndex
) {
1222 // If the RHS is + imm8 * scale, fold into addr mode.
1224 if (isScaledConstantInRange(N
.getOperand(1), /*Scale=*/4, 0, 256, RHSC
)) {
1225 Base
= N
.getOperand(0);
1226 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1227 // Make sure the offset is inside the object, or we might fail to
1228 // allocate an emergency spill slot. (An out-of-range access is UB, but
1229 // it could show up anyway.)
1230 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
// RHSC is the scaled constant, so RHSC * 4 is the byte offset.
1231 if (RHSC
* 4 < MFI
.getObjectSize(FI
)) {
1232 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1233 // indexed by the LHS must be 4-byte aligned.
1234 if (!MFI
.isFixedObjectIndex(FI
) && MFI
.getObjectAlign(FI
) < Align(4))
1235 MFI
.setObjectAlignment(FI
, Align(4));
// Fixed objects are not re-aligned above, so re-check before folding.
1236 if (MFI
.getObjectAlign(FI
) >= Align(4)) {
1237 Base
= CurDAG
->getTargetFrameIndex(
1238 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1239 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
/// Register + 7-bit scaled immediate address matcher, templated on the
/// shift. For an ISD::SUB or base+constant node whose constant passes the
/// range check at scale (1 << Shift), Base is operand 0 and the emitted
/// immediate is RHSC * (1 << Shift). The other visible path sets OffImm to 0.
1249 template <unsigned Shift
>
1250 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N
, SDValue
&Base
,
1252 if (N
.getOpcode() == ISD::SUB
|| CurDAG
->isBaseWithConstantOffset(N
)) {
1254 if (isScaledConstantInRange(N
.getOperand(1), 1 << Shift
, -0x7f, 0x80,
1256 Base
= N
.getOperand(0);
1257 if (N
.getOpcode() == ISD::SUB
)
1260 CurDAG
->getTargetConstant(RHSC
* (1 << Shift
), SDLoc(N
), MVT::i32
);
1267 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1272 //===----------------------------------------------------------------------===//
1273 // Thumb 2 Addressing Modes
1274 //===----------------------------------------------------------------------===//
/// Thumb2 register + unsigned 12-bit immediate address matcher.
/// - Nodes that are not ADD / SUB / base+constant: a FrameIndex becomes a
///   target frame index with OffImm = 0; an ARMISD::Wrapper is unwrapped
///   unless it wraps a TargetGlobalAddress, TargetExternalSymbol or
///   TargetGlobalTLSAddress, and a wrapped TargetConstantPool is rejected so
///   that t2LDRpci is selected; OffImm = 0.
/// - With a constant RHS, SelectT2AddrModeImm8 is tried first so that
///   (R - imm8) is left to t2LDRi8; an offset in [0, 0x1000) is then folded
///   into OffImm, with FrameIndex bases rewritten to target frame indices.
1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N
,
1278 SDValue
&Base
, SDValue
&OffImm
) {
1279 // Match simple R + imm12 operands.
1282 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
1283 !CurDAG
->isBaseWithConstantOffset(N
)) {
1284 if (N
.getOpcode() == ISD::FrameIndex
) {
1285 // Match frame index.
1286 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
1287 Base
= CurDAG
->getTargetFrameIndex(
1288 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1289 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1293 if (N
.getOpcode() == ARMISD::Wrapper
&&
1294 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
1295 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
1296 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
1297 Base
= N
.getOperand(0);
1298 if (Base
.getOpcode() == ISD::TargetConstantPool
)
1299 return false; // We want to select t2LDRpci instead.
1302 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1306 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1307 if (SelectT2AddrModeImm8(N
, Base
, OffImm
))
1308 // Let t2LDRi8 handle (R - imm8).
1311 int RHSC
= (int)RHS
->getZExtValue();
1312 if (N
.getOpcode() == ISD::SUB
)
1315 if (RHSC
>= 0 && RHSC
< 0x1000) { // 12 bits (unsigned)
1316 Base
= N
.getOperand(0);
1317 if (Base
.getOpcode() == ISD::FrameIndex
) {
1318 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1319 Base
= CurDAG
->getTargetFrameIndex(
1320 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1322 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1329 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
/// Shift-templated Thumb2 register + 8-bit scaled immediate address matcher.
/// For an ISD::SUB or base+constant node whose constant passes the
/// (-255, 256) range check at scale (1 << Shift), Base is operand 0 (a
/// FrameIndex is rewritten to a target frame index) and the emitted immediate
/// is RHSC * (1 << Shift). The other visible path sets OffImm to 0.
1333 template <unsigned Shift
>
1334 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N
, SDValue
&Base
,
1336 if (N
.getOpcode() == ISD::SUB
|| CurDAG
->isBaseWithConstantOffset(N
)) {
1338 if (isScaledConstantInRange(N
.getOperand(1), 1 << Shift
, -255, 256, RHSC
)) {
1339 Base
= N
.getOperand(0);
1340 if (Base
.getOpcode() == ISD::FrameIndex
) {
1341 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1342 Base
= CurDAG
->getTargetFrameIndex(
1343 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1346 if (N
.getOpcode() == ISD::SUB
)
1349 CurDAG
->getTargetConstant(RHSC
* (1 << Shift
), SDLoc(N
), MVT::i32
);
1356 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
/// Thumb2 register - 8-bit immediate address matcher (negative offsets only).
/// Only ADD, SUB and base+constant nodes with a constant RHS are considered.
/// An offset in [-255, -1] is folded into OffImm; a FrameIndex base is
/// rewritten to a target frame index.
1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N
,
1361 SDValue
&Base
, SDValue
&OffImm
) {
1362 // Match simple R - imm8 operands.
1363 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
1364 !CurDAG
->isBaseWithConstantOffset(N
))
1367 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1368 int RHSC
= (int)RHS
->getSExtValue();
1369 if (N
.getOpcode() == ISD::SUB
)
1372 if ((RHSC
>= -255) && (RHSC
< 0)) { // 8 bits (always negative)
1373 Base
= N
.getOperand(0);
1374 if (Base
.getOpcode() == ISD::FrameIndex
) {
1375 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1376 Base
= CurDAG
->getTargetFrameIndex(
1377 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1379 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
/// Match the 8-bit immediate offset of a Thumb2 indexed load/store.
/// Op is read as a LoadSDNode when its opcode is ISD::LOAD and as a
/// StoreSDNode otherwise. A constant N passing the 0..0x100 range check is
/// emitted as +RHSC for PRE_INC / POST_INC and as -RHSC for any other mode.
1387 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode
*Op
, SDValue N
,
1389 unsigned Opcode
= Op
->getOpcode();
1390 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
1391 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
1392 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
1394 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x100, RHSC
)) { // 8 bits.
1395 OffImm
= ((AM
== ISD::PRE_INC
) || (AM
== ISD::POST_INC
))
1396 ? CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
)
1397 : CurDAG
->getTargetConstant(-RHSC
, SDLoc(N
), MVT::i32
);
/// Shift-templated Thumb2 register + 7-bit scaled immediate address matcher.
/// For an ISD::SUB or base+constant node whose constant passes the range
/// check at scale (1 << Shift), Base is operand 0 (a FrameIndex is rewritten
/// to a target frame index) and the emitted immediate is RHSC * (1 << Shift).
/// The other visible path sets OffImm to 0.
1404 template <unsigned Shift
>
1405 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N
, SDValue
&Base
,
1407 if (N
.getOpcode() == ISD::SUB
|| CurDAG
->isBaseWithConstantOffset(N
)) {
1409 if (isScaledConstantInRange(N
.getOperand(1), 1 << Shift
, -0x7f, 0x80,
1411 Base
= N
.getOperand(0);
1412 if (Base
.getOpcode() == ISD::FrameIndex
) {
1413 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1414 Base
= CurDAG
->getTargetFrameIndex(
1415 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1418 if (N
.getOpcode() == ISD::SUB
)
1421 CurDAG
->getTargetConstant(RHSC
* (1 << Shift
), SDLoc(N
), MVT::i32
);
1428 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
/// Template front end that passes its Shift template argument as the last,
/// run-time argument of the SelectT2AddrModeImm7Offset overload.
1432 template <unsigned Shift
>
1433 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode
*Op
, SDValue N
,
1435 return SelectT2AddrModeImm7Offset(Op
, N
, OffImm
, Shift
);
/// Match the 7-bit scaled immediate offset of an indexed memory operation.
/// The addressing mode is read from Op as a LoadSDNode, StoreSDNode,
/// MaskedLoadSDNode or MaskedStoreSDNode; other opcodes are unreachable.
/// A constant N passing the 0..0x80 range check at scale (1 << Shift) is
/// emitted as +RHSC * (1 << Shift) for PRE_INC / POST_INC and negated for any
/// other mode.
1438 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode
*Op
, SDValue N
,
1441 unsigned Opcode
= Op
->getOpcode();
1442 ISD::MemIndexedMode AM
;
1445 AM
= cast
<LoadSDNode
>(Op
)->getAddressingMode();
1448 AM
= cast
<StoreSDNode
>(Op
)->getAddressingMode();
1451 AM
= cast
<MaskedLoadSDNode
>(Op
)->getAddressingMode();
1454 AM
= cast
<MaskedStoreSDNode
>(Op
)->getAddressingMode();
1457 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1461 // 7 bit constant, shifted by Shift.
1462 if (isScaledConstantInRange(N
, 1 << Shift
, 0, 0x80, RHSC
)) {
1464 ((AM
== ISD::PRE_INC
) || (AM
== ISD::POST_INC
))
1465 ? CurDAG
->getTargetConstant(RHSC
* (1 << Shift
), SDLoc(N
), MVT::i32
)
1466 : CurDAG
->getTargetConstant(-RHSC
* (1 << Shift
), SDLoc(N
),
/// Match a constant N that passes isScaledConstantInRange with scale 1 and
/// the template bounds Min / Max; on a match OffImm receives the value as an
/// i32 target constant.
1473 template <int Min
, int Max
>
1474 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N
, SDValue
&OffImm
) {
1476 if (isScaledConstantInRange(N
, 1, Min
, Max
, Val
)) {
1477 OffImm
= CurDAG
->getTargetConstant(Val
, SDLoc(N
), MVT::i32
);
/// Thumb2 register + (register << imm) address matcher.
/// Only ADD / base+constant nodes are considered. Constant offsets that fit
/// the imm12 or negative imm8 forms are detected first (they belong to
/// t2LDRi12 / t2LDRi8). Otherwise Base and OffReg are the two operands,
/// swapped when the shift is on the left operand; a constant `lsl` amount
/// below 4 is folded when isShifterOpProfitable agrees, and a single-use
/// multiply-by-constant may have a shift extracted via canExtractShiftFromMul.
/// ShImm receives the shift amount.
1483 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N
,
1485 SDValue
&OffReg
, SDValue
&ShImm
) {
1486 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1487 if (N
.getOpcode() != ISD::ADD
&& !CurDAG
->isBaseWithConstantOffset(N
))
1490 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1491 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1492 int RHSC
= (int)RHS
->getZExtValue();
1493 if (RHSC
>= 0 && RHSC
< 0x1000) // 12 bits (unsigned)
1495 else if (RHSC
< 0 && RHSC
>= -255) // 8 bits
1499 // Look for (R + R) or (R + (R << [1,2,3])).
1501 Base
= N
.getOperand(0);
1502 OffReg
= N
.getOperand(1);
1504 // Swap if it is ((R << c) + R).
1505 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(OffReg
.getOpcode());
1506 if (ShOpcVal
!= ARM_AM::lsl
) {
1507 ShOpcVal
= ARM_AM::getShiftOpcForNode(Base
.getOpcode());
1508 if (ShOpcVal
== ARM_AM::lsl
)
1509 std::swap(Base
, OffReg
);
1512 if (ShOpcVal
== ARM_AM::lsl
) {
1513 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1515 if (ConstantSDNode
*Sh
= dyn_cast
<ConstantSDNode
>(OffReg
.getOperand(1))) {
1516 ShAmt
= Sh
->getZExtValue();
1517 if (ShAmt
< 4 && isShifterOpProfitable(OffReg
, ShOpcVal
, ShAmt
))
1518 OffReg
= OffReg
.getOperand(0);
1525 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1526 // and use it in a shifted operand do so.
1527 if (OffReg
.getOpcode() == ISD::MUL
&& N
.hasOneUse()) {
1528 unsigned PowerOfTwo
= 0;
1529 SDValue NewMulConst
;
1530 if (canExtractShiftFromMul(OffReg
, 3, PowerOfTwo
, NewMulConst
)) {
// OffReg is held in a HandleSDNode across replaceDAGValue and re-read from
// the handle afterwards.
1531 HandleSDNode
Handle(OffReg
);
1532 replaceDAGValue(OffReg
.getOperand(1), NewMulConst
);
1533 OffReg
= Handle
.getValue();
1538 ShImm
= CurDAG
->getTargetConstant(ShAmt
, SDLoc(N
), MVT::i32
);
/// Address matcher for the exclusive load/store instructions.
/// OffImm defaults to 0. For an ADD that is base+constant with a constant
/// RHS, the offset is checked against `> 1020` and `% 4 != 0`; Base is then
/// operand 0 (a FrameIndex is rewritten to a target frame index) and OffImm
/// is the offset divided by 4.
1543 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N
, SDValue
&Base
,
1545 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1548 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1550 if (N
.getOpcode() != ISD::ADD
|| !CurDAG
->isBaseWithConstantOffset(N
))
1553 ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
1557 uint32_t RHSC
= (int)RHS
->getZExtValue();
// Unsigned compare: a negative offset wraps to a large value and fails here.
1558 if (RHSC
> 1020 || RHSC
% 4 != 0)
1561 Base
= N
.getOperand(0);
1562 if (Base
.getOpcode() == ISD::FrameIndex
) {
1563 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1564 Base
= CurDAG
->getTargetFrameIndex(
1565 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
// The immediate is stored in units of 4 bytes.
1568 OffImm
= CurDAG
->getTargetConstant(RHSC
/4, SDLoc(N
), MVT::i32
);
1572 //===--------------------------------------------------------------------===//
1574 /// getAL - Returns an ARMCC::AL immediate node: the ARMCC::AL condition
/// code as an i32 target constant at location dl.
1575 static inline SDValue
getAL(SelectionDAG
*CurDAG
, const SDLoc
&dl
) {
1576 return CurDAG
->getTargetConstant((uint64_t)ARMCC::AL
, dl
, MVT::i32
);
/// Copy the memory operand of N onto the machine node Result.
/// N must be a MemSDNode and Result a MachineSDNode (both are cast
/// unconditionally); Result ends up with exactly that one memory reference.
1579 void ARMDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
1580 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
1581 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
/// Try to select an ARM-mode pre/post-indexed load for N (a LoadSDNode).
/// The opcode is chosen from the loaded type, the pre/post flag and whichever
/// offset matcher accepts the load's offset:
///   i32    -> LDR_PRE_IMM / LDR_POST_IMM / LDR_{PRE,POST}_REG
///   i16    -> LDRSH_* for SEXTLOAD, LDRH_* otherwise
///   i8, i1 -> LDRSB_* for SEXTLOAD, else LDRB_PRE_IMM / LDRB_POST_IMM /
///             LDRB_{PRE,POST}_REG
/// The new machine node takes over N's memory operand and replaces N.
1584 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode
*N
) {
1585 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1586 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1587 if (AM
== ISD::UNINDEXED
)
1590 EVT LoadedVT
= LD
->getMemoryVT();
1591 SDValue Offset
, AMOpc
;
1592 bool isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
// Opcode stays 0 until one of the matchers below succeeds.
1593 unsigned Opcode
= 0;
1595 if (LoadedVT
== MVT::i32
&& isPre
&&
1596 SelectAddrMode2OffsetImmPre(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1597 Opcode
= ARM::LDR_PRE_IMM
;
1599 } else if (LoadedVT
== MVT::i32
&& !isPre
&&
1600 SelectAddrMode2OffsetImm(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1601 Opcode
= ARM::LDR_POST_IMM
;
1603 } else if (LoadedVT
== MVT::i32
&&
1604 SelectAddrMode2OffsetReg(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1605 Opcode
= isPre
? ARM::LDR_PRE_REG
: ARM::LDR_POST_REG
;
1608 } else if (LoadedVT
== MVT::i16
&&
1609 SelectAddrMode3Offset(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1611 Opcode
= (LD
->getExtensionType() == ISD::SEXTLOAD
)
1612 ? (isPre
? ARM::LDRSH_PRE
: ARM::LDRSH_POST
)
1613 : (isPre
? ARM::LDRH_PRE
: ARM::LDRH_POST
);
1614 } else if (LoadedVT
== MVT::i8
|| LoadedVT
== MVT::i1
) {
1615 if (LD
->getExtensionType() == ISD::SEXTLOAD
) {
1616 if (SelectAddrMode3Offset(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1618 Opcode
= isPre
? ARM::LDRSB_PRE
: ARM::LDRSB_POST
;
1622 SelectAddrMode2OffsetImmPre(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1624 Opcode
= ARM::LDRB_PRE_IMM
;
1625 } else if (!isPre
&&
1626 SelectAddrMode2OffsetImm(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1628 Opcode
= ARM::LDRB_POST_IMM
;
1629 } else if (SelectAddrMode2OffsetReg(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1631 Opcode
= isPre
? ARM::LDRB_PRE_REG
: ARM::LDRB_POST_REG
;
// The immediate pre-indexed forms take no separate Offset operand: the
// operand list is Base, AMOpc, predicate, register 0, Chain.
1637 if (Opcode
== ARM::LDR_PRE_IMM
|| Opcode
== ARM::LDRB_PRE_IMM
) {
1638 SDValue Chain
= LD
->getChain();
1639 SDValue Base
= LD
->getBasePtr();
1640 SDValue Ops
[]= { Base
, AMOpc
, getAL(CurDAG
, SDLoc(N
)),
1641 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1642 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1644 transferMemOperands(N
, New
);
1645 ReplaceNode(N
, New
);
// All other forms carry Offset as an explicit operand before AMOpc.
1648 SDValue Chain
= LD
->getChain();
1649 SDValue Base
= LD
->getBasePtr();
1650 SDValue Ops
[]= { Base
, Offset
, AMOpc
, getAL(CurDAG
, SDLoc(N
)),
1651 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1652 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1654 transferMemOperands(N
, New
);
1655 ReplaceNode(N
, New
);
/// Try to select a Thumb1 post-indexed load for N (a LoadSDNode).
/// The load must be POST_INC, non-extending and of type i32, and its offset
/// must be the constant 4. It is then replaced by an ARM::tLDR_postidx
/// machine node that takes over N's memory operand.
1663 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode
*N
) {
1664 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1665 EVT LoadedVT
= LD
->getMemoryVT();
1666 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1667 if (AM
!= ISD::POST_INC
|| LD
->getExtensionType() != ISD::NON_EXTLOAD
||
1668 LoadedVT
.getSimpleVT().SimpleTy
!= MVT::i32
)
1671 auto *COffs
= dyn_cast
<ConstantSDNode
>(LD
->getOffset());
1672 if (!COffs
|| COffs
->getZExtValue() != 4)
1675 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1676 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1677 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1679 SDValue Chain
= LD
->getChain();
1680 SDValue Base
= LD
->getBasePtr();
1681 SDValue Ops
[]= { Base
, getAL(CurDAG
, SDLoc(N
)),
1682 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1683 SDNode
*New
= CurDAG
->getMachineNode(ARM::tLDR_postidx
, SDLoc(N
), MVT::i32
,
1684 MVT::i32
, MVT::Other
, Ops
);
1685 transferMemOperands(N
, New
);
1686 ReplaceNode(N
, New
);
/// Try to select a Thumb2 pre/post-indexed load for N (a LoadSDNode).
/// When SelectT2AddrModeImm8Offset accepts the load's offset, the opcode is
/// chosen from the loaded simple type and the pre/post flag among
/// t2LDR, t2LDRSH / t2LDRH and t2LDRSB / t2LDRB (_PRE or _POST). The new
/// machine node takes over N's memory operand and replaces N.
1690 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode
*N
) {
1691 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1692 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1693 if (AM
== ISD::UNINDEXED
)
1696 EVT LoadedVT
= LD
->getMemoryVT();
1697 bool isSExtLd
= LD
->getExtensionType() == ISD::SEXTLOAD
;
1699 bool isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1700 unsigned Opcode
= 0;
1702 if (SelectT2AddrModeImm8Offset(N
, LD
->getOffset(), Offset
)) {
1703 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
1705 Opcode
= isPre
? ARM::t2LDR_PRE
: ARM::t2LDR_POST
;
1709 Opcode
= isPre
? ARM::t2LDRSH_PRE
: ARM::t2LDRSH_POST
;
1711 Opcode
= isPre
? ARM::t2LDRH_PRE
: ARM::t2LDRH_POST
;
1716 Opcode
= isPre
? ARM::t2LDRSB_PRE
: ARM::t2LDRSB_POST
;
1718 Opcode
= isPre
? ARM::t2LDRB_PRE
: ARM::t2LDRB_POST
;
// Operand list: Base, Offset, predicate, register 0, Chain.
1727 SDValue Chain
= LD
->getChain();
1728 SDValue Base
= LD
->getBasePtr();
1729 SDValue Ops
[]= { Base
, Offset
, getAL(CurDAG
, SDLoc(N
)),
1730 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1731 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1733 transferMemOperands(N
, New
);
1734 ReplaceNode(N
, New
);
/// Try to select an MVE pre/post-indexed vector load for N, which must be a
/// LoadSDNode or a MaskedLoadSDNode (anything else is unreachable).
/// Unmasked loads use predicate ARMVCC::None with register 0; masked loads
/// use ARMVCC::Then with the load's mask. The opcode is picked from the
/// memory type, the alignment and whether SelectT2AddrModeImm7Offset accepts
/// the offset at the matching shift (0, 1 or 2). N's three results are then
/// redirected to the new machine node and N is removed.
1741 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode
*N
) {
1743 unsigned Opcode
= 0;
1744 bool isSExtLd
, isPre
;
1746 ARMVCC::VPTCodes Pred
;
1748 SDValue Chain
, Base
, Offset
;
1750 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
)) {
1751 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1752 if (AM
== ISD::UNINDEXED
)
1754 LoadedVT
= LD
->getMemoryVT();
1755 if (!LoadedVT
.isVector())
1758 Chain
= LD
->getChain();
1759 Base
= LD
->getBasePtr();
1760 Offset
= LD
->getOffset();
1761 Alignment
= LD
->getAlign();
1762 isSExtLd
= LD
->getExtensionType() == ISD::SEXTLOAD
;
1763 isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1764 Pred
= ARMVCC::None
;
1765 PredReg
= CurDAG
->getRegister(0, MVT::i32
);
1766 } else if (MaskedLoadSDNode
*LD
= dyn_cast
<MaskedLoadSDNode
>(N
)) {
1767 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1768 if (AM
== ISD::UNINDEXED
)
1770 LoadedVT
= LD
->getMemoryVT();
1771 if (!LoadedVT
.isVector())
1774 Chain
= LD
->getChain();
1775 Base
= LD
->getBasePtr();
1776 Offset
= LD
->getOffset();
1777 Alignment
= LD
->getAlign();
1778 isSExtLd
= LD
->getExtensionType() == ISD::SEXTLOAD
;
1779 isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1780 Pred
= ARMVCC::Then
;
1781 PredReg
= LD
->getMask();
1783 llvm_unreachable("Expected a Load or a Masked Load!");
1785 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1786 // as opposed to a vldrw.32). This can allow extra addressing modes or
1787 // alignments for what is otherwise an equivalent instruction.
1788 bool CanChangeType
= Subtarget
->isLittle() && !isa
<MaskedLoadSDNode
>(N
);
// Narrow memory types (v4i16, v8i8, v4i8) come first; each has a signed and
// an unsigned opcode variant.
1791 if (Alignment
>= Align(2) && LoadedVT
== MVT::v4i16
&&
1792 SelectT2AddrModeImm7Offset(N
, Offset
, NewOffset
, 1)) {
1794 Opcode
= isPre
? ARM::MVE_VLDRHS32_pre
: ARM::MVE_VLDRHS32_post
;
1796 Opcode
= isPre
? ARM::MVE_VLDRHU32_pre
: ARM::MVE_VLDRHU32_post
;
1797 } else if (LoadedVT
== MVT::v8i8
&&
1798 SelectT2AddrModeImm7Offset(N
, Offset
, NewOffset
, 0)) {
1800 Opcode
= isPre
? ARM::MVE_VLDRBS16_pre
: ARM::MVE_VLDRBS16_post
;
1802 Opcode
= isPre
? ARM::MVE_VLDRBU16_pre
: ARM::MVE_VLDRBU16_post
;
1803 } else if (LoadedVT
== MVT::v4i8
&&
1804 SelectT2AddrModeImm7Offset(N
, Offset
, NewOffset
, 0)) {
1806 Opcode
= isPre
? ARM::MVE_VLDRBS32_pre
: ARM::MVE_VLDRBS32_post
;
1808 Opcode
= isPre
? ARM::MVE_VLDRBU32_pre
: ARM::MVE_VLDRBU32_post
;
// Word, halfword and byte loads, tried widest first; CanChangeType lets any
// memory type use a form whose alignment and offset requirements are met.
1809 } else if (Alignment
>= Align(4) &&
1810 (CanChangeType
|| LoadedVT
== MVT::v4i32
||
1811 LoadedVT
== MVT::v4f32
) &&
1812 SelectT2AddrModeImm7Offset(N
, Offset
, NewOffset
, 2))
1813 Opcode
= isPre
? ARM::MVE_VLDRWU32_pre
: ARM::MVE_VLDRWU32_post
;
1814 else if (Alignment
>= Align(2) &&
1815 (CanChangeType
|| LoadedVT
== MVT::v8i16
||
1816 LoadedVT
== MVT::v8f16
) &&
1817 SelectT2AddrModeImm7Offset(N
, Offset
, NewOffset
, 1))
1818 Opcode
= isPre
? ARM::MVE_VLDRHU16_pre
: ARM::MVE_VLDRHU16_post
;
1819 else if ((CanChangeType
|| LoadedVT
== MVT::v16i8
) &&
1820 SelectT2AddrModeImm7Offset(N
, Offset
, NewOffset
, 0))
1821 Opcode
= isPre
? ARM::MVE_VLDRBU8_pre
: ARM::MVE_VLDRBU8_post
;
1825 SDValue Ops
[] = {Base
, NewOffset
,
1826 CurDAG
->getTargetConstant(Pred
, SDLoc(N
), MVT::i32
), PredReg
,
1828 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
,
1829 N
->getValueType(0), MVT::Other
, Ops
);
1830 transferMemOperands(N
, New
);
// New's result 0 is the i32 value and result 1 has N's value type 0, so N's
// values 0 and 1 map to New's values 1 and 0; value 2 maps straight across.
1831 ReplaceUses(SDValue(N
, 0), SDValue(New
, 1));
1832 ReplaceUses(SDValue(N
, 1), SDValue(New
, 0));
1833 ReplaceUses(SDValue(N
, 2), SDValue(New
, 2));
1834 CurDAG
->RemoveDeadNode(N
);
1838 /// Form a GPRPair pseudo register from a pair of GPR regs.
/// Emits a REG_SEQUENCE machine node of type VT in register class GPRPair,
/// placing V0 in sub-register gsub_0 and V1 in gsub_1. The node's debug
/// location is taken from V0.
1839 SDNode
*ARMDAGToDAGISel::createGPRPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1840 SDLoc
dl(V0
.getNode());
1842 CurDAG
->getTargetConstant(ARM::GPRPairRegClassID
, dl
, MVT::i32
);
1843 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::gsub_0
, dl
, MVT::i32
);
1844 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::gsub_1
, dl
, MVT::i32
);
1845 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1846 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1849 /// Form a D register from a pair of S registers.
/// Emits a REG_SEQUENCE machine node of type VT in register class DPR_VFP2,
/// placing V0 in sub-register ssub_0 and V1 in ssub_1. The node's debug
/// location is taken from V0.
1850 SDNode
*ARMDAGToDAGISel::createSRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1851 SDLoc
dl(V0
.getNode());
1853 CurDAG
->getTargetConstant(ARM::DPR_VFP2RegClassID
, dl
, MVT::i32
);
1854 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::ssub_0
, dl
, MVT::i32
);
1855 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::ssub_1
, dl
, MVT::i32
);
1856 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1857 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1860 /// Form a quad register from a pair of D registers.
/// Emits a REG_SEQUENCE machine node of type VT in register class QPR,
/// placing V0 in sub-register dsub_0 and V1 in dsub_1. The node's debug
/// location is taken from V0.
1861 SDNode
*ARMDAGToDAGISel::createDRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1862 SDLoc
dl(V0
.getNode());
1863 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QPRRegClassID
, dl
,
1865 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::dsub_0
, dl
, MVT::i32
);
1866 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::dsub_1
, dl
, MVT::i32
);
1867 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1868 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1871 /// Form 4 consecutive D registers from a pair of Q registers.
/// Emits a REG_SEQUENCE machine node of type VT in register class QQPR,
/// placing V0 in sub-register qsub_0 and V1 in qsub_1. The node's debug
/// location is taken from V0.
1872 SDNode
*ARMDAGToDAGISel::createQRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1873 SDLoc
dl(V0
.getNode());
1874 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQPRRegClassID
, dl
,
1876 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::qsub_0
, dl
, MVT::i32
);
1877 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::qsub_1
, dl
, MVT::i32
);
1878 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1879 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1882 /// Form 4 consecutive S registers.
/// Emits a REG_SEQUENCE machine node of type VT in register class QPR_VFP2,
/// placing V0..V3 in sub-registers ssub_0..ssub_3 respectively. The node's
/// debug location is taken from V0.
1883 SDNode
*ARMDAGToDAGISel::createQuadSRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1884 SDValue V2
, SDValue V3
) {
1885 SDLoc
dl(V0
.getNode());
1887 CurDAG
->getTargetConstant(ARM::QPR_VFP2RegClassID
, dl
, MVT::i32
);
1888 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::ssub_0
, dl
, MVT::i32
);
1889 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::ssub_1
, dl
, MVT::i32
);
1890 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::ssub_2
, dl
, MVT::i32
);
1891 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::ssub_3
, dl
, MVT::i32
);
1892 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1893 V2
, SubReg2
, V3
, SubReg3
};
1894 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1897 /// Form 4 consecutive D registers.
/// Emits a REG_SEQUENCE machine node of type VT in register class QQPR,
/// placing V0..V3 in sub-registers dsub_0..dsub_3 respectively. The node's
/// debug location is taken from V0.
1898 SDNode
*ARMDAGToDAGISel::createQuadDRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1899 SDValue V2
, SDValue V3
) {
1900 SDLoc
dl(V0
.getNode());
1901 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQPRRegClassID
, dl
,
1903 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::dsub_0
, dl
, MVT::i32
);
1904 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::dsub_1
, dl
, MVT::i32
);
1905 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::dsub_2
, dl
, MVT::i32
);
1906 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::dsub_3
, dl
, MVT::i32
);
1907 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1908 V2
, SubReg2
, V3
, SubReg3
};
1909 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1912 /// Form 4 consecutive Q registers.
/// Emits a REG_SEQUENCE machine node of type VT in register class QQQQPR,
/// placing V0..V3 in sub-registers qsub_0..qsub_3 respectively. The node's
/// debug location is taken from V0.
1913 SDNode
*ARMDAGToDAGISel::createQuadQRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1914 SDValue V2
, SDValue V3
) {
1915 SDLoc
dl(V0
.getNode());
1916 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQQQPRRegClassID
, dl
,
1918 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::qsub_0
, dl
, MVT::i32
);
1919 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::qsub_1
, dl
, MVT::i32
);
1920 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::qsub_2
, dl
, MVT::i32
);
1921 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::qsub_3
, dl
, MVT::i32
);
1922 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1923 V2
, SubReg2
, V3
, SubReg3
};
1924 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1927 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1928 /// of a NEON VLD or VST instruction. The supported values depend on the
1929 /// number of registers being loaded.
/// Align must be a ConstantSDNode holding the raw alignment. The register
/// count starts at NumVecs and is adjusted for non-64-bit vectors when
/// NumVecs < 3. The alignment is then tested against the tiers >= 32 (four
/// registers only), >= 16 (two or four registers) and >= 8, and the result is
/// returned as an i32 target constant.
1930 SDValue
ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align
, const SDLoc
&dl
,
1931 unsigned NumVecs
, bool is64BitVector
) {
1932 unsigned NumRegs
= NumVecs
;
1933 if (!is64BitVector
&& NumVecs
< 3)
1936 unsigned Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
1937 if (Alignment
>= 32 && NumRegs
== 4)
1939 else if (Alignment
>= 16 && (NumRegs
== 2 || NumRegs
== 4))
1941 else if (Alignment
>= 8)
1946 return CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
1949 static bool isVLDfixed(unsigned Opc
)
1952 default: return false;
1953 case ARM::VLD1d8wb_fixed
: return true;
1954 case ARM::VLD1d16wb_fixed
: return true;
1955 case ARM::VLD1d64Qwb_fixed
: return true;
1956 case ARM::VLD1d32wb_fixed
: return true;
1957 case ARM::VLD1d64wb_fixed
: return true;
1958 case ARM::VLD1d8TPseudoWB_fixed
: return true;
1959 case ARM::VLD1d16TPseudoWB_fixed
: return true;
1960 case ARM::VLD1d32TPseudoWB_fixed
: return true;
1961 case ARM::VLD1d64TPseudoWB_fixed
: return true;
1962 case ARM::VLD1d8QPseudoWB_fixed
: return true;
1963 case ARM::VLD1d16QPseudoWB_fixed
: return true;
1964 case ARM::VLD1d32QPseudoWB_fixed
: return true;
1965 case ARM::VLD1d64QPseudoWB_fixed
: return true;
1966 case ARM::VLD1q8wb_fixed
: return true;
1967 case ARM::VLD1q16wb_fixed
: return true;
1968 case ARM::VLD1q32wb_fixed
: return true;
1969 case ARM::VLD1q64wb_fixed
: return true;
1970 case ARM::VLD1DUPd8wb_fixed
: return true;
1971 case ARM::VLD1DUPd16wb_fixed
: return true;
1972 case ARM::VLD1DUPd32wb_fixed
: return true;
1973 case ARM::VLD1DUPq8wb_fixed
: return true;
1974 case ARM::VLD1DUPq16wb_fixed
: return true;
1975 case ARM::VLD1DUPq32wb_fixed
: return true;
1976 case ARM::VLD2d8wb_fixed
: return true;
1977 case ARM::VLD2d16wb_fixed
: return true;
1978 case ARM::VLD2d32wb_fixed
: return true;
1979 case ARM::VLD2q8PseudoWB_fixed
: return true;
1980 case ARM::VLD2q16PseudoWB_fixed
: return true;
1981 case ARM::VLD2q32PseudoWB_fixed
: return true;
1982 case ARM::VLD2DUPd8wb_fixed
: return true;
1983 case ARM::VLD2DUPd16wb_fixed
: return true;
1984 case ARM::VLD2DUPd32wb_fixed
: return true;
1985 case ARM::VLD2DUPq8OddPseudoWB_fixed
: return true;
1986 case ARM::VLD2DUPq16OddPseudoWB_fixed
: return true;
1987 case ARM::VLD2DUPq32OddPseudoWB_fixed
: return true;
1991 static bool isVSTfixed(unsigned Opc
)
1994 default: return false;
1995 case ARM::VST1d8wb_fixed
: return true;
1996 case ARM::VST1d16wb_fixed
: return true;
1997 case ARM::VST1d32wb_fixed
: return true;
1998 case ARM::VST1d64wb_fixed
: return true;
1999 case ARM::VST1q8wb_fixed
: return true;
2000 case ARM::VST1q16wb_fixed
: return true;
2001 case ARM::VST1q32wb_fixed
: return true;
2002 case ARM::VST1q64wb_fixed
: return true;
2003 case ARM::VST1d8TPseudoWB_fixed
: return true;
2004 case ARM::VST1d16TPseudoWB_fixed
: return true;
2005 case ARM::VST1d32TPseudoWB_fixed
: return true;
2006 case ARM::VST1d64TPseudoWB_fixed
: return true;
2007 case ARM::VST1d8QPseudoWB_fixed
: return true;
2008 case ARM::VST1d16QPseudoWB_fixed
: return true;
2009 case ARM::VST1d32QPseudoWB_fixed
: return true;
2010 case ARM::VST1d64QPseudoWB_fixed
: return true;
2011 case ARM::VST2d8wb_fixed
: return true;
2012 case ARM::VST2d16wb_fixed
: return true;
2013 case ARM::VST2d32wb_fixed
: return true;
2014 case ARM::VST2q8PseudoWB_fixed
: return true;
2015 case ARM::VST2q16PseudoWB_fixed
: return true;
2016 case ARM::VST2q32PseudoWB_fixed
: return true;
2020 // Get the register stride update opcode of a VLD/VST instruction that
2021 // is otherwise equivalent to the given fixed stride updating instruction.
2022 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc
) {
2023 assert((isVLDfixed(Opc
) || isVSTfixed(Opc
))
2024 && "Incorrect fixed stride updating instruction.");
2027 case ARM::VLD1d8wb_fixed
: return ARM::VLD1d8wb_register
;
2028 case ARM::VLD1d16wb_fixed
: return ARM::VLD1d16wb_register
;
2029 case ARM::VLD1d32wb_fixed
: return ARM::VLD1d32wb_register
;
2030 case ARM::VLD1d64wb_fixed
: return ARM::VLD1d64wb_register
;
2031 case ARM::VLD1q8wb_fixed
: return ARM::VLD1q8wb_register
;
2032 case ARM::VLD1q16wb_fixed
: return ARM::VLD1q16wb_register
;
2033 case ARM::VLD1q32wb_fixed
: return ARM::VLD1q32wb_register
;
2034 case ARM::VLD1q64wb_fixed
: return ARM::VLD1q64wb_register
;
2035 case ARM::VLD1d64Twb_fixed
: return ARM::VLD1d64Twb_register
;
2036 case ARM::VLD1d64Qwb_fixed
: return ARM::VLD1d64Qwb_register
;
2037 case ARM::VLD1d8TPseudoWB_fixed
: return ARM::VLD1d8TPseudoWB_register
;
2038 case ARM::VLD1d16TPseudoWB_fixed
: return ARM::VLD1d16TPseudoWB_register
;
2039 case ARM::VLD1d32TPseudoWB_fixed
: return ARM::VLD1d32TPseudoWB_register
;
2040 case ARM::VLD1d64TPseudoWB_fixed
: return ARM::VLD1d64TPseudoWB_register
;
2041 case ARM::VLD1d8QPseudoWB_fixed
: return ARM::VLD1d8QPseudoWB_register
;
2042 case ARM::VLD1d16QPseudoWB_fixed
: return ARM::VLD1d16QPseudoWB_register
;
2043 case ARM::VLD1d32QPseudoWB_fixed
: return ARM::VLD1d32QPseudoWB_register
;
2044 case ARM::VLD1d64QPseudoWB_fixed
: return ARM::VLD1d64QPseudoWB_register
;
2045 case ARM::VLD1DUPd8wb_fixed
: return ARM::VLD1DUPd8wb_register
;
2046 case ARM::VLD1DUPd16wb_fixed
: return ARM::VLD1DUPd16wb_register
;
2047 case ARM::VLD1DUPd32wb_fixed
: return ARM::VLD1DUPd32wb_register
;
2048 case ARM::VLD1DUPq8wb_fixed
: return ARM::VLD1DUPq8wb_register
;
2049 case ARM::VLD1DUPq16wb_fixed
: return ARM::VLD1DUPq16wb_register
;
2050 case ARM::VLD1DUPq32wb_fixed
: return ARM::VLD1DUPq32wb_register
;
2051 case ARM::VLD2DUPq8OddPseudoWB_fixed
: return ARM::VLD2DUPq8OddPseudoWB_register
;
2052 case ARM::VLD2DUPq16OddPseudoWB_fixed
: return ARM::VLD2DUPq16OddPseudoWB_register
;
2053 case ARM::VLD2DUPq32OddPseudoWB_fixed
: return ARM::VLD2DUPq32OddPseudoWB_register
;
2055 case ARM::VST1d8wb_fixed
: return ARM::VST1d8wb_register
;
2056 case ARM::VST1d16wb_fixed
: return ARM::VST1d16wb_register
;
2057 case ARM::VST1d32wb_fixed
: return ARM::VST1d32wb_register
;
2058 case ARM::VST1d64wb_fixed
: return ARM::VST1d64wb_register
;
2059 case ARM::VST1q8wb_fixed
: return ARM::VST1q8wb_register
;
2060 case ARM::VST1q16wb_fixed
: return ARM::VST1q16wb_register
;
2061 case ARM::VST1q32wb_fixed
: return ARM::VST1q32wb_register
;
2062 case ARM::VST1q64wb_fixed
: return ARM::VST1q64wb_register
;
2063 case ARM::VST1d8TPseudoWB_fixed
: return ARM::VST1d8TPseudoWB_register
;
2064 case ARM::VST1d16TPseudoWB_fixed
: return ARM::VST1d16TPseudoWB_register
;
2065 case ARM::VST1d32TPseudoWB_fixed
: return ARM::VST1d32TPseudoWB_register
;
2066 case ARM::VST1d64TPseudoWB_fixed
: return ARM::VST1d64TPseudoWB_register
;
2067 case ARM::VST1d8QPseudoWB_fixed
: return ARM::VST1d8QPseudoWB_register
;
2068 case ARM::VST1d16QPseudoWB_fixed
: return ARM::VST1d16QPseudoWB_register
;
2069 case ARM::VST1d32QPseudoWB_fixed
: return ARM::VST1d32QPseudoWB_register
;
2070 case ARM::VST1d64QPseudoWB_fixed
: return ARM::VST1d64QPseudoWB_register
;
2072 case ARM::VLD2d8wb_fixed
: return ARM::VLD2d8wb_register
;
2073 case ARM::VLD2d16wb_fixed
: return ARM::VLD2d16wb_register
;
2074 case ARM::VLD2d32wb_fixed
: return ARM::VLD2d32wb_register
;
2075 case ARM::VLD2q8PseudoWB_fixed
: return ARM::VLD2q8PseudoWB_register
;
2076 case ARM::VLD2q16PseudoWB_fixed
: return ARM::VLD2q16PseudoWB_register
;
2077 case ARM::VLD2q32PseudoWB_fixed
: return ARM::VLD2q32PseudoWB_register
;
2079 case ARM::VST2d8wb_fixed
: return ARM::VST2d8wb_register
;
2080 case ARM::VST2d16wb_fixed
: return ARM::VST2d16wb_register
;
2081 case ARM::VST2d32wb_fixed
: return ARM::VST2d32wb_register
;
2082 case ARM::VST2q8PseudoWB_fixed
: return ARM::VST2q8PseudoWB_register
;
2083 case ARM::VST2q16PseudoWB_fixed
: return ARM::VST2q16PseudoWB_register
;
2084 case ARM::VST2q32PseudoWB_fixed
: return ARM::VST2q32PseudoWB_register
;
2086 case ARM::VLD2DUPd8wb_fixed
: return ARM::VLD2DUPd8wb_register
;
2087 case ARM::VLD2DUPd16wb_fixed
: return ARM::VLD2DUPd16wb_register
;
2088 case ARM::VLD2DUPd32wb_fixed
: return ARM::VLD2DUPd32wb_register
;
2090 return Opc
; // If not one we handle, return it unchanged.
2093 /// Returns true if the given increment is a Constant known to be equal to the
2094 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2096 static bool isPerfectIncrement(SDValue Inc
, EVT VecTy
, unsigned NumVecs
) {
2097 auto C
= dyn_cast
<ConstantSDNode
>(Inc
);
2098 return C
&& C
->getZExtValue() == VecTy
.getSizeInBits() / 8 * NumVecs
;
2101 void ARMDAGToDAGISel::SelectVLD(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
2102 const uint16_t *DOpcodes
,
2103 const uint16_t *QOpcodes0
,
2104 const uint16_t *QOpcodes1
) {
2105 assert(Subtarget
->hasNEON());
2106 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VLD NumVecs out-of-range");
2109 SDValue MemAddr
, Align
;
2110 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
2111 // nodes are not intrinsics.
2112 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2113 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2116 SDValue Chain
= N
->getOperand(0);
2117 EVT VT
= N
->getValueType(0);
2118 bool is64BitVector
= VT
.is64BitVector();
2119 Align
= GetVLDSTAlign(Align
, dl
, NumVecs
, is64BitVector
);
2121 unsigned OpcodeIndex
;
2122 switch (VT
.getSimpleVT().SimpleTy
) {
2123 default: llvm_unreachable("unhandled vld type");
2124 // Double-register operations:
2125 case MVT::v8i8
: OpcodeIndex
= 0; break;
2128 case MVT::v4i16
: OpcodeIndex
= 1; break;
2130 case MVT::v2i32
: OpcodeIndex
= 2; break;
2131 case MVT::v1i64
: OpcodeIndex
= 3; break;
2132 // Quad-register operations:
2133 case MVT::v16i8
: OpcodeIndex
= 0; break;
2136 case MVT::v8i16
: OpcodeIndex
= 1; break;
2138 case MVT::v4i32
: OpcodeIndex
= 2; break;
2140 case MVT::v2i64
: OpcodeIndex
= 3; break;
2147 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
2150 ResTy
= EVT::getVectorVT(*CurDAG
->getContext(), MVT::i64
, ResTyElts
);
2152 std::vector
<EVT
> ResTys
;
2153 ResTys
.push_back(ResTy
);
2155 ResTys
.push_back(MVT::i32
);
2156 ResTys
.push_back(MVT::Other
);
2158 SDValue Pred
= getAL(CurDAG
, dl
);
2159 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2161 SmallVector
<SDValue
, 7> Ops
;
2163 // Double registers and VLD1/VLD2 quad registers are directly supported.
2164 if (is64BitVector
|| NumVecs
<= 2) {
2165 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
2166 QOpcodes0
[OpcodeIndex
]);
2167 Ops
.push_back(MemAddr
);
2168 Ops
.push_back(Align
);
2170 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2171 bool IsImmUpdate
= isPerfectIncrement(Inc
, VT
, NumVecs
);
2173 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2174 // check for the opcode rather than the number of vector elements.
2175 if (isVLDfixed(Opc
))
2176 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
2178 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2179 // the operands if not such an opcode.
2180 } else if (!isVLDfixed(Opc
))
2181 Ops
.push_back(Reg0
);
2183 Ops
.push_back(Pred
);
2184 Ops
.push_back(Reg0
);
2185 Ops
.push_back(Chain
);
2186 VLd
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2189 // Otherwise, quad registers are loaded with two separate instructions,
2190 // where one loads the even registers and the other loads the odd registers.
2191 EVT AddrTy
= MemAddr
.getValueType();
2193 // Load the even subregs. This is always an updating load, so that it
2194 // provides the address to the second load for the odd subregs.
2196 SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, ResTy
), 0);
2197 const SDValue OpsA
[] = { MemAddr
, Align
, Reg0
, ImplDef
, Pred
, Reg0
, Chain
};
2198 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
,
2199 ResTy
, AddrTy
, MVT::Other
, OpsA
);
2200 Chain
= SDValue(VLdA
, 2);
2202 // Load the odd subregs.
2203 Ops
.push_back(SDValue(VLdA
, 1));
2204 Ops
.push_back(Align
);
2206 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2207 assert(isa
<ConstantSDNode
>(Inc
.getNode()) &&
2208 "only constant post-increment update allowed for VLD3/4");
2210 Ops
.push_back(Reg0
);
2212 Ops
.push_back(SDValue(VLdA
, 0));
2213 Ops
.push_back(Pred
);
2214 Ops
.push_back(Reg0
);
2215 Ops
.push_back(Chain
);
2216 VLd
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
, Ops
);
2219 // Transfer memoperands.
2220 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2221 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLd
), {MemOp
});
2224 ReplaceNode(N
, VLd
);
2228 // Extract out the subregisters.
2229 SDValue SuperReg
= SDValue(VLd
, 0);
2230 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7 &&
2231 ARM::qsub_3
== ARM::qsub_0
+ 3,
2232 "Unexpected subreg numbering");
2233 unsigned Sub0
= (is64BitVector
? ARM::dsub_0
: ARM::qsub_0
);
2234 for (unsigned Vec
= 0; Vec
< NumVecs
; ++Vec
)
2235 ReplaceUses(SDValue(N
, Vec
),
2236 CurDAG
->getTargetExtractSubreg(Sub0
+ Vec
, dl
, VT
, SuperReg
));
2237 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLd
, 1));
2239 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLd
, 2));
2240 CurDAG
->RemoveDeadNode(N
);
2243 void ARMDAGToDAGISel::SelectVST(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
2244 const uint16_t *DOpcodes
,
2245 const uint16_t *QOpcodes0
,
2246 const uint16_t *QOpcodes1
) {
2247 assert(Subtarget
->hasNEON());
2248 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VST NumVecs out-of-range");
2251 SDValue MemAddr
, Align
;
2252 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
2253 // nodes are not intrinsics.
2254 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2255 unsigned Vec0Idx
= 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2256 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2259 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2261 SDValue Chain
= N
->getOperand(0);
2262 EVT VT
= N
->getOperand(Vec0Idx
).getValueType();
2263 bool is64BitVector
= VT
.is64BitVector();
2264 Align
= GetVLDSTAlign(Align
, dl
, NumVecs
, is64BitVector
);
2266 unsigned OpcodeIndex
;
2267 switch (VT
.getSimpleVT().SimpleTy
) {
2268 default: llvm_unreachable("unhandled vst type");
2269 // Double-register operations:
2270 case MVT::v8i8
: OpcodeIndex
= 0; break;
2273 case MVT::v4i16
: OpcodeIndex
= 1; break;
2275 case MVT::v2i32
: OpcodeIndex
= 2; break;
2276 case MVT::v1i64
: OpcodeIndex
= 3; break;
2277 // Quad-register operations:
2278 case MVT::v16i8
: OpcodeIndex
= 0; break;
2281 case MVT::v8i16
: OpcodeIndex
= 1; break;
2283 case MVT::v4i32
: OpcodeIndex
= 2; break;
2285 case MVT::v2i64
: OpcodeIndex
= 3; break;
2288 std::vector
<EVT
> ResTys
;
2290 ResTys
.push_back(MVT::i32
);
2291 ResTys
.push_back(MVT::Other
);
2293 SDValue Pred
= getAL(CurDAG
, dl
);
2294 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2295 SmallVector
<SDValue
, 7> Ops
;
2297 // Double registers and VST1/VST2 quad registers are directly supported.
2298 if (is64BitVector
|| NumVecs
<= 2) {
2301 SrcReg
= N
->getOperand(Vec0Idx
);
2302 } else if (is64BitVector
) {
2303 // Form a REG_SEQUENCE to force register allocation.
2304 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2305 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2307 SrcReg
= SDValue(createDRegPairNode(MVT::v2i64
, V0
, V1
), 0);
2309 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2310 // If it's a vst3, form a quad D-register and leave the last part as
2312 SDValue V3
= (NumVecs
== 3)
2313 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
,dl
,VT
), 0)
2314 : N
->getOperand(Vec0Idx
+ 3);
2315 SrcReg
= SDValue(createQuadDRegsNode(MVT::v4i64
, V0
, V1
, V2
, V3
), 0);
2318 // Form a QQ register.
2319 SDValue Q0
= N
->getOperand(Vec0Idx
);
2320 SDValue Q1
= N
->getOperand(Vec0Idx
+ 1);
2321 SrcReg
= SDValue(createQRegPairNode(MVT::v4i64
, Q0
, Q1
), 0);
2324 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
2325 QOpcodes0
[OpcodeIndex
]);
2326 Ops
.push_back(MemAddr
);
2327 Ops
.push_back(Align
);
2329 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2330 bool IsImmUpdate
= isPerfectIncrement(Inc
, VT
, NumVecs
);
2332 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2333 // check for the opcode rather than the number of vector elements.
2334 if (isVSTfixed(Opc
))
2335 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
2338 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2339 // the operands if not such an opcode.
2340 else if (!isVSTfixed(Opc
))
2341 Ops
.push_back(Reg0
);
2343 Ops
.push_back(SrcReg
);
2344 Ops
.push_back(Pred
);
2345 Ops
.push_back(Reg0
);
2346 Ops
.push_back(Chain
);
2347 SDNode
*VSt
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2349 // Transfer memoperands.
2350 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VSt
), {MemOp
});
2352 ReplaceNode(N
, VSt
);
2356 // Otherwise, quad registers are stored with two separate instructions,
2357 // where one stores the even registers and the other stores the odd registers.
2359 // Form the QQQQ REG_SEQUENCE.
2360 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2361 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2362 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2363 SDValue V3
= (NumVecs
== 3)
2364 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, VT
), 0)
2365 : N
->getOperand(Vec0Idx
+ 3);
2366 SDValue RegSeq
= SDValue(createQuadQRegsNode(MVT::v8i64
, V0
, V1
, V2
, V3
), 0);
2368 // Store the even D registers. This is always an updating store, so that it
2369 // provides the address to the second store for the odd subregs.
2370 const SDValue OpsA
[] = { MemAddr
, Align
, Reg0
, RegSeq
, Pred
, Reg0
, Chain
};
2371 SDNode
*VStA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
,
2372 MemAddr
.getValueType(),
2374 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VStA
), {MemOp
});
2375 Chain
= SDValue(VStA
, 1);
2377 // Store the odd D registers.
2378 Ops
.push_back(SDValue(VStA
, 0));
2379 Ops
.push_back(Align
);
2381 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2382 assert(isa
<ConstantSDNode
>(Inc
.getNode()) &&
2383 "only constant post-increment update allowed for VST3/4");
2385 Ops
.push_back(Reg0
);
2387 Ops
.push_back(RegSeq
);
2388 Ops
.push_back(Pred
);
2389 Ops
.push_back(Reg0
);
2390 Ops
.push_back(Chain
);
2391 SDNode
*VStB
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
,
2393 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VStB
), {MemOp
});
2394 ReplaceNode(N
, VStB
);
2397 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode
*N
, bool IsLoad
, bool isUpdating
,
2399 const uint16_t *DOpcodes
,
2400 const uint16_t *QOpcodes
) {
2401 assert(Subtarget
->hasNEON());
2402 assert(NumVecs
>=2 && NumVecs
<= 4 && "VLDSTLane NumVecs out-of-range");
2405 SDValue MemAddr
, Align
;
2406 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
2407 // nodes are not intrinsics.
2408 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2409 unsigned Vec0Idx
= 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2410 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2413 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2415 SDValue Chain
= N
->getOperand(0);
2417 cast
<ConstantSDNode
>(N
->getOperand(Vec0Idx
+ NumVecs
))->getZExtValue();
2418 EVT VT
= N
->getOperand(Vec0Idx
).getValueType();
2419 bool is64BitVector
= VT
.is64BitVector();
2421 unsigned Alignment
= 0;
2423 Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
2424 unsigned NumBytes
= NumVecs
* VT
.getScalarSizeInBits() / 8;
2425 if (Alignment
> NumBytes
)
2426 Alignment
= NumBytes
;
2427 if (Alignment
< 8 && Alignment
< NumBytes
)
2429 // Alignment must be a power of two; make sure of that.
2430 Alignment
= (Alignment
& -Alignment
);
2434 Align
= CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
2436 unsigned OpcodeIndex
;
2437 switch (VT
.getSimpleVT().SimpleTy
) {
2438 default: llvm_unreachable("unhandled vld/vst lane type");
2439 // Double-register operations:
2440 case MVT::v8i8
: OpcodeIndex
= 0; break;
2443 case MVT::v4i16
: OpcodeIndex
= 1; break;
2445 case MVT::v2i32
: OpcodeIndex
= 2; break;
2446 // Quad-register operations:
2449 case MVT::v8i16
: OpcodeIndex
= 0; break;
2451 case MVT::v4i32
: OpcodeIndex
= 1; break;
2454 std::vector
<EVT
> ResTys
;
2456 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
2459 ResTys
.push_back(EVT::getVectorVT(*CurDAG
->getContext(),
2460 MVT::i64
, ResTyElts
));
2463 ResTys
.push_back(MVT::i32
);
2464 ResTys
.push_back(MVT::Other
);
2466 SDValue Pred
= getAL(CurDAG
, dl
);
2467 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2469 SmallVector
<SDValue
, 8> Ops
;
2470 Ops
.push_back(MemAddr
);
2471 Ops
.push_back(Align
);
2473 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2475 isPerfectIncrement(Inc
, VT
.getVectorElementType(), NumVecs
);
2476 Ops
.push_back(IsImmUpdate
? Reg0
: Inc
);
2480 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2481 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2484 SuperReg
= SDValue(createDRegPairNode(MVT::v2i64
, V0
, V1
), 0);
2486 SuperReg
= SDValue(createQRegPairNode(MVT::v4i64
, V0
, V1
), 0);
2488 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2489 SDValue V3
= (NumVecs
== 3)
2490 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, VT
), 0)
2491 : N
->getOperand(Vec0Idx
+ 3);
2493 SuperReg
= SDValue(createQuadDRegsNode(MVT::v4i64
, V0
, V1
, V2
, V3
), 0);
2495 SuperReg
= SDValue(createQuadQRegsNode(MVT::v8i64
, V0
, V1
, V2
, V3
), 0);
2497 Ops
.push_back(SuperReg
);
2498 Ops
.push_back(getI32Imm(Lane
, dl
));
2499 Ops
.push_back(Pred
);
2500 Ops
.push_back(Reg0
);
2501 Ops
.push_back(Chain
);
2503 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
2504 QOpcodes
[OpcodeIndex
]);
2505 SDNode
*VLdLn
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2506 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLdLn
), {MemOp
});
2508 ReplaceNode(N
, VLdLn
);
2512 // Extract the subregisters.
2513 SuperReg
= SDValue(VLdLn
, 0);
2514 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7 &&
2515 ARM::qsub_3
== ARM::qsub_0
+ 3,
2516 "Unexpected subreg numbering");
2517 unsigned Sub0
= is64BitVector
? ARM::dsub_0
: ARM::qsub_0
;
2518 for (unsigned Vec
= 0; Vec
< NumVecs
; ++Vec
)
2519 ReplaceUses(SDValue(N
, Vec
),
2520 CurDAG
->getTargetExtractSubreg(Sub0
+ Vec
, dl
, VT
, SuperReg
));
2521 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLdLn
, 1));
2523 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLdLn
, 2));
2524 CurDAG
->RemoveDeadNode(N
);
2527 template <typename SDValueVector
>
2528 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
,
2529 SDValue PredicateMask
) {
2530 Ops
.push_back(CurDAG
->getTargetConstant(ARMVCC::Then
, Loc
, MVT::i32
));
2531 Ops
.push_back(PredicateMask
);
2534 template <typename SDValueVector
>
2535 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
,
2536 SDValue PredicateMask
,
2538 Ops
.push_back(CurDAG
->getTargetConstant(ARMVCC::Then
, Loc
, MVT::i32
));
2539 Ops
.push_back(PredicateMask
);
2540 Ops
.push_back(Inactive
);
2543 template <typename SDValueVector
>
2544 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
) {
2545 Ops
.push_back(CurDAG
->getTargetConstant(ARMVCC::None
, Loc
, MVT::i32
));
2546 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
2549 template <typename SDValueVector
>
2550 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector
&Ops
, SDLoc Loc
,
2552 Ops
.push_back(CurDAG
->getTargetConstant(ARMVCC::None
, Loc
, MVT::i32
));
2553 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
2554 Ops
.push_back(SDValue(
2555 CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, Loc
, InactiveTy
), 0));
2558 void ARMDAGToDAGISel::SelectMVE_WB(SDNode
*N
, const uint16_t *Opcodes
,
2561 SmallVector
<SDValue
, 8> Ops
;
2564 switch (N
->getValueType(1).getVectorElementType().getSizeInBits()) {
2566 Opcode
= Opcodes
[0];
2569 Opcode
= Opcodes
[1];
2572 llvm_unreachable("bad vector element size in SelectMVE_WB");
2575 Ops
.push_back(N
->getOperand(2)); // vector of base addresses
2577 int32_t ImmValue
= cast
<ConstantSDNode
>(N
->getOperand(3))->getZExtValue();
2578 Ops
.push_back(getI32Imm(ImmValue
, Loc
)); // immediate offset
2581 AddMVEPredicateToOps(Ops
, Loc
, N
->getOperand(4));
2583 AddEmptyMVEPredicateToOps(Ops
, Loc
);
2585 Ops
.push_back(N
->getOperand(0)); // chain
2587 SmallVector
<EVT
, 8> VTs
;
2588 VTs
.push_back(N
->getValueType(1));
2589 VTs
.push_back(N
->getValueType(0));
2590 VTs
.push_back(N
->getValueType(2));
2592 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), VTs
, Ops
);
2593 ReplaceUses(SDValue(N
, 0), SDValue(New
, 1));
2594 ReplaceUses(SDValue(N
, 1), SDValue(New
, 0));
2595 ReplaceUses(SDValue(N
, 2), SDValue(New
, 2));
2596 transferMemOperands(N
, New
);
2597 CurDAG
->RemoveDeadNode(N
);
2600 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode
*N
, uint16_t Opcode
,
2602 bool HasSaturationOperand
) {
2604 SmallVector
<SDValue
, 8> Ops
;
2606 // Two 32-bit halves of the value to be shifted
2607 Ops
.push_back(N
->getOperand(1));
2608 Ops
.push_back(N
->getOperand(2));
2612 int32_t ImmValue
= cast
<ConstantSDNode
>(N
->getOperand(3))->getZExtValue();
2613 Ops
.push_back(getI32Imm(ImmValue
, Loc
)); // immediate shift count
2615 Ops
.push_back(N
->getOperand(3));
2618 // The immediate saturation operand, if any
2619 if (HasSaturationOperand
) {
2620 int32_t SatOp
= cast
<ConstantSDNode
>(N
->getOperand(4))->getZExtValue();
2621 int SatBit
= (SatOp
== 64 ? 0 : 1);
2622 Ops
.push_back(getI32Imm(SatBit
, Loc
));
2625 // MVE scalar shifts are IT-predicable, so include the standard
2626 // predicate arguments.
2627 Ops
.push_back(getAL(CurDAG
, Loc
));
2628 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
2630 CurDAG
->SelectNodeTo(N
, Opcode
, N
->getVTList(), makeArrayRef(Ops
));
2633 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode
*N
, uint16_t OpcodeWithCarry
,
2634 uint16_t OpcodeWithNoCarry
,
2635 bool Add
, bool Predicated
) {
2637 SmallVector
<SDValue
, 8> Ops
;
2640 unsigned FirstInputOp
= Predicated
? 2 : 1;
2642 // Two input vectors and the input carry flag
2643 Ops
.push_back(N
->getOperand(FirstInputOp
));
2644 Ops
.push_back(N
->getOperand(FirstInputOp
+ 1));
2645 SDValue CarryIn
= N
->getOperand(FirstInputOp
+ 2);
2646 ConstantSDNode
*CarryInConstant
= dyn_cast
<ConstantSDNode
>(CarryIn
);
2647 uint32_t CarryMask
= 1 << 29;
2648 uint32_t CarryExpected
= Add
? 0 : CarryMask
;
2649 if (CarryInConstant
&&
2650 (CarryInConstant
->getZExtValue() & CarryMask
) == CarryExpected
) {
2651 Opcode
= OpcodeWithNoCarry
;
2653 Ops
.push_back(CarryIn
);
2654 Opcode
= OpcodeWithCarry
;
2658 AddMVEPredicateToOps(Ops
, Loc
,
2659 N
->getOperand(FirstInputOp
+ 3), // predicate
2660 N
->getOperand(FirstInputOp
- 1)); // inactive
2662 AddEmptyMVEPredicateToOps(Ops
, Loc
, N
->getValueType(0));
2664 CurDAG
->SelectNodeTo(N
, Opcode
, N
->getVTList(), makeArrayRef(Ops
));
2667 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode
*N
, bool Predicated
) {
2669 SmallVector
<SDValue
, 8> Ops
;
2671 // One vector input, followed by a 32-bit word of bits to shift in
2672 // and then an immediate shift count
2673 Ops
.push_back(N
->getOperand(1));
2674 Ops
.push_back(N
->getOperand(2));
2675 int32_t ImmValue
= cast
<ConstantSDNode
>(N
->getOperand(3))->getZExtValue();
2676 Ops
.push_back(getI32Imm(ImmValue
, Loc
)); // immediate shift count
2679 AddMVEPredicateToOps(Ops
, Loc
, N
->getOperand(4));
2681 AddEmptyMVEPredicateToOps(Ops
, Loc
);
2683 CurDAG
->SelectNodeTo(N
, ARM::MVE_VSHLC
, N
->getVTList(), makeArrayRef(Ops
));
2686 static bool SDValueToConstBool(SDValue SDVal
) {
2687 assert(isa
<ConstantSDNode
>(SDVal
) && "expected a compile-time constant");
2688 ConstantSDNode
*SDValConstant
= dyn_cast
<ConstantSDNode
>(SDVal
);
2689 uint64_t Value
= SDValConstant
->getZExtValue();
2690 assert((Value
== 0 || Value
== 1) && "expected value 0 or 1");
2694 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode
*N
, bool Predicated
,
2695 const uint16_t *OpcodesS
,
2696 const uint16_t *OpcodesU
,
2697 size_t Stride
, size_t TySize
) {
2698 assert(TySize
< Stride
&& "Invalid TySize");
2699 bool IsUnsigned
= SDValueToConstBool(N
->getOperand(1));
2700 bool IsSub
= SDValueToConstBool(N
->getOperand(2));
2701 bool IsExchange
= SDValueToConstBool(N
->getOperand(3));
2704 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2705 assert(!IsExchange
&&
2706 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2709 auto OpIsZero
= [N
](size_t OpNo
) {
2710 if (ConstantSDNode
*OpConst
= dyn_cast
<ConstantSDNode
>(N
->getOperand(OpNo
)))
2711 if (OpConst
->getZExtValue() == 0)
2716 // If the input accumulator value is not zero, select an instruction with
2717 // accumulator, otherwise select an instruction without accumulator
2718 bool IsAccum
= !(OpIsZero(4) && OpIsZero(5));
2720 const uint16_t *Opcodes
= IsUnsigned
? OpcodesU
: OpcodesS
;
2722 Opcodes
+= 4 * Stride
;
2724 Opcodes
+= 2 * Stride
;
2727 uint16_t Opcode
= Opcodes
[TySize
];
2730 SmallVector
<SDValue
, 8> Ops
;
2731 // Push the accumulator operands, if they are used
2733 Ops
.push_back(N
->getOperand(4));
2734 Ops
.push_back(N
->getOperand(5));
2736 // Push the two vector operands
2737 Ops
.push_back(N
->getOperand(6));
2738 Ops
.push_back(N
->getOperand(7));
2741 AddMVEPredicateToOps(Ops
, Loc
, N
->getOperand(8));
2743 AddEmptyMVEPredicateToOps(Ops
, Loc
);
2745 CurDAG
->SelectNodeTo(N
, Opcode
, N
->getVTList(), makeArrayRef(Ops
));
2748 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode
*N
, bool Predicated
,
2749 const uint16_t *OpcodesS
,
2750 const uint16_t *OpcodesU
) {
2751 EVT VecTy
= N
->getOperand(6).getValueType();
2753 switch (VecTy
.getVectorElementType().getSizeInBits()) {
2761 llvm_unreachable("bad vector element size");
2764 SelectBaseMVE_VMLLDAV(N
, Predicated
, OpcodesS
, OpcodesU
, 2, SizeIndex
);
2767 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode
*N
, bool Predicated
,
2768 const uint16_t *OpcodesS
,
2769 const uint16_t *OpcodesU
) {
2771 N
->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2773 "bad vector element size");
2774 SelectBaseMVE_VMLLDAV(N
, Predicated
, OpcodesS
, OpcodesU
, 1, 0);
2777 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode
*N
, unsigned NumVecs
,
2778 const uint16_t *const *Opcodes
,
2779 bool HasWriteback
) {
2780 EVT VT
= N
->getValueType(0);
2783 const uint16_t *OurOpcodes
;
2784 switch (VT
.getVectorElementType().getSizeInBits()) {
2786 OurOpcodes
= Opcodes
[0];
2789 OurOpcodes
= Opcodes
[1];
2792 OurOpcodes
= Opcodes
[2];
2795 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2798 EVT DataTy
= EVT::getVectorVT(*CurDAG
->getContext(), MVT::i64
, NumVecs
* 2);
2799 SmallVector
<EVT
, 4> ResultTys
= {DataTy
, MVT::Other
};
2800 unsigned PtrOperand
= HasWriteback
? 1 : 2;
2802 auto Data
= SDValue(
2803 CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, Loc
, DataTy
), 0);
2804 SDValue Chain
= N
->getOperand(0);
2805 // Add a MVE_VLDn instruction for each Vec, except the last
2806 for (unsigned Stage
= 0; Stage
< NumVecs
- 1; ++Stage
) {
2807 SDValue Ops
[] = {Data
, N
->getOperand(PtrOperand
), Chain
};
2809 CurDAG
->getMachineNode(OurOpcodes
[Stage
], Loc
, ResultTys
, Ops
);
2810 Data
= SDValue(LoadInst
, 0);
2811 Chain
= SDValue(LoadInst
, 1);
2812 transferMemOperands(N
, LoadInst
);
2814 // The last may need a writeback on it
2816 ResultTys
= {DataTy
, MVT::i32
, MVT::Other
};
2817 SDValue Ops
[] = {Data
, N
->getOperand(PtrOperand
), Chain
};
2819 CurDAG
->getMachineNode(OurOpcodes
[NumVecs
- 1], Loc
, ResultTys
, Ops
);
2820 transferMemOperands(N
, LoadInst
);
2823 for (i
= 0; i
< NumVecs
; i
++)
2824 ReplaceUses(SDValue(N
, i
),
2825 CurDAG
->getTargetExtractSubreg(ARM::qsub_0
+ i
, Loc
, VT
,
2826 SDValue(LoadInst
, 0)));
2828 ReplaceUses(SDValue(N
, i
++), SDValue(LoadInst
, 1));
2829 ReplaceUses(SDValue(N
, i
), SDValue(LoadInst
, HasWriteback
? 2 : 1));
2830 CurDAG
->RemoveDeadNode(N
);
/// Select node \p N in place to the machine opcode taken from \p Opcodes
/// ([0], [1] or [2], chosen by the scalar size of N's first result type).
/// The operand list is rebuilt from N's operands in order: base, limit, then
/// the step, which is re-emitted as an i32 immediate, followed by MVE
/// predicate operands.
/// NOTE(review): the conditions that test \p Wrapping and \p Predicated are
/// not visible in this listing; presumably they gate the limit operand and
/// the inactive/predicate operands respectively - confirm against the full
/// source.
2833 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode
*N
, const uint16_t *Opcodes
,
2834 bool Wrapping
, bool Predicated
) {
2835 EVT VT
= N
->getValueType(0);
// NOTE(review): the case labels of this switch are not visible in this listing.
2839 switch (VT
.getScalarSizeInBits()) {
2841 Opcode
= Opcodes
[0];
2844 Opcode
= Opcodes
[1];
2847 Opcode
= Opcodes
[2];
2850 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2853 SmallVector
<SDValue
, 8> Ops
;
// OpIdx walks N's operands; each consumed operand advances it by one.
2858 Inactive
= N
->getOperand(OpIdx
++);
2860 Ops
.push_back(N
->getOperand(OpIdx
++)); // base
2862 Ops
.push_back(N
->getOperand(OpIdx
++)); // limit
2864 SDValue ImmOp
= N
->getOperand(OpIdx
++); // step
// cast<> requires the step operand to be a ConstantSDNode.
2865 int ImmValue
= cast
<ConstantSDNode
>(ImmOp
)->getZExtValue();
2866 Ops
.push_back(getI32Imm(ImmValue
, Loc
));
// Either a real predicate (with the Inactive value) or an empty predicate is
// appended; the selecting condition is not visible in this listing.
2869 AddMVEPredicateToOps(Ops
, Loc
, N
->getOperand(OpIdx
), Inactive
);
2871 AddEmptyMVEPredicateToOps(Ops
, Loc
, N
->getValueType(0));
// N is morphed in place and keeps its original value-type list.
2873 CurDAG
->SelectNodeTo(N
, Opcode
, N
->getVTList(), makeArrayRef(Ops
));
/// Select a dual-register CDE intrinsic node \p N to machine opcode
/// \p Opcode. Operands are rebuilt as: coprocessor immediate, (for
/// accumulating variants) the accumulator as a GPR pair, \p NumExtraOps
/// operands copied unchanged, a trailing immediate, and predicate operands.
/// The Untyped register-pair result is split into two i32 subregister
/// extracts that replace N's two results.
/// NOTE(review): the conditions that test \p HasAccum and IsBigEndian are not
/// visible in this listing.
2876 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode
*N
, uint16_t Opcode
,
2877 size_t NumExtraOps
, bool HasAccum
) {
2878 bool IsBigEndian
= CurDAG
->getDataLayout().isBigEndian();
2880 SmallVector
<SDValue
, 8> Ops
;
2884 // Convert and append the immediate operand designating the coprocessor.
2885 SDValue ImmCorpoc
= N
->getOperand(OpIdx
++);
2886 uint32_t ImmCoprocVal
= cast
<ConstantSDNode
>(ImmCorpoc
)->getZExtValue();
2887 Ops
.push_back(getI32Imm(ImmCoprocVal
, Loc
));
2889 // For accumulating variants copy the low and high order parts of the
2890 // accumulator into a register pair and add it to the operand vector.
2892 SDValue AccLo
= N
->getOperand(OpIdx
++);
2893 SDValue AccHi
= N
->getOperand(OpIdx
++);
// NOTE(review): the guard on this swap is not visible here; presumably it is
// taken only when IsBigEndian is set.
2895 std::swap(AccLo
, AccHi
);
2896 Ops
.push_back(SDValue(createGPRPairNode(MVT::Untyped
, AccLo
, AccHi
), 0));
2899 // Copy extra operands as-is.
2900 for (size_t I
= 0; I
< NumExtraOps
; I
++)
2901 Ops
.push_back(N
->getOperand(OpIdx
++));
2903 // Convert and append the immediate operand
2904 SDValue Imm
= N
->getOperand(OpIdx
);
2905 uint32_t ImmVal
= cast
<ConstantSDNode
>(Imm
)->getZExtValue();
2906 Ops
.push_back(getI32Imm(ImmVal
, Loc
));
2908 // Accumulating variants are IT-predicable, add predicate operands.
2910 SDValue Pred
= getAL(CurDAG
, Loc
);
2911 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
2912 Ops
.push_back(Pred
);
2913 Ops
.push_back(PredReg
);
2916 // Create the CDE instruction
2917 SDNode
*InstrNode
= CurDAG
->getMachineNode(Opcode
, Loc
, MVT::Untyped
, Ops
);
2918 SDValue ResultPair
= SDValue(InstrNode
, 0);
2920 // The original intrinsic had two outputs, and the output of the dual-register
2921 // CDE instruction is a register pair. We need to extract the two subregisters
2922 // and replace all uses of the original outputs with the extracted subregisters.
2924 uint16_t SubRegs
[2] = {ARM::gsub_0
, ARM::gsub_1
};
// NOTE(review): as with the accumulator swap above, the guard on this swap is
// not visible here; presumably IsBigEndian.
2926 std::swap(SubRegs
[0], SubRegs
[1]);
2928 for (size_t ResIdx
= 0; ResIdx
< 2; ResIdx
++) {
// Results of N that have no users need no extract.
2929 if (SDValue(N
, ResIdx
).use_empty())
2931 SDValue SubReg
= CurDAG
->getTargetExtractSubreg(SubRegs
[ResIdx
], Loc
,
2932 MVT::i32
, ResultPair
);
2933 ReplaceUses(SDValue(N
, ResIdx
), SubReg
);
2936 CurDAG
->RemoveDeadNode(N
);
/// Select a NEON load-and-duplicate node \p N (asserts NEON is available and
/// that \p NumVecs is in [1, 4]). The address is operand 2 when
/// \p IsIntrinsic is set and operand 1 otherwise, and is matched with
/// SelectAddrMode6. The opcode is taken from \p DOpcodes for 64-bit vectors,
/// from \p QOpcodes0 when a single quad vector is loaded and from
/// \p QOpcodes1 otherwise, indexed by a class derived from N's first result
/// type.
/// NOTE(review): the statements that test \p isUpdating are not visible in
/// this listing; presumably they gate the extra i32 result and the increment
/// handling.
2939 void ARMDAGToDAGISel::SelectVLDDup(SDNode
*N
, bool IsIntrinsic
,
2940 bool isUpdating
, unsigned NumVecs
,
2941 const uint16_t *DOpcodes
,
2942 const uint16_t *QOpcodes0
,
2943 const uint16_t *QOpcodes1
) {
2944 assert(Subtarget
->hasNEON());
2945 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VLDDup NumVecs out-of-range");
2948 SDValue MemAddr
, Align
;
2949 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2950 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2953 SDValue Chain
= N
->getOperand(0);
2954 EVT VT
= N
->getValueType(0);
2955 bool is64BitVector
= VT
.is64BitVector();
2957 unsigned Alignment
= 0;
2959 Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
// Clamp the alignment to the number of bytes actually loaded.
2960 unsigned NumBytes
= NumVecs
* VT
.getScalarSizeInBits() / 8;
2961 if (Alignment
> NumBytes
)
2962 Alignment
= NumBytes
;
2963 if (Alignment
< 8 && Alignment
< NumBytes
)
2965 // Alignment must be a power of two; make sure of that.
// x & -x keeps only the lowest set bit of x.
2966 Alignment
= (Alignment
& -Alignment
);
2970 Align
= CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
// Map the result type onto an index into the opcode tables.
// NOTE(review): several case labels are not visible in this listing.
2972 unsigned OpcodeIndex
;
2973 switch (VT
.getSimpleVT().SimpleTy
) {
2974 default: llvm_unreachable("unhandled vld-dup type");
2976 case MVT::v16i8
: OpcodeIndex
= 0; break;
2983 OpcodeIndex
= 1; break;
2987 case MVT::v4i32
: OpcodeIndex
= 2; break;
2989 case MVT::v1i64
: OpcodeIndex
= 3; break;
// Three vectors are rounded up to a four-element result type.
2992 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
2995 EVT ResTy
= EVT::getVectorVT(*CurDAG
->getContext(), MVT::i64
, ResTyElts
);
2997 std::vector
<EVT
> ResTys
;
2998 ResTys
.push_back(ResTy
);
3000 ResTys
.push_back(MVT::i32
);
3001 ResTys
.push_back(MVT::Other
);
3003 SDValue Pred
= getAL(CurDAG
, dl
);
3004 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
3006 SmallVector
<SDValue
, 6> Ops
;
3007 Ops
.push_back(MemAddr
);
3008 Ops
.push_back(Align
);
3009 unsigned Opc
= is64BitVector
? DOpcodes
[OpcodeIndex
]
3010 : (NumVecs
== 1) ? QOpcodes0
[OpcodeIndex
]
3011 : QOpcodes1
[OpcodeIndex
];
3013 SDValue Inc
= N
->getOperand(2);
3015 isPerfectIncrement(Inc
, VT
.getVectorElementType(), NumVecs
);
3017 if (!isVLDfixed(Opc
))
3018 Ops
.push_back(Reg0
);
3020 if (isVLDfixed(Opc
))
3021 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
3025 if (is64BitVector
|| NumVecs
== 1) {
3026 // Double registers and VLD1 quad registers are directly supported.
3027 } else if (NumVecs
== 2) {
// A first load with the QOpcodes0 opcode is emitted, and the final node is
// chained after it.
3028 const SDValue OpsA
[] = {MemAddr
, Align
, Pred
, Reg0
, Chain
};
3029 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
, ResTy
,
3031 Chain
= SDValue(VLdA
, 1);
// Remaining case: the first load takes an IMPLICIT_DEF as an extra operand,
// and its data result is appended to the operands of the final node.
3033 SDValue ImplDef
= SDValue(
3034 CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, ResTy
), 0);
3035 const SDValue OpsA
[] = {MemAddr
, Align
, ImplDef
, Pred
, Reg0
, Chain
};
3036 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
, ResTy
,
3038 Ops
.push_back(SDValue(VLdA
, 0));
3039 Chain
= SDValue(VLdA
, 1);
3042 Ops
.push_back(Pred
);
3043 Ops
.push_back(Reg0
);
3044 Ops
.push_back(Chain
);
3046 SDNode
*VLdDup
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
3048 // Transfer memoperands.
3049 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
3050 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLdDup
), {MemOp
});
3052 // Extract the subregisters.
3054 ReplaceUses(SDValue(N
, 0), SDValue(VLdDup
, 0));
3056 SDValue SuperReg
= SDValue(VLdDup
, 0);
3057 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7, "Unexpected subreg numbering");
// Subregister indices are consecutive, so SubIdx + Vec addresses vector Vec.
3058 unsigned SubIdx
= is64BitVector
? ARM::dsub_0
: ARM::qsub_0
;
3059 for (unsigned Vec
= 0; Vec
!= NumVecs
; ++Vec
) {
3060 ReplaceUses(SDValue(N
, Vec
),
3061 CurDAG
->getTargetExtractSubreg(SubIdx
+Vec
, dl
, VT
, SuperReg
));
// The results of N after the NumVecs vectors are forwarded from VLdDup.
3064 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLdDup
, 1));
3066 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLdDup
, 2));
3067 CurDAG
->RemoveDeadNode(N
);
/// Try to select a pair of adjacent INSERT_VECTOR_ELT nodes on a v8f16/v8i16
/// vector as a single insert into the 32-bit subregister ssub_0 + Lane2 / 2.
/// N is the outer insert (lane Lane2 + 1); its vector operand must be a
/// single-use insert into the even lane Lane2. Only attempted when the
/// subtarget has MVE integer ops.
/// NOTE(review): the return statements are not visible in this listing;
/// presumably the function returns true when N was replaced and false
/// otherwise.
3070 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode
*N
) {
3071 if (!Subtarget
->hasMVEIntegerOps())
3076 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3077 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3078 // inserts of the correct type:
3079 SDValue Ins1
= SDValue(N
, 0);
3080 SDValue Ins2
= N
->getOperand(0);
3081 EVT VT
= Ins1
.getValueType();
3082 if (Ins2
.getOpcode() != ISD::INSERT_VECTOR_ELT
|| !Ins2
.hasOneUse() ||
3083 !isa
<ConstantSDNode
>(Ins1
.getOperand(2)) ||
3084 !isa
<ConstantSDNode
>(Ins2
.getOperand(2)) ||
3085 (VT
!= MVT::v8f16
&& VT
!= MVT::v8i16
) || (Ins2
.getValueType() != VT
))
// The inner insert must target an even lane and the outer insert the odd
// lane directly above it, so that both fall into ssub_0 + Lane2 / 2.
3088 unsigned Lane1
= Ins1
.getConstantOperandVal(2);
3089 unsigned Lane2
= Ins2
.getConstantOperandVal(2);
3090 if (Lane2
% 2 != 0 || Lane1
!= Lane2
+ 1)
3093 // If the inserted values will be able to use T/B already, leave it to the
3094 // existing tablegen patterns. For example VCVTT/VCVTB.
3095 SDValue Val1
= Ins1
.getOperand(1);
3096 SDValue Val2
= Ins2
.getOperand(1);
3097 if (Val1
.getOpcode() == ISD::FP_ROUND
|| Val2
.getOpcode() == ISD::FP_ROUND
)
3100 // Check if the inserted values are both extracts.
3101 if ((Val1
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
||
3102 Val1
.getOpcode() == ARMISD::VGETLANEu
) &&
3103 (Val2
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
||
3104 Val2
.getOpcode() == ARMISD::VGETLANEu
) &&
3105 isa
<ConstantSDNode
>(Val1
.getOperand(1)) &&
3106 isa
<ConstantSDNode
>(Val2
.getOperand(1)) &&
3107 (Val1
.getOperand(0).getValueType() == MVT::v8f16
||
3108 Val1
.getOperand(0).getValueType() == MVT::v8i16
) &&
3109 (Val2
.getOperand(0).getValueType() == MVT::v8f16
||
3110 Val2
.getOperand(0).getValueType() == MVT::v8i16
)) {
3111 unsigned ExtractLane1
= Val1
.getConstantOperandVal(1);
3112 unsigned ExtractLane2
= Val2
.getConstantOperandVal(1);
3114 // If the two extracted lanes are from the same place and adjacent, this
3115 // simplifies into a f32 lane move.
3116 if (Val1
.getOperand(0) == Val2
.getOperand(0) && ExtractLane2
% 2 == 0 &&
3117 ExtractLane1
== ExtractLane2
+ 1) {
3118 SDValue NewExt
= CurDAG
->getTargetExtractSubreg(
3119 ARM::ssub_0
+ ExtractLane2
/ 2, dl
, MVT::f32
, Val1
.getOperand(0));
3120 SDValue NewIns
= CurDAG
->getTargetInsertSubreg(
3121 ARM::ssub_0
+ Lane2
/ 2, dl
, VT
, Ins2
.getOperand(0),
3123 ReplaceUses(Ins1
, NewIns
);
3127 // Else v8i16 pattern of an extract and an insert, with an optional vmovx for
3128 // extracting odd lanes.
3129 if (VT
== MVT::v8i16
) {
3130 SDValue Inp1
= CurDAG
->getTargetExtractSubreg(
3131 ARM::ssub_0
+ ExtractLane1
/ 2, dl
, MVT::f32
, Val1
.getOperand(0));
3132 SDValue Inp2
= CurDAG
->getTargetExtractSubreg(
3133 ARM::ssub_0
+ ExtractLane2
/ 2, dl
, MVT::f32
, Val2
.getOperand(0));
// An odd source lane is first passed through a VMOVH node.
3134 if (ExtractLane1
% 2 != 0)
3135 Inp1
= SDValue(CurDAG
->getMachineNode(ARM::VMOVH
, dl
, MVT::f32
, Inp1
), 0);
3136 if (ExtractLane2
% 2 != 0)
3137 Inp2
= SDValue(CurDAG
->getMachineNode(ARM::VMOVH
, dl
, MVT::f32
, Inp2
), 0);
// VINSH takes the even-lane value (Inp2) first and the odd-lane value second.
3138 SDNode
*VINS
= CurDAG
->getMachineNode(ARM::VINSH
, dl
, MVT::f32
, Inp2
, Inp1
);
3140 CurDAG
->getTargetInsertSubreg(ARM::ssub_0
+ Lane2
/ 2, dl
, MVT::v4f32
,
3141 Ins2
.getOperand(0), SDValue(VINS
, 0));
3142 ReplaceUses(Ins1
, NewIns
);
3147 // The inserted values are not extracted - if they are f16 then insert them
3148 // directly using a VINS.
3149 if (VT
== MVT::v8f16
) {
3150 SDNode
*VINS
= CurDAG
->getMachineNode(ARM::VINSH
, dl
, MVT::f32
, Val2
, Val1
);
3152 CurDAG
->getTargetInsertSubreg(ARM::ssub_0
+ Lane2
/ 2, dl
, MVT::v4f32
,
3153 Ins2
.getOperand(0), SDValue(VINS
, 0));
3154 ReplaceUses(Ins1
, NewIns
);
/// Try to replace \p N with a single MVE fixed-point VCVT machine node
/// (one of the MVE_VCVT*_fix opcodes). The scale factor is read from operand
/// 1 of FMul and must be an exact power of two; its log2 becomes the
/// fractional-bits immediate. Operand 0 of FMul (looked through a
/// UINT_TO_FP/SINT_TO_FP) is the vector being converted.
/// NOTE(review): the parameter lines declaring FMul and IsUnsigned, and the
/// early-return statements, are not visible in this listing.
3161 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode
*N
,
3164 bool FixedToFloat
) {
3165 auto Type
= N
->getValueType(0);
3166 unsigned ScalarBits
= Type
.getScalarSizeInBits();
3167 if (ScalarBits
> 32)
3170 SDNodeFlags FMulFlags
= FMul
->getFlags();
3171 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3172 // allowed in 16 bit unsigned floats
3173 if (ScalarBits
== 16 && !FMulFlags
.hasNoInfs() && IsUnsigned
)
3176 SDValue ImmNode
= FMul
->getOperand(1);
3177 SDValue VecVal
= FMul
->getOperand(0);
// Look through an int-to-fp conversion to reach the integer vector.
3178 if (VecVal
->getOpcode() == ISD::UINT_TO_FP
||
3179 VecVal
->getOpcode() == ISD::SINT_TO_FP
)
3180 VecVal
= VecVal
->getOperand(0);
3182 if (VecVal
.getValueType().getScalarSizeInBits() != ScalarBits
)
// Look through a BITCAST of the constant, provided the scalar width matches.
3185 if (ImmNode
.getOpcode() == ISD::BITCAST
) {
3186 if (ImmNode
.getValueType().getScalarSizeInBits() != ScalarBits
)
3188 ImmNode
= ImmNode
.getOperand(0);
3191 if (ImmNode
.getValueType().getScalarSizeInBits() != ScalarBits
)
// Decode the multiplier into an APFloat from whichever constant form is used.
3194 APFloat
ImmAPF(0.0f
);
3195 switch (ImmNode
.getOpcode()) {
3196 case ARMISD::VMOVIMM
:
3197 case ARMISD::VDUP
: {
3198 if (!isa
<ConstantSDNode
>(ImmNode
.getOperand(0)))
3200 unsigned Imm
= ImmNode
.getConstantOperandVal(0);
3201 if (ImmNode
.getOpcode() == ARMISD::VMOVIMM
)
3202 Imm
= ARM_AM::decodeVMOVModImm(Imm
, ScalarBits
);
// Reinterpret the raw bits as an IEEE single (32-bit) or half value.
3204 APFloat(ScalarBits
== 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3205 APInt(ScalarBits
, Imm
));
3208 case ARMISD::VMOVFPIMM
: {
3209 ImmAPF
= APFloat(ARM_AM::getFPImmFloat(ImmNode
.getConstantOperandVal(0)));
3216 // Where n is the number of fractional bits, multiplying by 2^n will convert
3217 // from float to fixed and multiplying by 2^-n will convert from fixed to
3218 // float. Taking log2 of the factor (after taking the inverse in the case of
3219 // fixed to float) will give n.
3220 APFloat ToConvert
= ImmAPF
;
// NOTE(review): the guard on this inverse is not visible in this listing;
// presumably it is FixedToFloat.
3222 if (!ImmAPF
.getExactInverse(&ToConvert
))
3225 APSInt
Converted(64, 0);
3227 ToConvert
.convertToInteger(Converted
, llvm::RoundingMode::NearestTiesToEven
,
// Only an exactly representable power of two can be expressed as a
// fractional-bit count.
3229 if (!IsExact
|| !Converted
.isPowerOf2())
3232 unsigned FracBits
= Converted
.logBase2();
3233 if (FracBits
> ScalarBits
)
3236 SmallVector
<SDValue
, 3> Ops
{
3237 VecVal
, CurDAG
->getConstant(FracBits
, SDLoc(N
), MVT::i32
)};
3238 AddEmptyMVEPredicateToOps(Ops
, SDLoc(N
), Type
);
// NOTE(review): the case labels and the condition choosing between the two
// opcodes of each width are not visible here; presumably FixedToFloat
// selects the VCVTf*-from-integer form and otherwise the to-integer form.
3240 unsigned int Opcode
;
3241 switch (ScalarBits
) {
3244 Opcode
= IsUnsigned
? ARM::MVE_VCVTf16u16_fix
: ARM::MVE_VCVTf16s16_fix
;
3246 Opcode
= IsUnsigned
? ARM::MVE_VCVTu16f16_fix
: ARM::MVE_VCVTs16f16_fix
;
3250 Opcode
= IsUnsigned
? ARM::MVE_VCVTf32u32_fix
: ARM::MVE_VCVTf32s32_fix
;
3252 Opcode
= IsUnsigned
? ARM::MVE_VCVTu32f32_fix
: ARM::MVE_VCVTs32f32_fix
;
3255 llvm_unreachable("unexpected number of scalar bits");
3259 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, SDLoc(N
), Type
, Ops
));
/// Try to select a vector FP_TO_UINT/FP_TO_SINT node \p N whose operand is a
/// scaling FADD(x, x) or FMUL as a single MVE fixed-point VCVT. The FADD form
/// is handled here with one fractional bit; the FMUL form is delegated to
/// transformFixedFloatingPointConversion. Requires MVE float ops.
/// NOTE(review): the early-return statements are not visible in this listing.
3263 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode
*N
, SDLoc dl
) {
3264 // Transform a floating-point to fixed-point conversion to a VCVT
3265 if (!Subtarget
->hasMVEFloatOps())
3267 EVT Type
= N
->getValueType(0);
3268 if (!Type
.isVector())
3270 unsigned int ScalarBits
= Type
.getScalarSizeInBits();
3272 bool IsUnsigned
= N
->getOpcode() == ISD::FP_TO_UINT
;
3273 SDNode
*Node
= N
->getOperand(0).getNode();
3275 // floating-point to fixed-point with one fractional bit gets turned into an
3276 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3277 if (Node
->getOpcode() == ISD::FADD
) {
// Only x + x (both operands identical) is a multiplication by two.
3278 if (Node
->getOperand(0) != Node
->getOperand(1))
3280 SDNodeFlags Flags
= Node
->getFlags();
3281 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3282 // allowed in 16 bit unsigned floats
3283 if (ScalarBits
== 16 && !Flags
.hasNoInfs() && IsUnsigned
)
// NOTE(review): the case labels of this switch are not visible in this listing.
3287 switch (ScalarBits
) {
3289 Opcode
= IsUnsigned
? ARM::MVE_VCVTu16f16_fix
: ARM::MVE_VCVTs16f16_fix
;
3292 Opcode
= IsUnsigned
? ARM::MVE_VCVTu32f32_fix
: ARM::MVE_VCVTs32f32_fix
;
// x + x == x * 2^1, hence the fractional-bit immediate of 1.
3295 SmallVector
<SDValue
, 3> Ops
{Node
->getOperand(0),
3296 CurDAG
->getConstant(1, dl
, MVT::i32
)};
3297 AddEmptyMVEPredicateToOps(Ops
, dl
, Type
);
3299 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, dl
, Type
, Ops
));
3303 if (Node
->getOpcode() != ISD::FMUL
)
// FixedToFloat is false: this is the float-to-fixed direction.
3306 return transformFixedFloatingPointConversion(N
, Node
, IsUnsigned
, false);
/// Try to select a vector FMUL node \p N whose first operand is a
/// SINT_TO_FP/UINT_TO_FP as a single MVE fixed-point VCVT, by delegating to
/// transformFixedFloatingPointConversion with N acting as both the node to
/// replace and the multiply. Requires MVE float ops.
/// NOTE(review): the early-return statements are not visible in this listing.
3309 bool ARMDAGToDAGISel::tryFMULFixed(SDNode
*N
, SDLoc dl
) {
3310 // Transform a fixed-point to floating-point conversion to a VCVT
3311 if (!Subtarget
->hasMVEFloatOps())
3313 auto Type
= N
->getValueType(0);
3314 if (!Type
.isVector())
3317 auto LHS
= N
->getOperand(0);
3318 if (LHS
.getOpcode() != ISD::SINT_TO_FP
&& LHS
.getOpcode() != ISD::UINT_TO_FP
)
// Signedness follows the conversion opcode; FixedToFloat is true here.
3321 return transformFixedFloatingPointConversion(
3322 N
, N
, LHS
.getOpcode() == ISD::UINT_TO_FP
, true);
/// Try to select \p N as a bitfield extract: SBFX/UBFX, or t2SBFX/t2UBFX on
/// Thumb, chosen by \p isSigned. Requires V6T2 ops. The patterns visible
/// below are: an AND with a low-bit mask of an SRL; a shift by constant of an
/// SHL by constant; a shift of an AND with a shifted mask whose lowest set bit
/// equals the shift amount; and a SIGN_EXTEND_INREG of an SRL/SRA. The width
/// operand is always emitted as width - 1.
/// NOTE(review): the return statements and the condition enclosing the two
/// shift patterns are not visible in this listing.
3325 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode
*N
, bool isSigned
) {
3326 if (!Subtarget
->hasV6T2Ops())
3329 unsigned Opc
= isSigned
3330 ? (Subtarget
->isThumb() ? ARM::t2SBFX
: ARM::SBFX
)
3331 : (Subtarget
->isThumb() ? ARM::t2UBFX
: ARM::UBFX
);
3334 // For unsigned extracts, check for a shift right and mask
3335 unsigned And_imm
= 0;
3336 if (N
->getOpcode() == ISD::AND
) {
3337 if (isOpcWithIntImmediate(N
, ISD::AND
, And_imm
)) {
3339 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3340 if (And_imm
& (And_imm
+ 1))
3343 unsigned Srl_imm
= 0;
3344 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRL
,
3346 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
3348 // Mask off the unnecessary bits of the AND immediate; normally
3349 // DAGCombine will do this, but that might not happen if
3350 // targetShrinkDemandedConstant chooses a different immediate.
// -1U >> Srl_imm has its top Srl_imm bits clear.
3351 And_imm
&= -1U >> Srl_imm
;
3353 // Note: The width operand is encoded as width-1.
3354 unsigned Width
= countTrailingOnes(And_imm
) - 1;
3355 unsigned LSB
= Srl_imm
;
3357 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
// If the field reaches the top bit of the value, a plain shift suffices.
3359 if ((LSB
+ Width
+ 1) == N
->getValueType(0).getSizeInBits()) {
3360 // It's cheaper to use a right shift to extract the top bits.
3361 if (Subtarget
->isThumb()) {
3362 Opc
= isSigned
? ARM::t2ASRri
: ARM::t2LSRri
;
3363 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3364 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
3365 getAL(CurDAG
, dl
), Reg0
, Reg0
};
3366 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
3370 // ARM models shift instructions as MOVsi with shifter operand.
3371 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(ISD::SRL
);
3373 CurDAG
->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal
, LSB
), dl
,
3375 SDValue Ops
[] = { N
->getOperand(0).getOperand(0), ShOpc
,
3376 getAL(CurDAG
, dl
), Reg0
, Reg0
};
3377 CurDAG
->SelectNodeTo(N
, ARM::MOVsi
, MVT::i32
, Ops
);
3381 assert(LSB
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
3382 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3383 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
3384 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
3385 getAL(CurDAG
, dl
), Reg0
};
3386 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
3393 // Otherwise, we're looking for a shift of a shift
3394 unsigned Shl_imm
= 0;
3395 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SHL
, Shl_imm
)) {
3396 assert(Shl_imm
> 0 && Shl_imm
< 32 && "bad amount in shift node!");
3397 unsigned Srl_imm
= 0;
3398 if (isInt32Immediate(N
->getOperand(1), Srl_imm
)) {
3399 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
3400 // Note: The width operand is encoded as width-1.
3401 unsigned Width
= 32 - Srl_imm
- 1;
// LSB is signed: it is negative when the left shift exceeds the right shift.
// NOTE(review): the handling of that case is not visible in this listing.
3402 int LSB
= Srl_imm
- Shl_imm
;
3405 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
3406 assert(LSB
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
3407 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3408 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
3409 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
3410 getAL(CurDAG
, dl
), Reg0
};
3411 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
3416 // Or we are looking for a shift of an and, with a mask operand
3417 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, And_imm
) &&
3418 isShiftedMask_32(And_imm
)) {
3419 unsigned Srl_imm
= 0;
3420 unsigned LSB
= countTrailingZeros(And_imm
);
3421 // Shift must be the same as the and's lsb
3422 if (isInt32Immediate(N
->getOperand(1), Srl_imm
) && Srl_imm
== LSB
) {
3423 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
3424 unsigned MSB
= 31 - countLeadingZeros(And_imm
);
3425 // Note: The width operand is encoded as width-1.
3426 unsigned Width
= MSB
- LSB
;
3427 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
3428 assert(Srl_imm
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
3429 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3430 CurDAG
->getTargetConstant(Srl_imm
, dl
, MVT::i32
),
3431 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
3432 getAL(CurDAG
, dl
), Reg0
};
3433 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
3438 if (N
->getOpcode() == ISD::SIGN_EXTEND_INREG
) {
// Here Width is the full field width taken from the VT operand; it is
// converted to the width-1 encoding when the operand is built below.
3439 unsigned Width
= cast
<VTSDNode
>(N
->getOperand(1))->getVT().getSizeInBits();
3441 if (!isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRL
, LSB
) &&
3442 !isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRA
, LSB
))
3445 if (LSB
+ Width
> 32)
3448 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
3449 assert(LSB
+ Width
<= 32 && "Shouldn't create an invalid ubfx");
3450 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3451 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
3452 CurDAG
->getTargetConstant(Width
- 1, dl
, MVT::i32
),
3453 getAL(CurDAG
, dl
), Reg0
};
3454 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
3461 /// Target-specific DAG combining for ISD::XOR.
3462 /// Target-independent combining lowers SELECT_CC nodes of the form
3463 /// select_cc setg[ge] X, 0, X, -X
3464 /// select_cc setgt X, -1, X, -X
3465 /// select_cc setl[te] X, 0, -X, X
3466 /// select_cc setlt X, 1, -X, X
3467 /// which represent Integer ABS into:
3468 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
3469 /// ARM instruction selection detects the latter and matches it to
3470 /// ARM::ABS or ARM::t2ABS machine node.
/// Not attempted on Thumb1-only subtargets.
/// NOTE(review): the return statements are not visible in this listing;
/// presumably true is returned only when N was selected to the ABS node.
3471 bool ARMDAGToDAGISel::tryABSOp(SDNode
*N
){
3472 SDValue XORSrc0
= N
->getOperand(0);
3473 SDValue XORSrc1
= N
->getOperand(1);
3474 EVT VT
= N
->getValueType(0);
3476 if (Subtarget
->isThumb1Only())
// The XOR must combine an ADD (operand 0) with an SRA (operand 1).
3479 if (XORSrc0
.getOpcode() != ISD::ADD
|| XORSrc1
.getOpcode() != ISD::SRA
)
3482 SDValue ADDSrc0
= XORSrc0
.getOperand(0);
3483 SDValue ADDSrc1
= XORSrc0
.getOperand(1);
3484 SDValue SRASrc0
= XORSrc1
.getOperand(0);
3485 SDValue SRASrc1
= XORSrc1
.getOperand(1);
3486 ConstantSDNode
*SRAConstant
= dyn_cast
<ConstantSDNode
>(SRASrc1
);
3487 EVT XType
= SRASrc0
.getValueType();
// Size is the required shift amount: the bit width of X minus one.
3488 unsigned Size
= XType
.getSizeInBits() - 1;
// Match xor(add(X, Y), Y) with Y = sra(X, Size): the ADD's second operand
// must be the very SRA node that feeds the XOR, and both must read the same X.
3490 if (ADDSrc1
== XORSrc1
&& ADDSrc0
== SRASrc0
&&
3491 XType
.isInteger() && SRAConstant
!= nullptr &&
3492 Size
== SRAConstant
->getZExtValue()) {
3493 unsigned Opcode
= Subtarget
->isThumb2() ? ARM::t2ABS
: ARM::ABS
;
3494 CurDAG
->SelectNodeTo(N
, Opcode
, VT
, ADDSrc0
);
3501 /// We've got special pseudo-instructions for these
/// Selects a compare-and-swap node \p N to a CMP_SWAP pseudo chosen by the
/// memory type (i8, i16 or i32; the 8- and 16-bit forms have Thumb-specific
/// variants). The pseudo produces (i32, i32, Other); only its first and last
/// results are forwarded to the users of N.
3502 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode
*N
) {
3504 EVT MemTy
= cast
<MemSDNode
>(N
)->getMemoryVT();
3505 if (MemTy
== MVT::i8
)
3506 Opcode
= Subtarget
->isThumb() ? ARM::tCMP_SWAP_8
: ARM::CMP_SWAP_8
;
3507 else if (MemTy
== MVT::i16
)
3508 Opcode
= Subtarget
->isThumb() ? ARM::tCMP_SWAP_16
: ARM::CMP_SWAP_16
;
3509 else if (MemTy
== MVT::i32
)
3510 Opcode
= ARM::CMP_SWAP_32
;
3512 llvm_unreachable("Unknown AtomicCmpSwap type");
// Operands 1..3 of N come first.
// NOTE(review): the remainder of this initializer is not visible in this listing.
3514 SDValue Ops
[] = {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3),
3516 SDNode
*CmpSwap
= CurDAG
->getMachineNode(
3518 CurDAG
->getVTList(MVT::i32
, MVT::i32
, MVT::Other
), Ops
);
// Carry the memory operand over to the new machine node.
3520 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
3521 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(CmpSwap
), {MemOp
});
// Result 1 of the pseudo (the second i32) is not forwarded to any user of N.
3523 ReplaceUses(SDValue(N
, 0), SDValue(CmpSwap
, 0));
3524 ReplaceUses(SDValue(N
, 1), SDValue(CmpSwap
, 2));
3525 CurDAG
->RemoveDeadNode(N
);
/// If the set bits of \p A form a single contiguous run, return the pair
/// (index of the highest set bit, index of the lowest set bit); otherwise
/// return an empty Optional.
3528 static Optional
<std::pair
<unsigned, unsigned>>
3529 getContiguousRangeOfSetBits(const APInt
&A
) {
// Highest set bit index: bit width minus leading zeros, minus one.
3530 unsigned FirstOne
= A
.getBitWidth() - A
.countLeadingZeros() - 1;
// Lowest set bit index: number of trailing zeros.
3531 unsigned LastOne
= A
.countTrailingZeros();
// The run is contiguous exactly when every bit between the two ends is set,
// i.e. the population count equals the length of the span.
3532 if (A
.countPopulation() != (FirstOne
- LastOne
+ 1))
3533 return Optional
<std::pair
<unsigned,unsigned>>();
3534 return std::make_pair(FirstOne
, LastOne
);
/// Rewrite the operand of an ARMISD::CMPZ node \p N when it is a single-use
/// (and X, C) compared against zero and the set bits of C form one contiguous
/// run: the AND is replaced by one or two immediate shifts of X. Only
/// attempted on Thumb subtargets (see the FIXME below).
/// \p SwitchEQNEToPLMI is cleared on entry and set in the single-bit case,
/// where the tested bit is shifted into bit 31 and a PL/MI comparison is to
/// be used.
/// NOTE(review): the early-return statements are not visible in this listing.
3537 void ARMDAGToDAGISel::SelectCMPZ(SDNode
*N
, bool &SwitchEQNEToPLMI
) {
3538 assert(N
->getOpcode() == ARMISD::CMPZ
);
3539 SwitchEQNEToPLMI
= false;
3541 if (!Subtarget
->isThumb())
3542 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3543 // LSR don't exist as standalone instructions - they need the barrel shifter.
3546 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3547 SDValue And
= N
->getOperand(0);
3548 if (!And
->hasOneUse())
3551 SDValue Zero
= N
->getOperand(1);
3552 if (!isa
<ConstantSDNode
>(Zero
) || !cast
<ConstantSDNode
>(Zero
)->isNullValue() ||
3553 And
->getOpcode() != ISD::AND
)
3555 SDValue X
= And
.getOperand(0);
3556 auto C
= dyn_cast
<ConstantSDNode
>(And
.getOperand(1));
// Range->first is the highest set bit of the mask, Range->second the lowest.
3560 auto Range
= getContiguousRangeOfSetBits(C
->getAPIntValue());
3564 // There are several ways to lower this:
// EmitShift builds the shift node. On Thumb2 the tLSLri/tLSRri opcode is
// mapped to its t2 form with operands {Src, Imm, pred, reg0, reg0}; otherwise
// the Thumb1 opcode is used with a leading CPSR register operand.
3568 auto EmitShift
= [&](unsigned Opc
, SDValue Src
, unsigned Imm
) -> SDNode
* {
3569 if (Subtarget
->isThumb2()) {
3570 Opc
= (Opc
== ARM::tLSLri
) ? ARM::t2LSLri
: ARM::t2LSRri
;
3571 SDValue Ops
[] = { Src
, CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
),
3572 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
3573 CurDAG
->getRegister(0, MVT::i32
) };
3574 return CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
3576 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
), Src
,
3577 CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
),
3578 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
)};
3579 return CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
3583 if (Range
->second
== 0) {
3584 // 1. Mask includes the LSB -> Simply shift the top N bits off
3585 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
3586 ReplaceNode(And
.getNode(), NewN
);
3587 } else if (Range
->first
== 31) {
3588 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3589 NewN
= EmitShift(ARM::tLSRri
, X
, Range
->second
);
3590 ReplaceNode(And
.getNode(), NewN
);
3591 } else if (Range
->first
== Range
->second
) {
3592 // 3. Only one bit is set. We can shift this into the sign bit and use a
3593 // PL/MI comparison.
3594 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
3595 ReplaceNode(And
.getNode(), NewN
);
3597 SwitchEQNEToPLMI
= true;
3598 } else if (!Subtarget
->hasV6T2Ops()) {
3599 // 4. Do a double shift to clear bottom and top bits, but only in
3600 // thumb-1 mode as in thumb-2 we can use UBFX.
// After the left shift the lowest mask bit sits at second + (31 - first), so
// shifting right by that amount leaves only the masked field.
3601 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
3602 NewN
= EmitShift(ARM::tLSRri
, SDValue(NewN
, 0),
3603 Range
->second
+ (31 - Range
->first
));
3604 ReplaceNode(And
.getNode(), NewN
);
3609 void ARMDAGToDAGISel::Select(SDNode
*N
) {
3612 if (N
->isMachineOpcode()) {
3614 return; // Already selected.
3617 switch (N
->getOpcode()) {
3620 // For Thumb1, match an sp-relative store in C++. This is a little
3621 // unfortunate, but I don't think I can make the chain check work
3622 // otherwise. (The chain of the store has to be the same as the chain
3623 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3624 // a direct reference to "SP".)
3626 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3627 // a different addressing mode from other four-byte stores.
3629 // This pattern usually comes up with call arguments.
3630 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
3631 SDValue Ptr
= ST
->getBasePtr();
3632 if (Subtarget
->isThumb1Only() && ST
->isUnindexed()) {
3634 if (Ptr
.getOpcode() == ISD::ADD
&&
3635 isScaledConstantInRange(Ptr
.getOperand(1), /*Scale=*/4, 0, 256, RHSC
))
3636 Ptr
= Ptr
.getOperand(0);
3638 if (Ptr
.getOpcode() == ISD::CopyFromReg
&&
3639 cast
<RegisterSDNode
>(Ptr
.getOperand(1))->getReg() == ARM::SP
&&
3640 Ptr
.getOperand(0) == ST
->getChain()) {
3641 SDValue Ops
[] = {ST
->getValue(),
3642 CurDAG
->getRegister(ARM::SP
, MVT::i32
),
3643 CurDAG
->getTargetConstant(RHSC
, dl
, MVT::i32
),
3645 CurDAG
->getRegister(0, MVT::i32
),
3647 MachineSDNode
*ResNode
=
3648 CurDAG
->getMachineNode(ARM::tSTRspi
, dl
, MVT::Other
, Ops
);
3649 MachineMemOperand
*MemOp
= ST
->getMemOperand();
3650 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(ResNode
), {MemOp
});
3651 ReplaceNode(N
, ResNode
);
3657 case ISD::WRITE_REGISTER
:
3658 if (tryWriteRegister(N
))
3661 case ISD::READ_REGISTER
:
3662 if (tryReadRegister(N
))
3665 case ISD::INLINEASM
:
3666 case ISD::INLINEASM_BR
:
3667 if (tryInlineAsm(N
))
3671 // Select special operations if XOR node forms integer ABS pattern
3674 // Other cases are autogenerated.
3676 case ISD::Constant
: {
3677 unsigned Val
= cast
<ConstantSDNode
>(N
)->getZExtValue();
3678 // If we can't materialize the constant we need to use a literal pool
3679 if (ConstantMaterializationCost(Val
, Subtarget
) > 2) {
3680 SDValue CPIdx
= CurDAG
->getTargetConstantPool(
3681 ConstantInt::get(Type::getInt32Ty(*CurDAG
->getContext()), Val
),
3682 TLI
->getPointerTy(CurDAG
->getDataLayout()));
3685 if (Subtarget
->isThumb()) {
3689 CurDAG
->getRegister(0, MVT::i32
),
3690 CurDAG
->getEntryNode()
3692 ResNode
= CurDAG
->getMachineNode(ARM::tLDRpci
, dl
, MVT::i32
, MVT::Other
,
3697 CurDAG
->getTargetConstant(0, dl
, MVT::i32
),
3699 CurDAG
->getRegister(0, MVT::i32
),
3700 CurDAG
->getEntryNode()
3702 ResNode
= CurDAG
->getMachineNode(ARM::LDRcp
, dl
, MVT::i32
, MVT::Other
,
3705 // Annotate the Node with memory operand information so that MachineInstr
3706 // queries work properly. This e.g. gives the register allocation the
3707 // required information for rematerialization.
3708 MachineFunction
& MF
= CurDAG
->getMachineFunction();
3709 MachineMemOperand
*MemOp
=
3710 MF
.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF
),
3711 MachineMemOperand::MOLoad
, 4, Align(4));
3713 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(ResNode
), {MemOp
});
3715 ReplaceNode(N
, ResNode
);
3719 // Other cases are autogenerated.
3722 case ISD::FrameIndex
: {
3723 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3724 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
3725 SDValue TFI
= CurDAG
->getTargetFrameIndex(
3726 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
3727 if (Subtarget
->isThumb1Only()) {
3728 // Set the alignment of the frame object to 4, to avoid having to generate
3729 // more than one ADD
3730 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
3731 if (MFI
.getObjectAlign(FI
) < Align(4))
3732 MFI
.setObjectAlignment(FI
, Align(4));
3733 CurDAG
->SelectNodeTo(N
, ARM::tADDframe
, MVT::i32
, TFI
,
3734 CurDAG
->getTargetConstant(0, dl
, MVT::i32
));
3737 unsigned Opc
= ((Subtarget
->isThumb() && Subtarget
->hasThumb2()) ?
3738 ARM::t2ADDri
: ARM::ADDri
);
3739 SDValue Ops
[] = { TFI
, CurDAG
->getTargetConstant(0, dl
, MVT::i32
),
3740 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
3741 CurDAG
->getRegister(0, MVT::i32
) };
3742 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
3746 case ISD::INSERT_VECTOR_ELT
: {
3747 if (tryInsertVectorElt(N
))
3752 if (tryV6T2BitfieldExtractOp(N
, false))
3755 case ISD::SIGN_EXTEND_INREG
:
3757 if (tryV6T2BitfieldExtractOp(N
, true))
3760 case ISD::FP_TO_UINT
:
3761 case ISD::FP_TO_SINT
:
3762 if (tryFP_TO_INT(N
, dl
))
3766 if (tryFMULFixed(N
, dl
))
3770 if (Subtarget
->isThumb1Only())
3772 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1))) {
3773 unsigned RHSV
= C
->getZExtValue();
3775 if (isPowerOf2_32(RHSV
-1)) { // 2^n+1?
3776 unsigned ShImm
= Log2_32(RHSV
-1);
3779 SDValue V
= N
->getOperand(0);
3780 ShImm
= ARM_AM::getSORegOpc(ARM_AM::lsl
, ShImm
);
3781 SDValue ShImmOp
= CurDAG
->getTargetConstant(ShImm
, dl
, MVT::i32
);
3782 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
3783 if (Subtarget
->isThumb()) {
3784 SDValue Ops
[] = { V
, V
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
, Reg0
};
3785 CurDAG
->SelectNodeTo(N
, ARM::t2ADDrs
, MVT::i32
, Ops
);
3788 SDValue Ops
[] = { V
, V
, Reg0
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
,
3790 CurDAG
->SelectNodeTo(N
, ARM::ADDrsi
, MVT::i32
, Ops
);
3794 if (isPowerOf2_32(RHSV
+1)) { // 2^n-1?
3795 unsigned ShImm
= Log2_32(RHSV
+1);
3798 SDValue V
= N
->getOperand(0);
3799 ShImm
= ARM_AM::getSORegOpc(ARM_AM::lsl
, ShImm
);
3800 SDValue ShImmOp
= CurDAG
->getTargetConstant(ShImm
, dl
, MVT::i32
);
3801 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
3802 if (Subtarget
->isThumb()) {
3803 SDValue Ops
[] = { V
, V
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
, Reg0
};
3804 CurDAG
->SelectNodeTo(N
, ARM::t2RSBrs
, MVT::i32
, Ops
);
3807 SDValue Ops
[] = { V
, V
, Reg0
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
,
3809 CurDAG
->SelectNodeTo(N
, ARM::RSBrsi
, MVT::i32
, Ops
);
3816 // Check for unsigned bitfield extract
3817 if (tryV6T2BitfieldExtractOp(N
, false))
3820 // If an immediate is used in an AND node, it is possible that the immediate
3821 // can be more optimally materialized when negated. If this is the case we
3822 // can negate the immediate and use a BIC instead.
3823 auto *N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
3824 if (N1C
&& N1C
->hasOneUse() && Subtarget
->isThumb()) {
3825 uint32_t Imm
= (uint32_t) N1C
->getZExtValue();
3827 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3828 // immediate can be negated and fit in the immediate operand of
3829 // a t2BIC, don't do any manual transform here as this can be
3830 // handled by the generic ISel machinery.
3831 bool PreferImmediateEncoding
=
3832 Subtarget
->hasThumb2() && (is_t2_so_imm(Imm
) || is_t2_so_imm_not(Imm
));
3833 if (!PreferImmediateEncoding
&&
3834 ConstantMaterializationCost(Imm
, Subtarget
) >
3835 ConstantMaterializationCost(~Imm
, Subtarget
)) {
3836 // The current immediate costs more to materialize than a negated
3837 // immediate, so negate the immediate and use a BIC.
3839 CurDAG
->getConstant(~N1C
->getZExtValue(), dl
, MVT::i32
);
3840 // If the new constant didn't exist before, reposition it in the topological
3841 // ordering so it is just before N. Otherwise, don't touch its location.
3842 if (NewImm
->getNodeId() == -1)
3843 CurDAG
->RepositionNode(N
->getIterator(), NewImm
.getNode());
3845 if (!Subtarget
->hasThumb2()) {
3846 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
),
3847 N
->getOperand(0), NewImm
, getAL(CurDAG
, dl
),
3848 CurDAG
->getRegister(0, MVT::i32
)};
3849 ReplaceNode(N
, CurDAG
->getMachineNode(ARM::tBIC
, dl
, MVT::i32
, Ops
));
3852 SDValue Ops
[] = {N
->getOperand(0), NewImm
, getAL(CurDAG
, dl
),
3853 CurDAG
->getRegister(0, MVT::i32
),
3854 CurDAG
->getRegister(0, MVT::i32
)};
3856 CurDAG
->getMachineNode(ARM::t2BICrr
, dl
, MVT::i32
, Ops
));
3862 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3863 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3864 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3865 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3866 // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
3867 EVT VT
= N
->getValueType(0);
3870 unsigned Opc
= (Subtarget
->isThumb() && Subtarget
->hasThumb2())
3872 : (Subtarget
->hasV6T2Ops() ? ARM::MOVTi16
: 0);
3875 SDValue N0
= N
->getOperand(0), N1
= N
->getOperand(1);
3876 N1C
= dyn_cast
<ConstantSDNode
>(N1
);
3879 if (N0
.getOpcode() == ISD::OR
&& N0
.getNode()->hasOneUse()) {
3880 SDValue N2
= N0
.getOperand(1);
3881 ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N2
);
3884 unsigned N1CVal
= N1C
->getZExtValue();
3885 unsigned N2CVal
= N2C
->getZExtValue();
3886 if ((N1CVal
& 0xffff0000U
) == (N2CVal
& 0xffff0000U
) &&
3887 (N1CVal
& 0xffffU
) == 0xffffU
&&
3888 (N2CVal
& 0xffffU
) == 0x0U
) {
3889 SDValue Imm16
= CurDAG
->getTargetConstant((N2CVal
& 0xFFFF0000U
) >> 16,
3891 SDValue Ops
[] = { N0
.getOperand(0), Imm16
,
3892 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
) };
3893 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, Ops
));
3900 case ARMISD::UMAAL
: {
3901 unsigned Opc
= Subtarget
->isThumb() ? ARM::t2UMAAL
: ARM::UMAAL
;
3902 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1),
3903 N
->getOperand(2), N
->getOperand(3),
3905 CurDAG
->getRegister(0, MVT::i32
) };
3906 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, MVT::i32
, Ops
));
3909 case ARMISD::UMLAL
:{
3910 if (Subtarget
->isThumb()) {
3911 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3912 N
->getOperand(3), getAL(CurDAG
, dl
),
3913 CurDAG
->getRegister(0, MVT::i32
)};
3915 N
, CurDAG
->getMachineNode(ARM::t2UMLAL
, dl
, MVT::i32
, MVT::i32
, Ops
));
3918 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3919 N
->getOperand(3), getAL(CurDAG
, dl
),
3920 CurDAG
->getRegister(0, MVT::i32
),
3921 CurDAG
->getRegister(0, MVT::i32
) };
3922 ReplaceNode(N
, CurDAG
->getMachineNode(
3923 Subtarget
->hasV6Ops() ? ARM::UMLAL
: ARM::UMLALv5
, dl
,
3924 MVT::i32
, MVT::i32
, Ops
));
3928 case ARMISD::SMLAL
:{
3929 if (Subtarget
->isThumb()) {
3930 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3931 N
->getOperand(3), getAL(CurDAG
, dl
),
3932 CurDAG
->getRegister(0, MVT::i32
)};
3934 N
, CurDAG
->getMachineNode(ARM::t2SMLAL
, dl
, MVT::i32
, MVT::i32
, Ops
));
3937 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3938 N
->getOperand(3), getAL(CurDAG
, dl
),
3939 CurDAG
->getRegister(0, MVT::i32
),
3940 CurDAG
->getRegister(0, MVT::i32
) };
3941 ReplaceNode(N
, CurDAG
->getMachineNode(
3942 Subtarget
->hasV6Ops() ? ARM::SMLAL
: ARM::SMLALv5
, dl
,
3943 MVT::i32
, MVT::i32
, Ops
));
3947 case ARMISD::SUBE
: {
3948 if (!Subtarget
->hasV6Ops() || !Subtarget
->hasDSP())
3950 // Look for a pattern to match SMMLS
3951 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3952 if (N
->getOperand(1).getOpcode() != ISD::SMUL_LOHI
||
3953 N
->getOperand(2).getOpcode() != ARMISD::SUBC
||
3954 !SDValue(N
, 1).use_empty())
3957 if (Subtarget
->isThumb())
3958 assert(Subtarget
->hasThumb2() &&
3959 "This pattern should not be generated for Thumb");
3961 SDValue SmulLoHi
= N
->getOperand(1);
3962 SDValue Subc
= N
->getOperand(2);
3963 auto *Zero
= dyn_cast
<ConstantSDNode
>(Subc
.getOperand(0));
3965 if (!Zero
|| Zero
->getZExtValue() != 0 ||
3966 Subc
.getOperand(1) != SmulLoHi
.getValue(0) ||
3967 N
->getOperand(1) != SmulLoHi
.getValue(1) ||
3968 N
->getOperand(2) != Subc
.getValue(1))
3971 unsigned Opc
= Subtarget
->isThumb2() ? ARM::t2SMMLS
: ARM::SMMLS
;
3972 SDValue Ops
[] = { SmulLoHi
.getOperand(0), SmulLoHi
.getOperand(1),
3973 N
->getOperand(0), getAL(CurDAG
, dl
),
3974 CurDAG
->getRegister(0, MVT::i32
) };
3975 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
));
3979 if (Subtarget
->hasMVEIntegerOps() && tryMVEIndexedLoad(N
))
3981 if (Subtarget
->isThumb() && Subtarget
->hasThumb2()) {
3982 if (tryT2IndexedLoad(N
))
3984 } else if (Subtarget
->isThumb()) {
3985 if (tryT1IndexedLoad(N
))
3987 } else if (tryARMIndexedLoad(N
))
3989 // Other cases are autogenerated.
3993 if (Subtarget
->hasMVEIntegerOps() && tryMVEIndexedLoad(N
))
3995 // Other cases are autogenerated.
3997 case ARMISD::WLSSETUP
: {
3998 SDNode
*New
= CurDAG
->getMachineNode(ARM::t2WhileLoopSetup
, dl
, MVT::i32
,
4000 ReplaceUses(N
, New
);
4001 CurDAG
->RemoveDeadNode(N
);
4005 SDNode
*New
= CurDAG
->getMachineNode(ARM::t2WhileLoopStart
, dl
, MVT::Other
,
4006 N
->getOperand(1), N
->getOperand(2),
4008 ReplaceUses(N
, New
);
4009 CurDAG
->RemoveDeadNode(N
);
4013 SDValue Ops
[] = { N
->getOperand(1),
4016 unsigned Opc
= ARM::t2LoopEnd
;
4017 SDNode
*New
= CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
, Ops
);
4018 ReplaceUses(N
, New
);
4019 CurDAG
->RemoveDeadNode(N
);
4022 case ARMISD::LDRD
: {
4023 if (Subtarget
->isThumb2())
4024 break; // TableGen handles isel in this case.
4025 SDValue Base
, RegOffset
, ImmOffset
;
4026 const SDValue
&Chain
= N
->getOperand(0);
4027 const SDValue
&Addr
= N
->getOperand(1);
4028 SelectAddrMode3(Addr
, Base
, RegOffset
, ImmOffset
);
4029 if (RegOffset
!= CurDAG
->getRegister(0, MVT::i32
)) {
4030 // The register-offset variant of LDRD mandates that the register
4031 // allocated to RegOffset is not reused in any of the remaining operands.
4032 // This restriction is currently not enforced. Therefore emitting this
4033 // variant is explicitly avoided.
4035 RegOffset
= CurDAG
->getRegister(0, MVT::i32
);
4037 SDValue Ops
[] = {Base
, RegOffset
, ImmOffset
, Chain
};
4038 SDNode
*New
= CurDAG
->getMachineNode(ARM::LOADDUAL
, dl
,
4039 {MVT::Untyped
, MVT::Other
}, Ops
);
4040 SDValue Lo
= CurDAG
->getTargetExtractSubreg(ARM::gsub_0
, dl
, MVT::i32
,
4042 SDValue Hi
= CurDAG
->getTargetExtractSubreg(ARM::gsub_1
, dl
, MVT::i32
,
4044 transferMemOperands(N
, New
);
4045 ReplaceUses(SDValue(N
, 0), Lo
);
4046 ReplaceUses(SDValue(N
, 1), Hi
);
4047 ReplaceUses(SDValue(N
, 2), SDValue(New
, 1));
4048 CurDAG
->RemoveDeadNode(N
);
4051 case ARMISD::STRD
: {
4052 if (Subtarget
->isThumb2())
4053 break; // TableGen handles isel in this case.
4054 SDValue Base
, RegOffset
, ImmOffset
;
4055 const SDValue
&Chain
= N
->getOperand(0);
4056 const SDValue
&Addr
= N
->getOperand(3);
4057 SelectAddrMode3(Addr
, Base
, RegOffset
, ImmOffset
);
4058 if (RegOffset
!= CurDAG
->getRegister(0, MVT::i32
)) {
4059 // The register-offset variant of STRD mandates that the register
4060 // allocated to RegOffset is not reused in any of the remaining operands.
4061 // This restriction is currently not enforced. Therefore emitting this
4062 // variant is explicitly avoided.
4064 RegOffset
= CurDAG
->getRegister(0, MVT::i32
);
4067 createGPRPairNode(MVT::Untyped
, N
->getOperand(1), N
->getOperand(2));
4068 SDValue Ops
[] = {SDValue(RegPair
, 0), Base
, RegOffset
, ImmOffset
, Chain
};
4069 SDNode
*New
= CurDAG
->getMachineNode(ARM::STOREDUAL
, dl
, MVT::Other
, Ops
);
4070 transferMemOperands(N
, New
);
4071 ReplaceUses(SDValue(N
, 0), SDValue(New
, 0));
4072 CurDAG
->RemoveDeadNode(N
);
4075 case ARMISD::LOOP_DEC
: {
4076 SDValue Ops
[] = { N
->getOperand(1),
4080 CurDAG
->getMachineNode(ARM::t2LoopDec
, dl
,
4081 CurDAG
->getVTList(MVT::i32
, MVT::Other
), Ops
);
4082 ReplaceUses(N
, Dec
);
4083 CurDAG
->RemoveDeadNode(N
);
4086 case ARMISD::BRCOND
: {
4087 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4088 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4089 // Pattern complexity = 6 cost = 1 size = 0
4091 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4092 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4093 // Pattern complexity = 6 cost = 1 size = 0
4095 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4096 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4097 // Pattern complexity = 6 cost = 1 size = 0
4099 unsigned Opc
= Subtarget
->isThumb() ?
4100 ((Subtarget
->hasThumb2()) ? ARM::t2Bcc
: ARM::tBcc
) : ARM::Bcc
;
4101 SDValue Chain
= N
->getOperand(0);
4102 SDValue N1
= N
->getOperand(1);
4103 SDValue N2
= N
->getOperand(2);
4104 SDValue N3
= N
->getOperand(3);
4105 SDValue InFlag
= N
->getOperand(4);
4106 assert(N1
.getOpcode() == ISD::BasicBlock
);
4107 assert(N2
.getOpcode() == ISD::Constant
);
4108 assert(N3
.getOpcode() == ISD::Register
);
4110 unsigned CC
= (unsigned) cast
<ConstantSDNode
>(N2
)->getZExtValue();
4112 if (InFlag
.getOpcode() == ARMISD::CMPZ
) {
4113 if (InFlag
.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN
) {
4114 SDValue Int
= InFlag
.getOperand(0);
4115 uint64_t ID
= cast
<ConstantSDNode
>(Int
->getOperand(1))->getZExtValue();
4117 // Handle low-overhead loops.
4118 if (ID
== Intrinsic::loop_decrement_reg
) {
4119 SDValue Elements
= Int
.getOperand(2);
4120 SDValue Size
= CurDAG
->getTargetConstant(
4121 cast
<ConstantSDNode
>(Int
.getOperand(3))->getZExtValue(), dl
,
4124 SDValue Args
[] = { Elements
, Size
, Int
.getOperand(0) };
4126 CurDAG
->getMachineNode(ARM::t2LoopDec
, dl
,
4127 CurDAG
->getVTList(MVT::i32
, MVT::Other
),
4129 ReplaceUses(Int
.getNode(), LoopDec
);
4131 SDValue EndArgs
[] = { SDValue(LoopDec
, 0), N1
, Chain
};
4133 CurDAG
->getMachineNode(ARM::t2LoopEnd
, dl
, MVT::Other
, EndArgs
);
4135 ReplaceUses(N
, LoopEnd
);
4136 CurDAG
->RemoveDeadNode(N
);
4137 CurDAG
->RemoveDeadNode(InFlag
.getNode());
4138 CurDAG
->RemoveDeadNode(Int
.getNode());
4143 bool SwitchEQNEToPLMI
;
4144 SelectCMPZ(InFlag
.getNode(), SwitchEQNEToPLMI
);
4145 InFlag
= N
->getOperand(4);
4147 if (SwitchEQNEToPLMI
) {
4148 switch ((ARMCC::CondCodes
)CC
) {
4149 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4151 CC
= (unsigned)ARMCC::MI
;
4154 CC
= (unsigned)ARMCC::PL
;
4160 SDValue Tmp2
= CurDAG
->getTargetConstant(CC
, dl
, MVT::i32
);
4161 SDValue Ops
[] = { N1
, Tmp2
, N3
, Chain
, InFlag
};
4162 SDNode
*ResNode
= CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
,
4164 Chain
= SDValue(ResNode
, 0);
4165 if (N
->getNumValues() == 2) {
4166 InFlag
= SDValue(ResNode
, 1);
4167 ReplaceUses(SDValue(N
, 1), InFlag
);
4169 ReplaceUses(SDValue(N
, 0),
4170 SDValue(Chain
.getNode(), Chain
.getResNo()));
4171 CurDAG
->RemoveDeadNode(N
);
4175 case ARMISD::CMPZ
: {
4176 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4177 // This allows us to avoid materializing the expensive negative constant.
4178 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4179 // for its glue output.
4180 SDValue X
= N
->getOperand(0);
4181 auto *C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1).getNode());
4182 if (C
&& C
->getSExtValue() < 0 && Subtarget
->isThumb()) {
4183 int64_t Addend
= -C
->getSExtValue();
4185 SDNode
*Add
= nullptr;
4186 // ADDS can be better than CMN if the immediate fits in a
4187 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4188 // Outside that range we can just use a CMN which is 32-bit but has a
4189 // 12-bit immediate range.
4190 if (Addend
< 1<<8) {
4191 if (Subtarget
->isThumb2()) {
4192 SDValue Ops
[] = { X
, CurDAG
->getTargetConstant(Addend
, dl
, MVT::i32
),
4193 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
4194 CurDAG
->getRegister(0, MVT::i32
) };
4195 Add
= CurDAG
->getMachineNode(ARM::t2ADDri
, dl
, MVT::i32
, Ops
);
4197 unsigned Opc
= (Addend
< 1<<3) ? ARM::tADDi3
: ARM::tADDi8
;
4198 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
), X
,
4199 CurDAG
->getTargetConstant(Addend
, dl
, MVT::i32
),
4200 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
)};
4201 Add
= CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
4205 SDValue Ops2
[] = {SDValue(Add
, 0), CurDAG
->getConstant(0, dl
, MVT::i32
)};
4206 CurDAG
->MorphNodeTo(N
, ARMISD::CMPZ
, CurDAG
->getVTList(MVT::Glue
), Ops2
);
4209 // Other cases are autogenerated.
4213 case ARMISD::CMOV
: {
4214 SDValue InFlag
= N
->getOperand(4);
4216 if (InFlag
.getOpcode() == ARMISD::CMPZ
) {
4217 bool SwitchEQNEToPLMI
;
4218 SelectCMPZ(InFlag
.getNode(), SwitchEQNEToPLMI
);
4220 if (SwitchEQNEToPLMI
) {
4221 SDValue ARMcc
= N
->getOperand(2);
4222 ARMCC::CondCodes CC
=
4223 (ARMCC::CondCodes
)cast
<ConstantSDNode
>(ARMcc
)->getZExtValue();
4226 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4234 SDValue NewARMcc
= CurDAG
->getConstant((unsigned)CC
, dl
, MVT::i32
);
4235 SDValue Ops
[] = {N
->getOperand(0), N
->getOperand(1), NewARMcc
,
4236 N
->getOperand(3), N
->getOperand(4)};
4237 CurDAG
->MorphNodeTo(N
, ARMISD::CMOV
, N
->getVTList(), Ops
);
4241 // Other cases are autogenerated.
4245 case ARMISD::VZIP
: {
4247 EVT VT
= N
->getValueType(0);
4248 switch (VT
.getSimpleVT().SimpleTy
) {
4250 case MVT::v8i8
: Opc
= ARM::VZIPd8
; break;
4252 case MVT::v4i16
: Opc
= ARM::VZIPd16
; break;
4254 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4255 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
4256 case MVT::v16i8
: Opc
= ARM::VZIPq8
; break;
4258 case MVT::v8i16
: Opc
= ARM::VZIPq16
; break;
4260 case MVT::v4i32
: Opc
= ARM::VZIPq32
; break;
4262 SDValue Pred
= getAL(CurDAG
, dl
);
4263 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
4264 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
4265 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
4268 case ARMISD::VUZP
: {
4270 EVT VT
= N
->getValueType(0);
4271 switch (VT
.getSimpleVT().SimpleTy
) {
4273 case MVT::v8i8
: Opc
= ARM::VUZPd8
; break;
4275 case MVT::v4i16
: Opc
= ARM::VUZPd16
; break;
4277 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4278 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
4279 case MVT::v16i8
: Opc
= ARM::VUZPq8
; break;
4281 case MVT::v8i16
: Opc
= ARM::VUZPq16
; break;
4283 case MVT::v4i32
: Opc
= ARM::VUZPq32
; break;
4285 SDValue Pred
= getAL(CurDAG
, dl
);
4286 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
4287 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
4288 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
4291 case ARMISD::VTRN
: {
4293 EVT VT
= N
->getValueType(0);
4294 switch (VT
.getSimpleVT().SimpleTy
) {
4296 case MVT::v8i8
: Opc
= ARM::VTRNd8
; break;
4298 case MVT::v4i16
: Opc
= ARM::VTRNd16
; break;
4300 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
4301 case MVT::v16i8
: Opc
= ARM::VTRNq8
; break;
4303 case MVT::v8i16
: Opc
= ARM::VTRNq16
; break;
4305 case MVT::v4i32
: Opc
= ARM::VTRNq32
; break;
4307 SDValue Pred
= getAL(CurDAG
, dl
);
4308 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
4309 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
4310 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
4313 case ARMISD::BUILD_VECTOR
: {
4314 EVT VecVT
= N
->getValueType(0);
4315 EVT EltVT
= VecVT
.getVectorElementType();
4316 unsigned NumElts
= VecVT
.getVectorNumElements();
4317 if (EltVT
== MVT::f64
) {
4318 assert(NumElts
== 2 && "unexpected type for BUILD_VECTOR");
4320 N
, createDRegPairNode(VecVT
, N
->getOperand(0), N
->getOperand(1)));
4323 assert(EltVT
== MVT::f32
&& "unexpected type for BUILD_VECTOR");
4326 N
, createSRegPairNode(VecVT
, N
->getOperand(0), N
->getOperand(1)));
4329 assert(NumElts
== 4 && "unexpected type for BUILD_VECTOR");
4331 createQuadSRegsNode(VecVT
, N
->getOperand(0), N
->getOperand(1),
4332 N
->getOperand(2), N
->getOperand(3)));
4336 case ARMISD::VLD1DUP
: {
4337 static const uint16_t DOpcodes
[] = { ARM::VLD1DUPd8
, ARM::VLD1DUPd16
,
4339 static const uint16_t QOpcodes
[] = { ARM::VLD1DUPq8
, ARM::VLD1DUPq16
,
4341 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 1, DOpcodes
, QOpcodes
);
4345 case ARMISD::VLD2DUP
: {
4346 static const uint16_t Opcodes
[] = { ARM::VLD2DUPd8
, ARM::VLD2DUPd16
,
4348 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 2, Opcodes
);
4352 case ARMISD::VLD3DUP
: {
4353 static const uint16_t Opcodes
[] = { ARM::VLD3DUPd8Pseudo
,
4354 ARM::VLD3DUPd16Pseudo
,
4355 ARM::VLD3DUPd32Pseudo
};
4356 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 3, Opcodes
);
4360 case ARMISD::VLD4DUP
: {
4361 static const uint16_t Opcodes
[] = { ARM::VLD4DUPd8Pseudo
,
4362 ARM::VLD4DUPd16Pseudo
,
4363 ARM::VLD4DUPd32Pseudo
};
4364 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 4, Opcodes
);
4368 case ARMISD::VLD1DUP_UPD
: {
4369 static const uint16_t DOpcodes
[] = { ARM::VLD1DUPd8wb_fixed
,
4370 ARM::VLD1DUPd16wb_fixed
,
4371 ARM::VLD1DUPd32wb_fixed
};
4372 static const uint16_t QOpcodes
[] = { ARM::VLD1DUPq8wb_fixed
,
4373 ARM::VLD1DUPq16wb_fixed
,
4374 ARM::VLD1DUPq32wb_fixed
};
4375 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 1, DOpcodes
, QOpcodes
);
4379 case ARMISD::VLD2DUP_UPD
: {
4380 static const uint16_t DOpcodes
[] = { ARM::VLD2DUPd8wb_fixed
,
4381 ARM::VLD2DUPd16wb_fixed
,
4382 ARM::VLD2DUPd32wb_fixed
,
4383 ARM::VLD1q64wb_fixed
};
4384 static const uint16_t QOpcodes0
[] = { ARM::VLD2DUPq8EvenPseudo
,
4385 ARM::VLD2DUPq16EvenPseudo
,
4386 ARM::VLD2DUPq32EvenPseudo
};
4387 static const uint16_t QOpcodes1
[] = { ARM::VLD2DUPq8OddPseudoWB_fixed
,
4388 ARM::VLD2DUPq16OddPseudoWB_fixed
,
4389 ARM::VLD2DUPq32OddPseudoWB_fixed
};
4390 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 2, DOpcodes
, QOpcodes0
, QOpcodes1
);
4394 case ARMISD::VLD3DUP_UPD
: {
4395 static const uint16_t DOpcodes
[] = { ARM::VLD3DUPd8Pseudo_UPD
,
4396 ARM::VLD3DUPd16Pseudo_UPD
,
4397 ARM::VLD3DUPd32Pseudo_UPD
,
4398 ARM::VLD1d64TPseudoWB_fixed
};
4399 static const uint16_t QOpcodes0
[] = { ARM::VLD3DUPq8EvenPseudo
,
4400 ARM::VLD3DUPq16EvenPseudo
,
4401 ARM::VLD3DUPq32EvenPseudo
};
4402 static const uint16_t QOpcodes1
[] = { ARM::VLD3DUPq8OddPseudo_UPD
,
4403 ARM::VLD3DUPq16OddPseudo_UPD
,
4404 ARM::VLD3DUPq32OddPseudo_UPD
};
4405 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
4409 case ARMISD::VLD4DUP_UPD
: {
4410 static const uint16_t DOpcodes
[] = { ARM::VLD4DUPd8Pseudo_UPD
,
4411 ARM::VLD4DUPd16Pseudo_UPD
,
4412 ARM::VLD4DUPd32Pseudo_UPD
,
4413 ARM::VLD1d64QPseudoWB_fixed
};
4414 static const uint16_t QOpcodes0
[] = { ARM::VLD4DUPq8EvenPseudo
,
4415 ARM::VLD4DUPq16EvenPseudo
,
4416 ARM::VLD4DUPq32EvenPseudo
};
4417 static const uint16_t QOpcodes1
[] = { ARM::VLD4DUPq8OddPseudo_UPD
,
4418 ARM::VLD4DUPq16OddPseudo_UPD
,
4419 ARM::VLD4DUPq32OddPseudo_UPD
};
4420 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4424 case ARMISD::VLD1_UPD
: {
4425 static const uint16_t DOpcodes
[] = { ARM::VLD1d8wb_fixed
,
4426 ARM::VLD1d16wb_fixed
,
4427 ARM::VLD1d32wb_fixed
,
4428 ARM::VLD1d64wb_fixed
};
4429 static const uint16_t QOpcodes
[] = { ARM::VLD1q8wb_fixed
,
4430 ARM::VLD1q16wb_fixed
,
4431 ARM::VLD1q32wb_fixed
,
4432 ARM::VLD1q64wb_fixed
};
4433 SelectVLD(N
, true, 1, DOpcodes
, QOpcodes
, nullptr);
4437 case ARMISD::VLD2_UPD
: {
4438 if (Subtarget
->hasNEON()) {
4439 static const uint16_t DOpcodes
[] = {
4440 ARM::VLD2d8wb_fixed
, ARM::VLD2d16wb_fixed
, ARM::VLD2d32wb_fixed
,
4441 ARM::VLD1q64wb_fixed
};
4442 static const uint16_t QOpcodes
[] = {ARM::VLD2q8PseudoWB_fixed
,
4443 ARM::VLD2q16PseudoWB_fixed
,
4444 ARM::VLD2q32PseudoWB_fixed
};
4445 SelectVLD(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
4447 static const uint16_t Opcodes8
[] = {ARM::MVE_VLD20_8
,
4448 ARM::MVE_VLD21_8_wb
};
4449 static const uint16_t Opcodes16
[] = {ARM::MVE_VLD20_16
,
4450 ARM::MVE_VLD21_16_wb
};
4451 static const uint16_t Opcodes32
[] = {ARM::MVE_VLD20_32
,
4452 ARM::MVE_VLD21_32_wb
};
4453 static const uint16_t *const Opcodes
[] = {Opcodes8
, Opcodes16
, Opcodes32
};
4454 SelectMVE_VLD(N
, 2, Opcodes
, true);
4459 case ARMISD::VLD3_UPD
: {
4460 static const uint16_t DOpcodes
[] = { ARM::VLD3d8Pseudo_UPD
,
4461 ARM::VLD3d16Pseudo_UPD
,
4462 ARM::VLD3d32Pseudo_UPD
,
4463 ARM::VLD1d64TPseudoWB_fixed
};
4464 static const uint16_t QOpcodes0
[] = { ARM::VLD3q8Pseudo_UPD
,
4465 ARM::VLD3q16Pseudo_UPD
,
4466 ARM::VLD3q32Pseudo_UPD
};
4467 static const uint16_t QOpcodes1
[] = { ARM::VLD3q8oddPseudo_UPD
,
4468 ARM::VLD3q16oddPseudo_UPD
,
4469 ARM::VLD3q32oddPseudo_UPD
};
4470 SelectVLD(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
4474 case ARMISD::VLD4_UPD
: {
4475 if (Subtarget
->hasNEON()) {
4476 static const uint16_t DOpcodes
[] = {
4477 ARM::VLD4d8Pseudo_UPD
, ARM::VLD4d16Pseudo_UPD
, ARM::VLD4d32Pseudo_UPD
,
4478 ARM::VLD1d64QPseudoWB_fixed
};
4479 static const uint16_t QOpcodes0
[] = {ARM::VLD4q8Pseudo_UPD
,
4480 ARM::VLD4q16Pseudo_UPD
,
4481 ARM::VLD4q32Pseudo_UPD
};
4482 static const uint16_t QOpcodes1
[] = {ARM::VLD4q8oddPseudo_UPD
,
4483 ARM::VLD4q16oddPseudo_UPD
,
4484 ARM::VLD4q32oddPseudo_UPD
};
4485 SelectVLD(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4487 static const uint16_t Opcodes8
[] = {ARM::MVE_VLD40_8
, ARM::MVE_VLD41_8
,
4489 ARM::MVE_VLD43_8_wb
};
4490 static const uint16_t Opcodes16
[] = {ARM::MVE_VLD40_16
, ARM::MVE_VLD41_16
,
4492 ARM::MVE_VLD43_16_wb
};
4493 static const uint16_t Opcodes32
[] = {ARM::MVE_VLD40_32
, ARM::MVE_VLD41_32
,
4495 ARM::MVE_VLD43_32_wb
};
4496 static const uint16_t *const Opcodes
[] = {Opcodes8
, Opcodes16
, Opcodes32
};
4497 SelectMVE_VLD(N
, 4, Opcodes
, true);
4502 case ARMISD::VLD1x2_UPD
: {
4503 if (Subtarget
->hasNEON()) {
4504 static const uint16_t DOpcodes
[] = {
4505 ARM::VLD1q8wb_fixed
, ARM::VLD1q16wb_fixed
, ARM::VLD1q32wb_fixed
,
4506 ARM::VLD1q64wb_fixed
};
4507 static const uint16_t QOpcodes
[] = {
4508 ARM::VLD1d8QPseudoWB_fixed
, ARM::VLD1d16QPseudoWB_fixed
,
4509 ARM::VLD1d32QPseudoWB_fixed
, ARM::VLD1d64QPseudoWB_fixed
};
4510 SelectVLD(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
4516 case ARMISD::VLD1x3_UPD
: {
4517 if (Subtarget
->hasNEON()) {
4518 static const uint16_t DOpcodes
[] = {
4519 ARM::VLD1d8TPseudoWB_fixed
, ARM::VLD1d16TPseudoWB_fixed
,
4520 ARM::VLD1d32TPseudoWB_fixed
, ARM::VLD1d64TPseudoWB_fixed
};
4521 static const uint16_t QOpcodes0
[] = {
4522 ARM::VLD1q8LowTPseudo_UPD
, ARM::VLD1q16LowTPseudo_UPD
,
4523 ARM::VLD1q32LowTPseudo_UPD
, ARM::VLD1q64LowTPseudo_UPD
};
4524 static const uint16_t QOpcodes1
[] = {
4525 ARM::VLD1q8HighTPseudo_UPD
, ARM::VLD1q16HighTPseudo_UPD
,
4526 ARM::VLD1q32HighTPseudo_UPD
, ARM::VLD1q64HighTPseudo_UPD
};
4527 SelectVLD(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
4533 case ARMISD::VLD1x4_UPD
: {
4534 if (Subtarget
->hasNEON()) {
4535 static const uint16_t DOpcodes
[] = {
4536 ARM::VLD1d8QPseudoWB_fixed
, ARM::VLD1d16QPseudoWB_fixed
,
4537 ARM::VLD1d32QPseudoWB_fixed
, ARM::VLD1d64QPseudoWB_fixed
};
4538 static const uint16_t QOpcodes0
[] = {
4539 ARM::VLD1q8LowQPseudo_UPD
, ARM::VLD1q16LowQPseudo_UPD
,
4540 ARM::VLD1q32LowQPseudo_UPD
, ARM::VLD1q64LowQPseudo_UPD
};
4541 static const uint16_t QOpcodes1
[] = {
4542 ARM::VLD1q8HighQPseudo_UPD
, ARM::VLD1q16HighQPseudo_UPD
,
4543 ARM::VLD1q32HighQPseudo_UPD
, ARM::VLD1q64HighQPseudo_UPD
};
4544 SelectVLD(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4550 case ARMISD::VLD2LN_UPD
: {
4551 static const uint16_t DOpcodes
[] = { ARM::VLD2LNd8Pseudo_UPD
,
4552 ARM::VLD2LNd16Pseudo_UPD
,
4553 ARM::VLD2LNd32Pseudo_UPD
};
4554 static const uint16_t QOpcodes
[] = { ARM::VLD2LNq16Pseudo_UPD
,
4555 ARM::VLD2LNq32Pseudo_UPD
};
4556 SelectVLDSTLane(N
, true, true, 2, DOpcodes
, QOpcodes
);
4560 case ARMISD::VLD3LN_UPD
: {
4561 static const uint16_t DOpcodes
[] = { ARM::VLD3LNd8Pseudo_UPD
,
4562 ARM::VLD3LNd16Pseudo_UPD
,
4563 ARM::VLD3LNd32Pseudo_UPD
};
4564 static const uint16_t QOpcodes
[] = { ARM::VLD3LNq16Pseudo_UPD
,
4565 ARM::VLD3LNq32Pseudo_UPD
};
4566 SelectVLDSTLane(N
, true, true, 3, DOpcodes
, QOpcodes
);
4570 case ARMISD::VLD4LN_UPD
: {
4571 static const uint16_t DOpcodes
[] = { ARM::VLD4LNd8Pseudo_UPD
,
4572 ARM::VLD4LNd16Pseudo_UPD
,
4573 ARM::VLD4LNd32Pseudo_UPD
};
4574 static const uint16_t QOpcodes
[] = { ARM::VLD4LNq16Pseudo_UPD
,
4575 ARM::VLD4LNq32Pseudo_UPD
};
4576 SelectVLDSTLane(N
, true, true, 4, DOpcodes
, QOpcodes
);
4580 case ARMISD::VST1_UPD
: {
4581 static const uint16_t DOpcodes
[] = { ARM::VST1d8wb_fixed
,
4582 ARM::VST1d16wb_fixed
,
4583 ARM::VST1d32wb_fixed
,
4584 ARM::VST1d64wb_fixed
};
4585 static const uint16_t QOpcodes
[] = { ARM::VST1q8wb_fixed
,
4586 ARM::VST1q16wb_fixed
,
4587 ARM::VST1q32wb_fixed
,
4588 ARM::VST1q64wb_fixed
};
4589 SelectVST(N
, true, 1, DOpcodes
, QOpcodes
, nullptr);
4593 case ARMISD::VST2_UPD
: {
4594 if (Subtarget
->hasNEON()) {
4595 static const uint16_t DOpcodes
[] = {
4596 ARM::VST2d8wb_fixed
, ARM::VST2d16wb_fixed
, ARM::VST2d32wb_fixed
,
4597 ARM::VST1q64wb_fixed
};
4598 static const uint16_t QOpcodes
[] = {ARM::VST2q8PseudoWB_fixed
,
4599 ARM::VST2q16PseudoWB_fixed
,
4600 ARM::VST2q32PseudoWB_fixed
};
4601 SelectVST(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
4607 case ARMISD::VST3_UPD
: {
4608 static const uint16_t DOpcodes
[] = { ARM::VST3d8Pseudo_UPD
,
4609 ARM::VST3d16Pseudo_UPD
,
4610 ARM::VST3d32Pseudo_UPD
,
4611 ARM::VST1d64TPseudoWB_fixed
};
4612 static const uint16_t QOpcodes0
[] = { ARM::VST3q8Pseudo_UPD
,
4613 ARM::VST3q16Pseudo_UPD
,
4614 ARM::VST3q32Pseudo_UPD
};
4615 static const uint16_t QOpcodes1
[] = { ARM::VST3q8oddPseudo_UPD
,
4616 ARM::VST3q16oddPseudo_UPD
,
4617 ARM::VST3q32oddPseudo_UPD
};
4618 SelectVST(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
4622 case ARMISD::VST4_UPD
: {
4623 if (Subtarget
->hasNEON()) {
4624 static const uint16_t DOpcodes
[] = {
4625 ARM::VST4d8Pseudo_UPD
, ARM::VST4d16Pseudo_UPD
, ARM::VST4d32Pseudo_UPD
,
4626 ARM::VST1d64QPseudoWB_fixed
};
4627 static const uint16_t QOpcodes0
[] = {ARM::VST4q8Pseudo_UPD
,
4628 ARM::VST4q16Pseudo_UPD
,
4629 ARM::VST4q32Pseudo_UPD
};
4630 static const uint16_t QOpcodes1
[] = {ARM::VST4q8oddPseudo_UPD
,
4631 ARM::VST4q16oddPseudo_UPD
,
4632 ARM::VST4q32oddPseudo_UPD
};
4633 SelectVST(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4639 case ARMISD::VST1x2_UPD
: {
4640 if (Subtarget
->hasNEON()) {
4641 static const uint16_t DOpcodes
[] = { ARM::VST1q8wb_fixed
,
4642 ARM::VST1q16wb_fixed
,
4643 ARM::VST1q32wb_fixed
,
4644 ARM::VST1q64wb_fixed
};
4645 static const uint16_t QOpcodes
[] = { ARM::VST1d8QPseudoWB_fixed
,
4646 ARM::VST1d16QPseudoWB_fixed
,
4647 ARM::VST1d32QPseudoWB_fixed
,
4648 ARM::VST1d64QPseudoWB_fixed
};
4649 SelectVST(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
4655 case ARMISD::VST1x3_UPD
: {
4656 if (Subtarget
->hasNEON()) {
4657 static const uint16_t DOpcodes
[] = { ARM::VST1d8TPseudoWB_fixed
,
4658 ARM::VST1d16TPseudoWB_fixed
,
4659 ARM::VST1d32TPseudoWB_fixed
,
4660 ARM::VST1d64TPseudoWB_fixed
};
4661 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowTPseudo_UPD
,
4662 ARM::VST1q16LowTPseudo_UPD
,
4663 ARM::VST1q32LowTPseudo_UPD
,
4664 ARM::VST1q64LowTPseudo_UPD
};
4665 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighTPseudo_UPD
,
4666 ARM::VST1q16HighTPseudo_UPD
,
4667 ARM::VST1q32HighTPseudo_UPD
,
4668 ARM::VST1q64HighTPseudo_UPD
};
4669 SelectVST(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
4675 case ARMISD::VST1x4_UPD
: {
4676 if (Subtarget
->hasNEON()) {
4677 static const uint16_t DOpcodes
[] = { ARM::VST1d8QPseudoWB_fixed
,
4678 ARM::VST1d16QPseudoWB_fixed
,
4679 ARM::VST1d32QPseudoWB_fixed
,
4680 ARM::VST1d64QPseudoWB_fixed
};
4681 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowQPseudo_UPD
,
4682 ARM::VST1q16LowQPseudo_UPD
,
4683 ARM::VST1q32LowQPseudo_UPD
,
4684 ARM::VST1q64LowQPseudo_UPD
};
4685 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighQPseudo_UPD
,
4686 ARM::VST1q16HighQPseudo_UPD
,
4687 ARM::VST1q32HighQPseudo_UPD
,
4688 ARM::VST1q64HighQPseudo_UPD
};
4689 SelectVST(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4694 case ARMISD::VST2LN_UPD
: {
4695 static const uint16_t DOpcodes
[] = { ARM::VST2LNd8Pseudo_UPD
,
4696 ARM::VST2LNd16Pseudo_UPD
,
4697 ARM::VST2LNd32Pseudo_UPD
};
4698 static const uint16_t QOpcodes
[] = { ARM::VST2LNq16Pseudo_UPD
,
4699 ARM::VST2LNq32Pseudo_UPD
};
4700 SelectVLDSTLane(N
, false, true, 2, DOpcodes
, QOpcodes
);
4704 case ARMISD::VST3LN_UPD
: {
4705 static const uint16_t DOpcodes
[] = { ARM::VST3LNd8Pseudo_UPD
,
4706 ARM::VST3LNd16Pseudo_UPD
,
4707 ARM::VST3LNd32Pseudo_UPD
};
4708 static const uint16_t QOpcodes
[] = { ARM::VST3LNq16Pseudo_UPD
,
4709 ARM::VST3LNq32Pseudo_UPD
};
4710 SelectVLDSTLane(N
, false, true, 3, DOpcodes
, QOpcodes
);
4714 case ARMISD::VST4LN_UPD
: {
4715 static const uint16_t DOpcodes
[] = { ARM::VST4LNd8Pseudo_UPD
,
4716 ARM::VST4LNd16Pseudo_UPD
,
4717 ARM::VST4LNd32Pseudo_UPD
};
4718 static const uint16_t QOpcodes
[] = { ARM::VST4LNq16Pseudo_UPD
,
4719 ARM::VST4LNq32Pseudo_UPD
};
4720 SelectVLDSTLane(N
, false, true, 4, DOpcodes
, QOpcodes
);
4724 case ISD::INTRINSIC_VOID
:
4725 case ISD::INTRINSIC_W_CHAIN
: {
4726 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
4731 case Intrinsic::arm_mrrc
:
4732 case Intrinsic::arm_mrrc2
: {
4734 SDValue Chain
= N
->getOperand(0);
4737 if (Subtarget
->isThumb())
4738 Opc
= (IntNo
== Intrinsic::arm_mrrc
? ARM::t2MRRC
: ARM::t2MRRC2
);
4740 Opc
= (IntNo
== Intrinsic::arm_mrrc
? ARM::MRRC
: ARM::MRRC2
);
4742 SmallVector
<SDValue
, 5> Ops
;
4743 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(2))->getZExtValue(), dl
)); /* coproc */
4744 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(3))->getZExtValue(), dl
)); /* opc */
4745 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(4))->getZExtValue(), dl
)); /* CRm */
4747 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
4748 // instruction will always be '1111' but it is possible in assembly language to specify
4749 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
4750 if (Opc
!= ARM::MRRC2
) {
4751 Ops
.push_back(getAL(CurDAG
, dl
));
4752 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
4755 Ops
.push_back(Chain
);
4757 // Writes to two registers.
4758 const EVT RetType
[] = {MVT::i32
, MVT::i32
, MVT::Other
};
4760 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, RetType
, Ops
));
4763 case Intrinsic::arm_ldaexd
:
4764 case Intrinsic::arm_ldrexd
: {
4766 SDValue Chain
= N
->getOperand(0);
4767 SDValue MemAddr
= N
->getOperand(2);
4768 bool isThumb
= Subtarget
->isThumb() && Subtarget
->hasV8MBaselineOps();
4770 bool IsAcquire
= IntNo
== Intrinsic::arm_ldaexd
;
4771 unsigned NewOpc
= isThumb
? (IsAcquire
? ARM::t2LDAEXD
: ARM::t2LDREXD
)
4772 : (IsAcquire
? ARM::LDAEXD
: ARM::LDREXD
);
4774 // arm_ldrexd returns a i64 value in {i32, i32}
4775 std::vector
<EVT
> ResTys
;
4777 ResTys
.push_back(MVT::i32
);
4778 ResTys
.push_back(MVT::i32
);
4780 ResTys
.push_back(MVT::Untyped
);
4781 ResTys
.push_back(MVT::Other
);
4783 // Place arguments in the right order.
4784 SDValue Ops
[] = {MemAddr
, getAL(CurDAG
, dl
),
4785 CurDAG
->getRegister(0, MVT::i32
), Chain
};
4786 SDNode
*Ld
= CurDAG
->getMachineNode(NewOpc
, dl
, ResTys
, Ops
);
4787 // Transfer memoperands.
4788 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
4789 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Ld
), {MemOp
});
4792 SDValue OutChain
= isThumb
? SDValue(Ld
, 2) : SDValue(Ld
, 1);
4793 if (!SDValue(N
, 0).use_empty()) {
4796 Result
= SDValue(Ld
, 0);
4799 CurDAG
->getTargetConstant(ARM::gsub_0
, dl
, MVT::i32
);
4800 SDNode
*ResNode
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
4801 dl
, MVT::i32
, SDValue(Ld
, 0), SubRegIdx
);
4802 Result
= SDValue(ResNode
,0);
4804 ReplaceUses(SDValue(N
, 0), Result
);
4806 if (!SDValue(N
, 1).use_empty()) {
4809 Result
= SDValue(Ld
, 1);
4812 CurDAG
->getTargetConstant(ARM::gsub_1
, dl
, MVT::i32
);
4813 SDNode
*ResNode
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
4814 dl
, MVT::i32
, SDValue(Ld
, 0), SubRegIdx
);
4815 Result
= SDValue(ResNode
,0);
4817 ReplaceUses(SDValue(N
, 1), Result
);
4819 ReplaceUses(SDValue(N
, 2), OutChain
);
4820 CurDAG
->RemoveDeadNode(N
);
4823 case Intrinsic::arm_stlexd
:
4824 case Intrinsic::arm_strexd
: {
4826 SDValue Chain
= N
->getOperand(0);
4827 SDValue Val0
= N
->getOperand(2);
4828 SDValue Val1
= N
->getOperand(3);
4829 SDValue MemAddr
= N
->getOperand(4);
4831 // Store exclusive double return a i32 value which is the return status
4832 // of the issued store.
4833 const EVT ResTys
[] = {MVT::i32
, MVT::Other
};
4835 bool isThumb
= Subtarget
->isThumb() && Subtarget
->hasThumb2();
4836 // Place arguments in the right order.
4837 SmallVector
<SDValue
, 7> Ops
;
4839 Ops
.push_back(Val0
);
4840 Ops
.push_back(Val1
);
4842 // arm_strexd uses GPRPair.
4843 Ops
.push_back(SDValue(createGPRPairNode(MVT::Untyped
, Val0
, Val1
), 0));
4844 Ops
.push_back(MemAddr
);
4845 Ops
.push_back(getAL(CurDAG
, dl
));
4846 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
4847 Ops
.push_back(Chain
);
4849 bool IsRelease
= IntNo
== Intrinsic::arm_stlexd
;
4850 unsigned NewOpc
= isThumb
? (IsRelease
? ARM::t2STLEXD
: ARM::t2STREXD
)
4851 : (IsRelease
? ARM::STLEXD
: ARM::STREXD
);
4853 SDNode
*St
= CurDAG
->getMachineNode(NewOpc
, dl
, ResTys
, Ops
);
4854 // Transfer memoperands.
4855 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
4856 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(St
), {MemOp
});
4862 case Intrinsic::arm_neon_vld1
: {
4863 static const uint16_t DOpcodes
[] = { ARM::VLD1d8
, ARM::VLD1d16
,
4864 ARM::VLD1d32
, ARM::VLD1d64
};
4865 static const uint16_t QOpcodes
[] = { ARM::VLD1q8
, ARM::VLD1q16
,
4866 ARM::VLD1q32
, ARM::VLD1q64
};
4867 SelectVLD(N
, false, 1, DOpcodes
, QOpcodes
, nullptr);
4871 case Intrinsic::arm_neon_vld1x2
: {
4872 static const uint16_t DOpcodes
[] = { ARM::VLD1q8
, ARM::VLD1q16
,
4873 ARM::VLD1q32
, ARM::VLD1q64
};
4874 static const uint16_t QOpcodes
[] = { ARM::VLD1d8QPseudo
,
4875 ARM::VLD1d16QPseudo
,
4876 ARM::VLD1d32QPseudo
,
4877 ARM::VLD1d64QPseudo
};
4878 SelectVLD(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
4882 case Intrinsic::arm_neon_vld1x3
: {
4883 static const uint16_t DOpcodes
[] = { ARM::VLD1d8TPseudo
,
4884 ARM::VLD1d16TPseudo
,
4885 ARM::VLD1d32TPseudo
,
4886 ARM::VLD1d64TPseudo
};
4887 static const uint16_t QOpcodes0
[] = { ARM::VLD1q8LowTPseudo_UPD
,
4888 ARM::VLD1q16LowTPseudo_UPD
,
4889 ARM::VLD1q32LowTPseudo_UPD
,
4890 ARM::VLD1q64LowTPseudo_UPD
};
4891 static const uint16_t QOpcodes1
[] = { ARM::VLD1q8HighTPseudo
,
4892 ARM::VLD1q16HighTPseudo
,
4893 ARM::VLD1q32HighTPseudo
,
4894 ARM::VLD1q64HighTPseudo
};
4895 SelectVLD(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
4899 case Intrinsic::arm_neon_vld1x4
: {
4900 static const uint16_t DOpcodes
[] = { ARM::VLD1d8QPseudo
,
4901 ARM::VLD1d16QPseudo
,
4902 ARM::VLD1d32QPseudo
,
4903 ARM::VLD1d64QPseudo
};
4904 static const uint16_t QOpcodes0
[] = { ARM::VLD1q8LowQPseudo_UPD
,
4905 ARM::VLD1q16LowQPseudo_UPD
,
4906 ARM::VLD1q32LowQPseudo_UPD
,
4907 ARM::VLD1q64LowQPseudo_UPD
};
4908 static const uint16_t QOpcodes1
[] = { ARM::VLD1q8HighQPseudo
,
4909 ARM::VLD1q16HighQPseudo
,
4910 ARM::VLD1q32HighQPseudo
,
4911 ARM::VLD1q64HighQPseudo
};
4912 SelectVLD(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4916 case Intrinsic::arm_neon_vld2
: {
4917 static const uint16_t DOpcodes
[] = { ARM::VLD2d8
, ARM::VLD2d16
,
4918 ARM::VLD2d32
, ARM::VLD1q64
};
4919 static const uint16_t QOpcodes
[] = { ARM::VLD2q8Pseudo
, ARM::VLD2q16Pseudo
,
4920 ARM::VLD2q32Pseudo
};
4921 SelectVLD(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
4925 case Intrinsic::arm_neon_vld3
: {
4926 static const uint16_t DOpcodes
[] = { ARM::VLD3d8Pseudo
,
4929 ARM::VLD1d64TPseudo
};
4930 static const uint16_t QOpcodes0
[] = { ARM::VLD3q8Pseudo_UPD
,
4931 ARM::VLD3q16Pseudo_UPD
,
4932 ARM::VLD3q32Pseudo_UPD
};
4933 static const uint16_t QOpcodes1
[] = { ARM::VLD3q8oddPseudo
,
4934 ARM::VLD3q16oddPseudo
,
4935 ARM::VLD3q32oddPseudo
};
4936 SelectVLD(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
4940 case Intrinsic::arm_neon_vld4
: {
4941 static const uint16_t DOpcodes
[] = { ARM::VLD4d8Pseudo
,
4944 ARM::VLD1d64QPseudo
};
4945 static const uint16_t QOpcodes0
[] = { ARM::VLD4q8Pseudo_UPD
,
4946 ARM::VLD4q16Pseudo_UPD
,
4947 ARM::VLD4q32Pseudo_UPD
};
4948 static const uint16_t QOpcodes1
[] = { ARM::VLD4q8oddPseudo
,
4949 ARM::VLD4q16oddPseudo
,
4950 ARM::VLD4q32oddPseudo
};
4951 SelectVLD(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4955 case Intrinsic::arm_neon_vld2dup
: {
4956 static const uint16_t DOpcodes
[] = { ARM::VLD2DUPd8
, ARM::VLD2DUPd16
,
4957 ARM::VLD2DUPd32
, ARM::VLD1q64
};
4958 static const uint16_t QOpcodes0
[] = { ARM::VLD2DUPq8EvenPseudo
,
4959 ARM::VLD2DUPq16EvenPseudo
,
4960 ARM::VLD2DUPq32EvenPseudo
};
4961 static const uint16_t QOpcodes1
[] = { ARM::VLD2DUPq8OddPseudo
,
4962 ARM::VLD2DUPq16OddPseudo
,
4963 ARM::VLD2DUPq32OddPseudo
};
4964 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 2,
4965 DOpcodes
, QOpcodes0
, QOpcodes1
);
4969 case Intrinsic::arm_neon_vld3dup
: {
4970 static const uint16_t DOpcodes
[] = { ARM::VLD3DUPd8Pseudo
,
4971 ARM::VLD3DUPd16Pseudo
,
4972 ARM::VLD3DUPd32Pseudo
,
4973 ARM::VLD1d64TPseudo
};
4974 static const uint16_t QOpcodes0
[] = { ARM::VLD3DUPq8EvenPseudo
,
4975 ARM::VLD3DUPq16EvenPseudo
,
4976 ARM::VLD3DUPq32EvenPseudo
};
4977 static const uint16_t QOpcodes1
[] = { ARM::VLD3DUPq8OddPseudo
,
4978 ARM::VLD3DUPq16OddPseudo
,
4979 ARM::VLD3DUPq32OddPseudo
};
4980 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 3,
4981 DOpcodes
, QOpcodes0
, QOpcodes1
);
4985 case Intrinsic::arm_neon_vld4dup
: {
4986 static const uint16_t DOpcodes
[] = { ARM::VLD4DUPd8Pseudo
,
4987 ARM::VLD4DUPd16Pseudo
,
4988 ARM::VLD4DUPd32Pseudo
,
4989 ARM::VLD1d64QPseudo
};
4990 static const uint16_t QOpcodes0
[] = { ARM::VLD4DUPq8EvenPseudo
,
4991 ARM::VLD4DUPq16EvenPseudo
,
4992 ARM::VLD4DUPq32EvenPseudo
};
4993 static const uint16_t QOpcodes1
[] = { ARM::VLD4DUPq8OddPseudo
,
4994 ARM::VLD4DUPq16OddPseudo
,
4995 ARM::VLD4DUPq32OddPseudo
};
4996 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 4,
4997 DOpcodes
, QOpcodes0
, QOpcodes1
);
5001 case Intrinsic::arm_neon_vld2lane
: {
5002 static const uint16_t DOpcodes
[] = { ARM::VLD2LNd8Pseudo
,
5003 ARM::VLD2LNd16Pseudo
,
5004 ARM::VLD2LNd32Pseudo
};
5005 static const uint16_t QOpcodes
[] = { ARM::VLD2LNq16Pseudo
,
5006 ARM::VLD2LNq32Pseudo
};
5007 SelectVLDSTLane(N
, true, false, 2, DOpcodes
, QOpcodes
);
5011 case Intrinsic::arm_neon_vld3lane
: {
5012 static const uint16_t DOpcodes
[] = { ARM::VLD3LNd8Pseudo
,
5013 ARM::VLD3LNd16Pseudo
,
5014 ARM::VLD3LNd32Pseudo
};
5015 static const uint16_t QOpcodes
[] = { ARM::VLD3LNq16Pseudo
,
5016 ARM::VLD3LNq32Pseudo
};
5017 SelectVLDSTLane(N
, true, false, 3, DOpcodes
, QOpcodes
);
5021 case Intrinsic::arm_neon_vld4lane
: {
5022 static const uint16_t DOpcodes
[] = { ARM::VLD4LNd8Pseudo
,
5023 ARM::VLD4LNd16Pseudo
,
5024 ARM::VLD4LNd32Pseudo
};
5025 static const uint16_t QOpcodes
[] = { ARM::VLD4LNq16Pseudo
,
5026 ARM::VLD4LNq32Pseudo
};
5027 SelectVLDSTLane(N
, true, false, 4, DOpcodes
, QOpcodes
);
5031 case Intrinsic::arm_neon_vst1
: {
5032 static const uint16_t DOpcodes
[] = { ARM::VST1d8
, ARM::VST1d16
,
5033 ARM::VST1d32
, ARM::VST1d64
};
5034 static const uint16_t QOpcodes
[] = { ARM::VST1q8
, ARM::VST1q16
,
5035 ARM::VST1q32
, ARM::VST1q64
};
5036 SelectVST(N
, false, 1, DOpcodes
, QOpcodes
, nullptr);
5040 case Intrinsic::arm_neon_vst1x2
: {
5041 static const uint16_t DOpcodes
[] = { ARM::VST1q8
, ARM::VST1q16
,
5042 ARM::VST1q32
, ARM::VST1q64
};
5043 static const uint16_t QOpcodes
[] = { ARM::VST1d8QPseudo
,
5044 ARM::VST1d16QPseudo
,
5045 ARM::VST1d32QPseudo
,
5046 ARM::VST1d64QPseudo
};
5047 SelectVST(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
5051 case Intrinsic::arm_neon_vst1x3
: {
5052 static const uint16_t DOpcodes
[] = { ARM::VST1d8TPseudo
,
5053 ARM::VST1d16TPseudo
,
5054 ARM::VST1d32TPseudo
,
5055 ARM::VST1d64TPseudo
};
5056 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowTPseudo_UPD
,
5057 ARM::VST1q16LowTPseudo_UPD
,
5058 ARM::VST1q32LowTPseudo_UPD
,
5059 ARM::VST1q64LowTPseudo_UPD
};
5060 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighTPseudo
,
5061 ARM::VST1q16HighTPseudo
,
5062 ARM::VST1q32HighTPseudo
,
5063 ARM::VST1q64HighTPseudo
};
5064 SelectVST(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
5068 case Intrinsic::arm_neon_vst1x4
: {
5069 static const uint16_t DOpcodes
[] = { ARM::VST1d8QPseudo
,
5070 ARM::VST1d16QPseudo
,
5071 ARM::VST1d32QPseudo
,
5072 ARM::VST1d64QPseudo
};
5073 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowQPseudo_UPD
,
5074 ARM::VST1q16LowQPseudo_UPD
,
5075 ARM::VST1q32LowQPseudo_UPD
,
5076 ARM::VST1q64LowQPseudo_UPD
};
5077 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighQPseudo
,
5078 ARM::VST1q16HighQPseudo
,
5079 ARM::VST1q32HighQPseudo
,
5080 ARM::VST1q64HighQPseudo
};
5081 SelectVST(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
5085 case Intrinsic::arm_neon_vst2
: {
5086 static const uint16_t DOpcodes
[] = { ARM::VST2d8
, ARM::VST2d16
,
5087 ARM::VST2d32
, ARM::VST1q64
};
5088 static const uint16_t QOpcodes
[] = { ARM::VST2q8Pseudo
, ARM::VST2q16Pseudo
,
5089 ARM::VST2q32Pseudo
};
5090 SelectVST(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
5094 case Intrinsic::arm_neon_vst3
: {
5095 static const uint16_t DOpcodes
[] = { ARM::VST3d8Pseudo
,
5098 ARM::VST1d64TPseudo
};
5099 static const uint16_t QOpcodes0
[] = { ARM::VST3q8Pseudo_UPD
,
5100 ARM::VST3q16Pseudo_UPD
,
5101 ARM::VST3q32Pseudo_UPD
};
5102 static const uint16_t QOpcodes1
[] = { ARM::VST3q8oddPseudo
,
5103 ARM::VST3q16oddPseudo
,
5104 ARM::VST3q32oddPseudo
};
5105 SelectVST(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
5109 case Intrinsic::arm_neon_vst4
: {
5110 static const uint16_t DOpcodes
[] = { ARM::VST4d8Pseudo
,
5113 ARM::VST1d64QPseudo
};
5114 static const uint16_t QOpcodes0
[] = { ARM::VST4q8Pseudo_UPD
,
5115 ARM::VST4q16Pseudo_UPD
,
5116 ARM::VST4q32Pseudo_UPD
};
5117 static const uint16_t QOpcodes1
[] = { ARM::VST4q8oddPseudo
,
5118 ARM::VST4q16oddPseudo
,
5119 ARM::VST4q32oddPseudo
};
5120 SelectVST(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
5124 case Intrinsic::arm_neon_vst2lane
: {
5125 static const uint16_t DOpcodes
[] = { ARM::VST2LNd8Pseudo
,
5126 ARM::VST2LNd16Pseudo
,
5127 ARM::VST2LNd32Pseudo
};
5128 static const uint16_t QOpcodes
[] = { ARM::VST2LNq16Pseudo
,
5129 ARM::VST2LNq32Pseudo
};
5130 SelectVLDSTLane(N
, false, false, 2, DOpcodes
, QOpcodes
);
5134 case Intrinsic::arm_neon_vst3lane
: {
5135 static const uint16_t DOpcodes
[] = { ARM::VST3LNd8Pseudo
,
5136 ARM::VST3LNd16Pseudo
,
5137 ARM::VST3LNd32Pseudo
};
5138 static const uint16_t QOpcodes
[] = { ARM::VST3LNq16Pseudo
,
5139 ARM::VST3LNq32Pseudo
};
5140 SelectVLDSTLane(N
, false, false, 3, DOpcodes
, QOpcodes
);
5144 case Intrinsic::arm_neon_vst4lane
: {
5145 static const uint16_t DOpcodes
[] = { ARM::VST4LNd8Pseudo
,
5146 ARM::VST4LNd16Pseudo
,
5147 ARM::VST4LNd32Pseudo
};
5148 static const uint16_t QOpcodes
[] = { ARM::VST4LNq16Pseudo
,
5149 ARM::VST4LNq32Pseudo
};
5150 SelectVLDSTLane(N
, false, false, 4, DOpcodes
, QOpcodes
);
5154 case Intrinsic::arm_mve_vldr_gather_base_wb
:
5155 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated
: {
5156 static const uint16_t Opcodes
[] = {ARM::MVE_VLDRWU32_qi_pre
,
5157 ARM::MVE_VLDRDU64_qi_pre
};
5158 SelectMVE_WB(N
, Opcodes
,
5159 IntNo
== Intrinsic::arm_mve_vldr_gather_base_wb_predicated
);
5163 case Intrinsic::arm_mve_vld2q
: {
5164 static const uint16_t Opcodes8
[] = {ARM::MVE_VLD20_8
, ARM::MVE_VLD21_8
};
5165 static const uint16_t Opcodes16
[] = {ARM::MVE_VLD20_16
,
5167 static const uint16_t Opcodes32
[] = {ARM::MVE_VLD20_32
,
5169 static const uint16_t *const Opcodes
[] = {Opcodes8
, Opcodes16
, Opcodes32
};
5170 SelectMVE_VLD(N
, 2, Opcodes
, false);
5174 case Intrinsic::arm_mve_vld4q
: {
5175 static const uint16_t Opcodes8
[] = {ARM::MVE_VLD40_8
, ARM::MVE_VLD41_8
,
5176 ARM::MVE_VLD42_8
, ARM::MVE_VLD43_8
};
5177 static const uint16_t Opcodes16
[] = {ARM::MVE_VLD40_16
, ARM::MVE_VLD41_16
,
5180 static const uint16_t Opcodes32
[] = {ARM::MVE_VLD40_32
, ARM::MVE_VLD41_32
,
5183 static const uint16_t *const Opcodes
[] = {Opcodes8
, Opcodes16
, Opcodes32
};
5184 SelectMVE_VLD(N
, 4, Opcodes
, false);
5191 case ISD::INTRINSIC_WO_CHAIN
: {
5192 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(0))->getZExtValue();
5197 // Scalar f32 -> bf16
5198 case Intrinsic::arm_neon_vcvtbfp2bf
: {
5200 const SDValue
&Src
= N
->getOperand(1);
5201 llvm::EVT DestTy
= N
->getValueType(0);
5202 SDValue Pred
= getAL(CurDAG
, dl
);
5203 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
5204 SDValue Ops
[] = { Src
, Src
, Pred
, Reg0
};
5205 CurDAG
->SelectNodeTo(N
, ARM::BF16_VCVTB
, DestTy
, Ops
);
5209 // Vector v4f32 -> v4bf16
5210 case Intrinsic::arm_neon_vcvtfp2bf
: {
5212 const SDValue
&Src
= N
->getOperand(1);
5213 SDValue Pred
= getAL(CurDAG
, dl
);
5214 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
5215 SDValue Ops
[] = { Src
, Pred
, Reg0
};
5216 CurDAG
->SelectNodeTo(N
, ARM::BF16_VCVT
, MVT::v4bf16
, Ops
);
5220 case Intrinsic::arm_mve_urshrl
:
5221 SelectMVE_LongShift(N
, ARM::MVE_URSHRL
, true, false);
5223 case Intrinsic::arm_mve_uqshll
:
5224 SelectMVE_LongShift(N
, ARM::MVE_UQSHLL
, true, false);
5226 case Intrinsic::arm_mve_srshrl
:
5227 SelectMVE_LongShift(N
, ARM::MVE_SRSHRL
, true, false);
5229 case Intrinsic::arm_mve_sqshll
:
5230 SelectMVE_LongShift(N
, ARM::MVE_SQSHLL
, true, false);
5232 case Intrinsic::arm_mve_uqrshll
:
5233 SelectMVE_LongShift(N
, ARM::MVE_UQRSHLL
, false, true);
5235 case Intrinsic::arm_mve_sqrshrl
:
5236 SelectMVE_LongShift(N
, ARM::MVE_SQRSHRL
, false, true);
5239 case Intrinsic::arm_mve_vadc
:
5240 case Intrinsic::arm_mve_vadc_predicated
:
5241 SelectMVE_VADCSBC(N
, ARM::MVE_VADC
, ARM::MVE_VADCI
, true,
5242 IntNo
== Intrinsic::arm_mve_vadc_predicated
);
5244 case Intrinsic::arm_mve_vsbc
:
5245 case Intrinsic::arm_mve_vsbc_predicated
:
5246 SelectMVE_VADCSBC(N
, ARM::MVE_VSBC
, ARM::MVE_VSBCI
, true,
5247 IntNo
== Intrinsic::arm_mve_vsbc_predicated
);
5249 case Intrinsic::arm_mve_vshlc
:
5250 case Intrinsic::arm_mve_vshlc_predicated
:
5251 SelectMVE_VSHLC(N
, IntNo
== Intrinsic::arm_mve_vshlc_predicated
);
5254 case Intrinsic::arm_mve_vmlldava
:
5255 case Intrinsic::arm_mve_vmlldava_predicated
: {
5256 static const uint16_t OpcodesU
[] = {
5257 ARM::MVE_VMLALDAVu16
, ARM::MVE_VMLALDAVu32
,
5258 ARM::MVE_VMLALDAVau16
, ARM::MVE_VMLALDAVau32
,
5260 static const uint16_t OpcodesS
[] = {
5261 ARM::MVE_VMLALDAVs16
, ARM::MVE_VMLALDAVs32
,
5262 ARM::MVE_VMLALDAVas16
, ARM::MVE_VMLALDAVas32
,
5263 ARM::MVE_VMLALDAVxs16
, ARM::MVE_VMLALDAVxs32
,
5264 ARM::MVE_VMLALDAVaxs16
, ARM::MVE_VMLALDAVaxs32
,
5265 ARM::MVE_VMLSLDAVs16
, ARM::MVE_VMLSLDAVs32
,
5266 ARM::MVE_VMLSLDAVas16
, ARM::MVE_VMLSLDAVas32
,
5267 ARM::MVE_VMLSLDAVxs16
, ARM::MVE_VMLSLDAVxs32
,
5268 ARM::MVE_VMLSLDAVaxs16
, ARM::MVE_VMLSLDAVaxs32
,
5270 SelectMVE_VMLLDAV(N
, IntNo
== Intrinsic::arm_mve_vmlldava_predicated
,
5271 OpcodesS
, OpcodesU
);
5275 case Intrinsic::arm_mve_vrmlldavha
:
5276 case Intrinsic::arm_mve_vrmlldavha_predicated
: {
5277 static const uint16_t OpcodesU
[] = {
5278 ARM::MVE_VRMLALDAVHu32
, ARM::MVE_VRMLALDAVHau32
,
5280 static const uint16_t OpcodesS
[] = {
5281 ARM::MVE_VRMLALDAVHs32
, ARM::MVE_VRMLALDAVHas32
,
5282 ARM::MVE_VRMLALDAVHxs32
, ARM::MVE_VRMLALDAVHaxs32
,
5283 ARM::MVE_VRMLSLDAVHs32
, ARM::MVE_VRMLSLDAVHas32
,
5284 ARM::MVE_VRMLSLDAVHxs32
, ARM::MVE_VRMLSLDAVHaxs32
,
5286 SelectMVE_VRMLLDAVH(N
, IntNo
== Intrinsic::arm_mve_vrmlldavha_predicated
,
5287 OpcodesS
, OpcodesU
);
5291 case Intrinsic::arm_mve_vidup
:
5292 case Intrinsic::arm_mve_vidup_predicated
: {
5293 static const uint16_t Opcodes
[] = {
5294 ARM::MVE_VIDUPu8
, ARM::MVE_VIDUPu16
, ARM::MVE_VIDUPu32
,
5296 SelectMVE_VxDUP(N
, Opcodes
, false,
5297 IntNo
== Intrinsic::arm_mve_vidup_predicated
);
5301 case Intrinsic::arm_mve_vddup
:
5302 case Intrinsic::arm_mve_vddup_predicated
: {
5303 static const uint16_t Opcodes
[] = {
5304 ARM::MVE_VDDUPu8
, ARM::MVE_VDDUPu16
, ARM::MVE_VDDUPu32
,
5306 SelectMVE_VxDUP(N
, Opcodes
, false,
5307 IntNo
== Intrinsic::arm_mve_vddup_predicated
);
5311 case Intrinsic::arm_mve_viwdup
:
5312 case Intrinsic::arm_mve_viwdup_predicated
: {
5313 static const uint16_t Opcodes
[] = {
5314 ARM::MVE_VIWDUPu8
, ARM::MVE_VIWDUPu16
, ARM::MVE_VIWDUPu32
,
5316 SelectMVE_VxDUP(N
, Opcodes
, true,
5317 IntNo
== Intrinsic::arm_mve_viwdup_predicated
);
5321 case Intrinsic::arm_mve_vdwdup
:
5322 case Intrinsic::arm_mve_vdwdup_predicated
: {
5323 static const uint16_t Opcodes
[] = {
5324 ARM::MVE_VDWDUPu8
, ARM::MVE_VDWDUPu16
, ARM::MVE_VDWDUPu32
,
5326 SelectMVE_VxDUP(N
, Opcodes
, true,
5327 IntNo
== Intrinsic::arm_mve_vdwdup_predicated
);
5331 case Intrinsic::arm_cde_cx1d
:
5332 case Intrinsic::arm_cde_cx1da
:
5333 case Intrinsic::arm_cde_cx2d
:
5334 case Intrinsic::arm_cde_cx2da
:
5335 case Intrinsic::arm_cde_cx3d
:
5336 case Intrinsic::arm_cde_cx3da
: {
5337 bool HasAccum
= IntNo
== Intrinsic::arm_cde_cx1da
||
5338 IntNo
== Intrinsic::arm_cde_cx2da
||
5339 IntNo
== Intrinsic::arm_cde_cx3da
;
5343 case Intrinsic::arm_cde_cx1d
:
5344 case Intrinsic::arm_cde_cx1da
:
5346 Opcode
= HasAccum
? ARM::CDE_CX1DA
: ARM::CDE_CX1D
;
5348 case Intrinsic::arm_cde_cx2d
:
5349 case Intrinsic::arm_cde_cx2da
:
5351 Opcode
= HasAccum
? ARM::CDE_CX2DA
: ARM::CDE_CX2D
;
5353 case Intrinsic::arm_cde_cx3d
:
5354 case Intrinsic::arm_cde_cx3da
:
5356 Opcode
= HasAccum
? ARM::CDE_CX3DA
: ARM::CDE_CX3D
;
5359 llvm_unreachable("Unexpected opcode");
5361 SelectCDE_CXxD(N
, Opcode
, NumExtraOps
, HasAccum
);
5368 case ISD::ATOMIC_CMP_SWAP
:
5376 // Inspect a register string of the form
5377 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5378 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5379 // and obtain the integer operands from them, adding these operands to the
5381 static void getIntOperandsFromRegisterString(StringRef RegString
,
5382 SelectionDAG
*CurDAG
,
5384 std::vector
<SDValue
> &Ops
) {
5385 SmallVector
<StringRef
, 5> Fields
;
5386 RegString
.split(Fields
, ':');
5388 if (Fields
.size() > 1) {
5389 bool AllIntFields
= true;
5391 for (StringRef Field
: Fields
) {
5392 // Need to trim out leading 'cp' characters and get the integer field.
5394 AllIntFields
&= !Field
.trim("CPcp").getAsInteger(10, IntField
);
5395 Ops
.push_back(CurDAG
->getTargetConstant(IntField
, DL
, MVT::i32
));
5398 assert(AllIntFields
&&
5399 "Unexpected non-integer value in special register string.");
5404 // Maps a Banked Register string to its mask value. The mask value returned is
5405 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5406 // mask operand, which expresses which register is to be used, e.g. r8, and in
5407 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5409 static inline int getBankedRegisterMask(StringRef RegString
) {
5410 auto TheReg
= ARMBankedReg::lookupBankedRegByName(RegString
.lower());
5413 return TheReg
->Encoding
;
5416 // The flags here are common to those allowed for apsr in the A class cores and
5417 // those allowed for the special registers in the M class cores. Returns a
5418 // value representing which flags were present, -1 if invalid.
//
// The translation is a StringSwitch over the whole flags string. In the lines
// visible here an empty string maps to 0x2 and "nzcvqg" maps to 0x3.
//
// NOTE(review): the embedded listing numbers jump from 5422 to 5425 and stop
// after 5425, so further .Case entries and the end of the StringSwitch chain
// (presumably where the documented -1 result comes from) are not visible in
// this extract -- check the complete file before relying on this table.
5419 static inline int getMClassFlagsMask(StringRef Flags
) {
5420 return StringSwitch
<int>(Flags
)
5421 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5422 // correct when flags are not permitted
5425 .Case("nzcvqg", 0x3)
5429 // Maps MClass special registers string to its value for use in the
5430 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5431 // Returns -1 to signify that the string was invalid.
5432 static int getMClassRegisterMask(StringRef Reg
, const ARMSubtarget
*Subtarget
) {
5433 auto TheReg
= ARMSysReg::lookupMClassSysRegByName(Reg
);
5434 const FeatureBitset
&FeatureBits
= Subtarget
->getFeatureBits();
5435 if (!TheReg
|| !TheReg
->hasRequiredFeatures(FeatureBits
))
5437 return (int)(TheReg
->Encoding
& 0xFFF); // SYSm value
// Computes the mask operand (layout described in the comment just inside the
// function) for an access to one of the A/R class special registers.
//
//   Reg   - register name; the visible code compares it against "apsr",
//           "cpsr" and "spsr".
//   Flags - flags suffix supplied with the register. For "apsr" it is handed
//           to getMClassFlagsMask(); for the other registers an empty string
//           or "all" is special-cased, and otherwise the string is walked one
//           character at a time.
//
// NOTE(review): this extract is missing source lines (the embedded listing
// numbers skip 5445, 5451-5455, 5457-5459, 5462-5463, 5467-5484, 5487-5490 and
// everything after 5491). The declarations of Mask and FlagVal, the mapping
// from flag characters to bits, and every return statement are therefore not
// visible here -- consult the complete file for the actual values.
5440 static int getARClassRegisterMask(StringRef Reg
, StringRef Flags
) {
5441 // The mask operand contains the special register (R Bit) in bit 4, whether
5442 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5443 // bits 3-0 contain the fields to be accessed in the special register, set by
5444 // the flags provided with the register.
5446 if (Reg
== "apsr") {
5447 // The flags permitted for apsr are the same flags that are allowed in
5448 // M class registers. We get the flag value and then shift the flags into
5449 // the correct place to combine with the mask.
5450 Mask
= getMClassFlagsMask(Flags
);
// Any name other than cpsr/spsr is handled separately at this point; the body
// of this check is not visible in this extract.
5456 if (Reg
!= "cpsr" && Reg
!= "spsr") {
5460 // This is the same as if the flags were "fc"
5461 if (Flags
.empty() || Flags
== "all")
5464 // Inspect the supplied flags string and set the bits in the mask for
5465 // the relevant and valid flags allowed for cpsr and spsr.
5466 for (char Flag
: Flags
) {
5485 // This avoids allowing strings where the same flag bit appears twice.
// A FlagVal of zero, or one whose bit is already present in Mask, takes the
// branch below; what that branch does is not visible in this extract.
5486 if (!FlagVal
|| (Mask
& FlagVal
))
5491 // If the register is spsr then we need to set the R bit.
5498 // Lower the read_register intrinsic to ARM specific DAG nodes
5499 // using the supplied metadata string to select the instruction node to use
5500 // and the registers/masks to construct as operands for the node.
5501 bool ARMDAGToDAGISel::tryReadRegister(SDNode
*N
){
5502 const auto *MD
= cast
<MDNodeSDNode
>(N
->getOperand(1));
5503 const auto *RegString
= cast
<MDString
>(MD
->getMD()->getOperand(0));
5504 bool IsThumb2
= Subtarget
->isThumb2();
5507 std::vector
<SDValue
> Ops
;
5508 getIntOperandsFromRegisterString(RegString
->getString(), CurDAG
, DL
, Ops
);
5511 // If the special register string was constructed of fields (as defined
5512 // in the ACLE) then need to lower to MRC node (32 bit) or
5513 // MRRC node(64 bit), we can make the distinction based on the number of
5514 // operands we have.
5516 SmallVector
<EVT
, 3> ResTypes
;
5517 if (Ops
.size() == 5){
5518 Opcode
= IsThumb2
? ARM::t2MRC
: ARM::MRC
;
5519 ResTypes
.append({ MVT::i32
, MVT::Other
});
5521 assert(Ops
.size() == 3 &&
5522 "Invalid number of fields in special register string.");
5523 Opcode
= IsThumb2
? ARM::t2MRRC
: ARM::MRRC
;
5524 ResTypes
.append({ MVT::i32
, MVT::i32
, MVT::Other
});
5527 Ops
.push_back(getAL(CurDAG
, DL
));
5528 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
5529 Ops
.push_back(N
->getOperand(0));
5530 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, ResTypes
, Ops
));
5534 std::string SpecialReg
= RegString
->getString().lower();
5536 int BankedReg
= getBankedRegisterMask(SpecialReg
);
5537 if (BankedReg
!= -1) {
5538 Ops
= { CurDAG
->getTargetConstant(BankedReg
, DL
, MVT::i32
),
5539 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
5542 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRSbanked
: ARM::MRSbanked
,
5543 DL
, MVT::i32
, MVT::Other
, Ops
));
5547 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5548 // corresponding to the register that is being read from. So we switch on the
5549 // string to find which opcode we need to use.
5550 unsigned Opcode
= StringSwitch
<unsigned>(SpecialReg
)
5551 .Case("fpscr", ARM::VMRS
)
5552 .Case("fpexc", ARM::VMRS_FPEXC
)
5553 .Case("fpsid", ARM::VMRS_FPSID
)
5554 .Case("mvfr0", ARM::VMRS_MVFR0
)
5555 .Case("mvfr1", ARM::VMRS_MVFR1
)
5556 .Case("mvfr2", ARM::VMRS_MVFR2
)
5557 .Case("fpinst", ARM::VMRS_FPINST
)
5558 .Case("fpinst2", ARM::VMRS_FPINST2
)
5561 // If an opcode was found then we can lower the read to a VFP instruction.
5563 if (!Subtarget
->hasVFP2Base())
5565 if (Opcode
== ARM::VMRS_MVFR2
&& !Subtarget
->hasFPARMv8Base())
5568 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
5571 CurDAG
->getMachineNode(Opcode
, DL
, MVT::i32
, MVT::Other
, Ops
));
5575 // If the target is M Class then need to validate that the register string
5576 // is an acceptable value, so check that a mask can be constructed from the
5578 if (Subtarget
->isMClass()) {
5579 int SYSmValue
= getMClassRegisterMask(SpecialReg
, Subtarget
);
5580 if (SYSmValue
== -1)
5583 SDValue Ops
[] = { CurDAG
->getTargetConstant(SYSmValue
, DL
, MVT::i32
),
5584 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
5587 N
, CurDAG
->getMachineNode(ARM::t2MRS_M
, DL
, MVT::i32
, MVT::Other
, Ops
));
5591 // Here we know the target is not M Class so we need to check if it is one
5592 // of the remaining possible values which are apsr, cpsr or spsr.
5593 if (SpecialReg
== "apsr" || SpecialReg
== "cpsr") {
5594 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
5596 ReplaceNode(N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRS_AR
: ARM::MRS
,
5597 DL
, MVT::i32
, MVT::Other
, Ops
));
5601 if (SpecialReg
== "spsr") {
5602 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
5605 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRSsys_AR
: ARM::MRSsys
, DL
,
5606 MVT::i32
, MVT::Other
, Ops
));
5613 // Lower the write_register intrinsic to ARM specific DAG nodes
5614 // using the supplied metadata string to select the instruction node to use
5615 // and the registers/masks to use in the nodes
5616 bool ARMDAGToDAGISel::tryWriteRegister(SDNode
*N
){
5617 const auto *MD
= cast
<MDNodeSDNode
>(N
->getOperand(1));
5618 const auto *RegString
= cast
<MDString
>(MD
->getMD()->getOperand(0));
5619 bool IsThumb2
= Subtarget
->isThumb2();
5622 std::vector
<SDValue
> Ops
;
5623 getIntOperandsFromRegisterString(RegString
->getString(), CurDAG
, DL
, Ops
);
5626 // If the special register string was constructed of fields (as defined
5627 // in the ACLE) then need to lower to MCR node (32 bit) or
5628 // MCRR node(64 bit), we can make the distinction based on the number of
5629 // operands we have.
5631 if (Ops
.size() == 5) {
5632 Opcode
= IsThumb2
? ARM::t2MCR
: ARM::MCR
;
5633 Ops
.insert(Ops
.begin()+2, N
->getOperand(2));
5635 assert(Ops
.size() == 3 &&
5636 "Invalid number of fields in special register string.");
5637 Opcode
= IsThumb2
? ARM::t2MCRR
: ARM::MCRR
;
5638 SDValue WriteValue
[] = { N
->getOperand(2), N
->getOperand(3) };
5639 Ops
.insert(Ops
.begin()+2, WriteValue
, WriteValue
+2);
5642 Ops
.push_back(getAL(CurDAG
, DL
));
5643 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
5644 Ops
.push_back(N
->getOperand(0));
5646 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, MVT::Other
, Ops
));
5650 std::string SpecialReg
= RegString
->getString().lower();
5651 int BankedReg
= getBankedRegisterMask(SpecialReg
);
5652 if (BankedReg
!= -1) {
5653 Ops
= { CurDAG
->getTargetConstant(BankedReg
, DL
, MVT::i32
), N
->getOperand(2),
5654 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
5657 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MSRbanked
: ARM::MSRbanked
,
5658 DL
, MVT::Other
, Ops
));
5662 // The VFP registers are written to by creating SelectionDAG nodes with
5663 // opcodes corresponding to the register that is being written. So we switch
5664 // on the string to find which opcode we need to use.
5665 unsigned Opcode
= StringSwitch
<unsigned>(SpecialReg
)
5666 .Case("fpscr", ARM::VMSR
)
5667 .Case("fpexc", ARM::VMSR_FPEXC
)
5668 .Case("fpsid", ARM::VMSR_FPSID
)
5669 .Case("fpinst", ARM::VMSR_FPINST
)
5670 .Case("fpinst2", ARM::VMSR_FPINST2
)
5674 if (!Subtarget
->hasVFP2Base())
5676 Ops
= { N
->getOperand(2), getAL(CurDAG
, DL
),
5677 CurDAG
->getRegister(0, MVT::i32
), N
->getOperand(0) };
5678 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, MVT::Other
, Ops
));
5682 std::pair
<StringRef
, StringRef
> Fields
;
5683 Fields
= StringRef(SpecialReg
).rsplit('_');
5684 std::string Reg
= Fields
.first
.str();
5685 StringRef Flags
= Fields
.second
;
5687 // If the target was M Class then need to validate the special register value
5688 // and retrieve the mask for use in the instruction node.
5689 if (Subtarget
->isMClass()) {
5690 int SYSmValue
= getMClassRegisterMask(SpecialReg
, Subtarget
);
5691 if (SYSmValue
== -1)
5694 SDValue Ops
[] = { CurDAG
->getTargetConstant(SYSmValue
, DL
, MVT::i32
),
5695 N
->getOperand(2), getAL(CurDAG
, DL
),
5696 CurDAG
->getRegister(0, MVT::i32
), N
->getOperand(0) };
5697 ReplaceNode(N
, CurDAG
->getMachineNode(ARM::t2MSR_M
, DL
, MVT::Other
, Ops
));
5701 // We then check to see if a valid mask can be constructed for one of the
5702 // register string values permitted for the A and R class cores. These values
5703 // are apsr, spsr and cpsr; these are also valid on older cores.
5704 int Mask
= getARClassRegisterMask(Reg
, Flags
);
5706 Ops
= { CurDAG
->getTargetConstant(Mask
, DL
, MVT::i32
), N
->getOperand(2),
5707 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
5709 ReplaceNode(N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MSR_AR
: ARM::MSR
,
5710 DL
, MVT::Other
, Ops
));
5717 bool ARMDAGToDAGISel::tryInlineAsm(SDNode
*N
){
5718 std::vector
<SDValue
> AsmNodeOperands
;
5719 unsigned Flag
, Kind
;
5720 bool Changed
= false;
5721 unsigned NumOps
= N
->getNumOperands();
5723 // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
5724 // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
5725 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5726 // respectively. Since there is no constraint to explicitly specify a
5727 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5728 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5729 // them into a GPRPair.
5732 SDValue Glue
= N
->getGluedNode() ? N
->getOperand(NumOps
-1)
5733 : SDValue(nullptr,0);
5735 SmallVector
<bool, 8> OpChanged
;
5736 // Glue node will be appended late.
5737 for(unsigned i
= 0, e
= N
->getGluedNode() ? NumOps
- 1 : NumOps
; i
< e
; ++i
) {
5738 SDValue op
= N
->getOperand(i
);
5739 AsmNodeOperands
.push_back(op
);
5741 if (i
< InlineAsm::Op_FirstOperand
)
5744 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(i
))) {
5745 Flag
= C
->getZExtValue();
5746 Kind
= InlineAsm::getKind(Flag
);
5751 // Immediate operands to inline asm in the SelectionDAG are modeled with
5752 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
5753 // the second is a constant with the value of the immediate. If we get here
5754 // and we have a Kind_Imm, skip the next operand, and continue.
5755 if (Kind
== InlineAsm::Kind_Imm
) {
5756 SDValue op
= N
->getOperand(++i
);
5757 AsmNodeOperands
.push_back(op
);
5761 unsigned NumRegs
= InlineAsm::getNumOperandRegisters(Flag
);
5763 OpChanged
.push_back(false);
5765 unsigned DefIdx
= 0;
5766 bool IsTiedToChangedOp
= false;
5767 // If it's a use that is tied with a previous def, it has no
5768 // reg class constraint.
5769 if (Changed
&& InlineAsm::isUseOperandTiedToDef(Flag
, DefIdx
))
5770 IsTiedToChangedOp
= OpChanged
[DefIdx
];
5772 // Memory operands to inline asm in the SelectionDAG are modeled with two
5773 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
5774 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
5775 // it doesn't get misinterpreted), and continue. We do this here because
5776 // it's important to update the OpChanged array correctly before moving on.
5777 if (Kind
== InlineAsm::Kind_Mem
) {
5778 SDValue op
= N
->getOperand(++i
);
5779 AsmNodeOperands
.push_back(op
);
5783 if (Kind
!= InlineAsm::Kind_RegUse
&& Kind
!= InlineAsm::Kind_RegDef
5784 && Kind
!= InlineAsm::Kind_RegDefEarlyClobber
)
5788 bool HasRC
= InlineAsm::hasRegClassConstraint(Flag
, RC
);
5789 if ((!IsTiedToChangedOp
&& (!HasRC
|| RC
!= ARM::GPRRegClassID
))
5793 assert((i
+2 < NumOps
) && "Invalid number of operands in inline asm");
5794 SDValue V0
= N
->getOperand(i
+1);
5795 SDValue V1
= N
->getOperand(i
+2);
5796 unsigned Reg0
= cast
<RegisterSDNode
>(V0
)->getReg();
5797 unsigned Reg1
= cast
<RegisterSDNode
>(V1
)->getReg();
5799 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
5801 if (Kind
== InlineAsm::Kind_RegDef
||
5802 Kind
== InlineAsm::Kind_RegDefEarlyClobber
) {
5803 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5804 // the original GPRs.
5806 Register GPVR
= MRI
.createVirtualRegister(&ARM::GPRPairRegClass
);
5807 PairedReg
= CurDAG
->getRegister(GPVR
, MVT::Untyped
);
5808 SDValue Chain
= SDValue(N
,0);
5810 SDNode
*GU
= N
->getGluedUser();
5811 SDValue RegCopy
= CurDAG
->getCopyFromReg(Chain
, dl
, GPVR
, MVT::Untyped
,
5814 // Extract values from a GPRPair reg and copy to the original GPR reg.
5815 SDValue Sub0
= CurDAG
->getTargetExtractSubreg(ARM::gsub_0
, dl
, MVT::i32
,
5817 SDValue Sub1
= CurDAG
->getTargetExtractSubreg(ARM::gsub_1
, dl
, MVT::i32
,
5819 SDValue T0
= CurDAG
->getCopyToReg(Sub0
, dl
, Reg0
, Sub0
,
5820 RegCopy
.getValue(1));
5821 SDValue T1
= CurDAG
->getCopyToReg(Sub1
, dl
, Reg1
, Sub1
, T0
.getValue(1));
5823 // Update the original glue user.
5824 std::vector
<SDValue
> Ops(GU
->op_begin(), GU
->op_end()-1);
5825 Ops
.push_back(T1
.getValue(1));
5826 CurDAG
->UpdateNodeOperands(GU
, Ops
);
5829 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
5830 // GPRPair and then pass the GPRPair to the inline asm.
5831 SDValue Chain
= AsmNodeOperands
[InlineAsm::Op_InputChain
];
5833 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5834 SDValue T0
= CurDAG
->getCopyFromReg(Chain
, dl
, Reg0
, MVT::i32
,
5836 SDValue T1
= CurDAG
->getCopyFromReg(Chain
, dl
, Reg1
, MVT::i32
,
5838 SDValue Pair
= SDValue(createGPRPairNode(MVT::Untyped
, T0
, T1
), 0);
5840 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5841 // i32 VRs of inline asm with it.
5842 Register GPVR
= MRI
.createVirtualRegister(&ARM::GPRPairRegClass
);
5843 PairedReg
= CurDAG
->getRegister(GPVR
, MVT::Untyped
);
5844 Chain
= CurDAG
->getCopyToReg(T1
, dl
, GPVR
, Pair
, T1
.getValue(1));
5846 AsmNodeOperands
[InlineAsm::Op_InputChain
] = Chain
;
5847 Glue
= Chain
.getValue(1);
5852 if(PairedReg
.getNode()) {
5853 OpChanged
[OpChanged
.size() -1 ] = true;
5854 Flag
= InlineAsm::getFlagWord(Kind
, 1 /* RegNum*/);
5855 if (IsTiedToChangedOp
)
5856 Flag
= InlineAsm::getFlagWordForMatchingOp(Flag
, DefIdx
);
5858 Flag
= InlineAsm::getFlagWordForRegClass(Flag
, ARM::GPRPairRegClassID
);
5859 // Replace the current flag.
5860 AsmNodeOperands
[AsmNodeOperands
.size() -1] = CurDAG
->getTargetConstant(
5861 Flag
, dl
, MVT::i32
);
5862 // Add the new register node and skip the original two GPRs.
5863 AsmNodeOperands
.push_back(PairedReg
);
5864 // Skip the next two GPRs.
5870 AsmNodeOperands
.push_back(Glue
);
5874 SDValue New
= CurDAG
->getNode(N
->getOpcode(), SDLoc(N
),
5875 CurDAG
->getVTList(MVT::Other
, MVT::Glue
), AsmNodeOperands
);
5877 ReplaceNode(N
, New
.getNode());
/// SelectInlineAsmMemoryOperand - Handle a memory operand of an inline asm.
/// For every memory constraint listed in the switch below, the address value
/// Op is appended to OutOps unchanged, i.e. the address is simply required to
/// be in a register.
///
/// NOTE(review): the return statements and the label guarding the
/// llvm_unreachable call are not visible in this copy of the file; confirm
/// the return-value convention against the upstream source.
5882 bool ARMDAGToDAGISel::
5883 SelectInlineAsmMemoryOperand(const SDValue
&Op
, unsigned ConstraintID
,
5884 std::vector
<SDValue
> &OutOps
) {
5885 switch(ConstraintID
) {
5887 llvm_unreachable("Unexpected asm memory constraint");
// All of the following constraints share the single handling below.
5888 case InlineAsm::Constraint_m
:
5889 case InlineAsm::Constraint_o
:
5890 case InlineAsm::Constraint_Q
:
5891 case InlineAsm::Constraint_Um
:
5892 case InlineAsm::Constraint_Un
:
5893 case InlineAsm::Constraint_Uq
:
5894 case InlineAsm::Constraint_Us
:
5895 case InlineAsm::Constraint_Ut
:
5896 case InlineAsm::Constraint_Uv
:
5897 case InlineAsm::Constraint_Uy
:
5898 // Require the address to be in a register. That is safe for all ARM
5899 // variants and it is hard to do anything much smarter without knowing
5900 // how the operand is used.
5901 OutOps
.push_back(Op
);
5907 /// createARMISelDag - This pass converts a legalized DAG into an
5908 /// ARM-specific DAG, ready for instruction scheduling.
5910 FunctionPass
*llvm::createARMISelDag(ARMBaseTargetMachine
&TM
,
5911 CodeGenOpt::Level OptLevel
) {
5912 return new ARMDAGToDAGISel(TM
, OptLevel
);