1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines an instruction selector for the ARM target.
11 //===----------------------------------------------------------------------===//
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
#define DEBUG_TYPE "arm-isel"

// Debug/testing knob: when set, isel refuses to fold shifts into shifter
// operands. NOTE(review): the `static cl::opt<bool>` declarator and the
// trailing `cl::init(false));` line are elided in this excerpt.
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
/// ARM-specific SelectionDAG instruction selector. Implements operand
/// (addressing-mode) matching predicates referenced from the TableGen'erated
/// matcher, plus manual selection for loads/stores and NEON intrinsics.
class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

  // NOTE(review): access-specifier labels and several closing braces /
  // trailing parameters are elided in this excerpt of the class body.
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  // ARM addressing-mode selectors (see ARMAddressingModes.h for encodings).
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Materialize a predicate + CPSR register pair for a CMOV from a constant
  // condition-code operand N.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Immediate classification helpers used by the TableGen'erated patterns.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    // Extract the zero-extended constant payload into the out-parameter.
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  // Thin wrapper over the SDNode* overload above.
  return isInt32Immediate(N.getNode(), Imm);
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has a immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
/// Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);

  ScaledConstant = (int) C->getZExtValue();
  // Must be an exact multiple of Scale, otherwise not representable.
  if ((ScaledConstant % Scale) != 0)

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
// Pre-isel DAG rewrite: reshape (add X1, (and (srl X2, c1), c2)) so that the
// and/srl become a UBFX-style extraction and the shift folds into 'add' as a
// shifter operand. Requires v6T2 (UBFX availability).
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // The AND may be on either side of the ADD; normalize so N1 is the AND.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
    // Be conservative here. Shifter operands aren't always free. e.g. On
    // Swift, left shifter operand of 1 / 2 for free but others are not.
    // ubfx r3, r1, #16, #8
    // ldr.w r3, [r0, r3, lsl #2]
    // and.w r2, r9, r1, lsr #14
    // Mask must be contiguous ones after the trailing zeros.
    if (And_imm & (And_imm + 1))

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
    if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
        SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 don't bother with hazard avoidance.
  if (OptLevel == CodeGenOpt::None)

  if (!Subtarget->hasVMLxHazards())

  // Inspect the (single, per the elided use-count guard) user of N.
  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());

    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla (stall 8 cycles)
    // This adds up to about 18 - 19 cycles.
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
// Decide whether folding Shift as a shifter operand pays off. On most cores
// shifter operands are free; on Cortex-A9-like and Swift cores only certain
// LSL amounts with a multiply-used shift are worth folding.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
  if (Shift.hasOneUse())
  // Shift has multiple uses: only LSL #2 (or LSL #1 on Swift) is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
// Number of instructions needed to put Val into a register (3 = literal pool
// load). Thumb and ARM encodings admit different immediate forms.
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                   // MOV
         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
    // ARM-mode immediate forms (the `else` bracket is elided in this excerpt).
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
  return 3; // Literal pool load
// See the declaration's doc comment: detect (X * C) where C = C' << k so the
// shift can be folded into a shifter operand and C' is cheaper than C.
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    if (PowerOfTwo == 0) return false;

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
// Replace N with M in CurDAG; repositioning M at N's node keeps the isel
// worklist ordering so M is visited when N would have been.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
// Match N as a register shifted by an immediate (so_reg_imm shifter operand).
// Outputs (elided parameter names in this excerpt): base register and the
// encoded shift opcode+amount.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              bool CheckProfitability) {
  if (DisableShifterOp)

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Handle keeps the value alive across the DAG mutation below.
      HandleSDNode Handle(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  // Shift amounts are encoded modulo 32.
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// Match N as a register shifted by a register (so_reg_reg shifter operand).
// A constant shift amount is rejected here — that form is handled by
// SelectImmShifterOperand instead.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              bool CheckProfitability) {
  if (DisableShifterOp)

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Constant shift amount => not a register-shifted form.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))

  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  // OR == ADD when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
// Addressing-mode selector for [reg, #+/-imm12] (LDRi12/STRi12 forms).
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
  // Match simple R + imm12 operands.

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

    // Wrapper of a non-global/non-symbol address: use its operand directly
    // with a zero offset.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);

  // Fallback: whole expression as base, zero offset.
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
// Addressing-mode selector for [reg, +/- reg (shifted)] load/store operands
// (ARM addressing mode 2 register forms).
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      AddSub = ARM_AM::sub;
      if (isPowerOf2_32(RHSC)) {
        unsigned ShAmt = Log2_32(RHSC);
        Base = Offset = N.getOperand(0);
        Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      ShOpcVal = ARM_AM::no_shift;
      ShOpcVal = ARM_AM::no_shift;

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: shifted LHS becomes the offset, RHS the base.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
          ShOpcVal = ARM_AM::no_shift;
        ShOpcVal = ARM_AM::no_shift;

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShOpcVal = ARM_AM::lsl;

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// Select the register-offset operand for a pre/post-indexed AM2 load/store.
// Op is the indexed memory node; N is the offset expression.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Increment modes add the offset; decrement modes subtract it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  // Small constants are handled by the immediate-offset selectors instead.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      ShOpcVal = ARM_AM::no_shift;
      ShOpcVal = ARM_AM::no_shift;

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// Select the immediate offset for a *pre*-indexed AM2 load/store; the
// immediate is emitted signed (negated for decrement modes).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    // Register field is unused for the immediate form (reg 0 placeholder).
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
// Select the immediate offset for a post-indexed AM2 load/store; the sign is
// carried in the AM2 opcode encoding rather than the value itself.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                    SDLoc(Op), MVT::i32);
// Offset-less addressing: the whole expression is the base register.
// (Body elided in this excerpt.)
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
// Addressing-mode selector for AM3 ([reg, +/-reg] or [reg, #+/-imm8]), used
// by LDRH/LDRSB/LDRD-class instructions.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),

  // If the RHS is +/- imm8, fold into addr mode.
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    AddSub = ARM_AM::sub;
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),

  // Fallback: register + register form.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
// Select the offset operand for a pre/post-indexed AM3 load/store.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),

  // Register-offset form: zero immediate, sign carried in the AM3 opcode.
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
// Common AM5 matcher for VFP loads/stores: [reg, #+/-imm8 * 4] (or * 2 when
// FP16 is true, using the AM5FP16 encoding).
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),

  // If the RHS is +/- imm8, fold into addr mode.
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    AddSub = ARM_AM::sub;
    // FP16 and non-FP16 variants use different offset encodings.
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),

  // Fallback: base only, zero offset.
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
// AM5 selector for 32/64-bit VFP accesses (imm8 scaled by 4).
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
// AM5 selector for half-precision VFP accesses (imm8 scaled by 2).
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
// AM6 selector for NEON vector load/store: produces the address plus an
// alignment operand derived from the memory operand.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1008 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode
*Op
, SDValue N
,
1010 LSBaseSDNode
*LdSt
= cast
<LSBaseSDNode
>(Op
);
1011 ISD::MemIndexedMode AM
= LdSt
->getAddressingMode();
1012 if (AM
!= ISD::POST_INC
)
1015 if (ConstantSDNode
*NC
= dyn_cast
<ConstantSDNode
>(N
)) {
1016 if (NC
->getZExtValue() * 8 == LdSt
->getMemoryVT().getSizeInBits())
1017 Offset
= CurDAG
->getRegister(0, MVT::i32
);
1022 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N
,
1023 SDValue
&Offset
, SDValue
&Label
) {
1024 if (N
.getOpcode() == ARMISD::PIC_ADD
&& N
.hasOneUse()) {
1025 Offset
= N
.getOperand(0);
1026 SDValue N1
= N
.getOperand(1);
1027 Label
= CurDAG
->getTargetConstant(cast
<ConstantSDNode
>(N1
)->getZExtValue(),
1028 SDLoc(N
), MVT::i32
);
1036 //===----------------------------------------------------------------------===//
1037 // Thumb Addressing Modes
1038 //===----------------------------------------------------------------------===//
1040 static bool shouldUseZeroOffsetLdSt(SDValue N
) {
1041 // Negative numbers are difficult to materialise in thumb1. If we are
1042 // selecting the add of a negative, instead try to select ri with a zero
1043 // offset, so create the add node directly which will become a sub.
1044 if (N
.getOpcode() != ISD::ADD
)
1047 // Look for an imm which is not legal for ld/st, but is legal for sub.
1048 if (auto C
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1)))
1049 return C
->getSExtValue() < 0 && C
->getSExtValue() >= -255;
1054 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N
, SDValue
&Base
,
1056 if (N
.getOpcode() != ISD::ADD
&& !CurDAG
->isBaseWithConstantOffset(N
)) {
1057 ConstantSDNode
*NC
= dyn_cast
<ConstantSDNode
>(N
);
1058 if (!NC
|| !NC
->isNullValue())
1065 Base
= N
.getOperand(0);
1066 Offset
= N
.getOperand(1);
1070 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N
, SDValue
&Base
,
1072 if (shouldUseZeroOffsetLdSt(N
))
1073 return false; // Select ri instead
1074 return SelectThumbAddrModeRRSext(N
, Base
, Offset
);
1078 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N
, unsigned Scale
,
1079 SDValue
&Base
, SDValue
&OffImm
) {
1080 if (shouldUseZeroOffsetLdSt(N
)) {
1082 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1086 if (!CurDAG
->isBaseWithConstantOffset(N
)) {
1087 if (N
.getOpcode() == ISD::ADD
) {
1088 return false; // We want to select register offset instead
1089 } else if (N
.getOpcode() == ARMISD::Wrapper
&&
1090 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
1091 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
1092 N
.getOperand(0).getOpcode() != ISD::TargetConstantPool
&&
1093 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
1094 Base
= N
.getOperand(0);
1099 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1103 // If the RHS is + imm5 * scale, fold into addr mode.
1105 if (isScaledConstantInRange(N
.getOperand(1), Scale
, 0, 32, RHSC
)) {
1106 Base
= N
.getOperand(0);
1107 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1111 // Offset is too large, so use register offset instead.
1116 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N
, SDValue
&Base
,
1118 return SelectThumbAddrModeImm5S(N
, 4, Base
, OffImm
);
1122 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N
, SDValue
&Base
,
1124 return SelectThumbAddrModeImm5S(N
, 2, Base
, OffImm
);
1128 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N
, SDValue
&Base
,
1130 return SelectThumbAddrModeImm5S(N
, 1, Base
, OffImm
);
1133 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N
,
1134 SDValue
&Base
, SDValue
&OffImm
) {
1135 if (N
.getOpcode() == ISD::FrameIndex
) {
1136 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
1137 // Only multiples of 4 are allowed for the offset, so the frame object
1138 // alignment must be at least 4.
1139 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
1140 if (MFI
.getObjectAlignment(FI
) < 4)
1141 MFI
.setObjectAlignment(FI
, 4);
1142 Base
= CurDAG
->getTargetFrameIndex(
1143 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1144 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1148 if (!CurDAG
->isBaseWithConstantOffset(N
))
1151 if (N
.getOperand(0).getOpcode() == ISD::FrameIndex
) {
1152 // If the RHS is + imm8 * scale, fold into addr mode.
1154 if (isScaledConstantInRange(N
.getOperand(1), /*Scale=*/4, 0, 256, RHSC
)) {
1155 Base
= N
.getOperand(0);
1156 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1157 // Make sure the offset is inside the object, or we might fail to
1158 // allocate an emergency spill slot. (An out-of-range access is UB, but
1159 // it could show up anyway.)
1160 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
1161 if (RHSC
* 4 < MFI
.getObjectSize(FI
)) {
1162 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1163 // indexed by the LHS must be 4-byte aligned.
1164 if (!MFI
.isFixedObjectIndex(FI
) && MFI
.getObjectAlignment(FI
) < 4)
1165 MFI
.setObjectAlignment(FI
, 4);
1166 if (MFI
.getObjectAlignment(FI
) >= 4) {
1167 Base
= CurDAG
->getTargetFrameIndex(
1168 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1169 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1180 //===----------------------------------------------------------------------===//
1181 // Thumb 2 Addressing Modes
1182 //===----------------------------------------------------------------------===//
1185 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N
,
1186 SDValue
&Base
, SDValue
&OffImm
) {
1187 // Match simple R + imm12 operands.
1190 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
1191 !CurDAG
->isBaseWithConstantOffset(N
)) {
1192 if (N
.getOpcode() == ISD::FrameIndex
) {
1193 // Match frame index.
1194 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
1195 Base
= CurDAG
->getTargetFrameIndex(
1196 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1197 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1201 if (N
.getOpcode() == ARMISD::Wrapper
&&
1202 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
1203 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
1204 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
1205 Base
= N
.getOperand(0);
1206 if (Base
.getOpcode() == ISD::TargetConstantPool
)
1207 return false; // We want to select t2LDRpci instead.
1210 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1214 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1215 if (SelectT2AddrModeImm8(N
, Base
, OffImm
))
1216 // Let t2LDRi8 handle (R - imm8).
1219 int RHSC
= (int)RHS
->getZExtValue();
1220 if (N
.getOpcode() == ISD::SUB
)
1223 if (RHSC
>= 0 && RHSC
< 0x1000) { // 12 bits (unsigned)
1224 Base
= N
.getOperand(0);
1225 if (Base
.getOpcode() == ISD::FrameIndex
) {
1226 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1227 Base
= CurDAG
->getTargetFrameIndex(
1228 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1230 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1237 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1241 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N
,
1242 SDValue
&Base
, SDValue
&OffImm
) {
1243 // Match simple R - imm8 operands.
1244 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
1245 !CurDAG
->isBaseWithConstantOffset(N
))
1248 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1249 int RHSC
= (int)RHS
->getSExtValue();
1250 if (N
.getOpcode() == ISD::SUB
)
1253 if ((RHSC
>= -255) && (RHSC
< 0)) { // 8 bits (always negative)
1254 Base
= N
.getOperand(0);
1255 if (Base
.getOpcode() == ISD::FrameIndex
) {
1256 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1257 Base
= CurDAG
->getTargetFrameIndex(
1258 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1260 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1268 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode
*Op
, SDValue N
,
1270 unsigned Opcode
= Op
->getOpcode();
1271 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
1272 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
1273 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
1275 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x100, RHSC
)) { // 8 bits.
1276 OffImm
= ((AM
== ISD::PRE_INC
) || (AM
== ISD::POST_INC
))
1277 ? CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
)
1278 : CurDAG
->getTargetConstant(-RHSC
, SDLoc(N
), MVT::i32
);
1285 template<unsigned Shift
>
1286 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N
,
1287 SDValue
&Base
, SDValue
&OffImm
) {
1288 if (N
.getOpcode() == ISD::SUB
||
1289 CurDAG
->isBaseWithConstantOffset(N
)) {
1290 if (auto RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1291 int RHSC
= (int)RHS
->getZExtValue();
1292 if (N
.getOpcode() == ISD::SUB
)
1295 if (isShiftedInt
<7, Shift
>(RHSC
)) {
1296 Base
= N
.getOperand(0);
1297 if (Base
.getOpcode() == ISD::FrameIndex
) {
1298 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1299 Base
= CurDAG
->getTargetFrameIndex(
1300 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1302 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1310 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1314 template <unsigned Shift
>
1315 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode
*Op
, SDValue N
,
1317 return SelectT2AddrModeImm7Offset(Op
, N
, OffImm
, Shift
);
1320 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode
*Op
, SDValue N
,
1323 unsigned Opcode
= Op
->getOpcode();
1324 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
1325 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
1326 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
1328 if (isScaledConstantInRange(N
, 1 << Shift
, 0, 0x80, RHSC
)) { // 7 bits.
1330 ((AM
== ISD::PRE_INC
) || (AM
== ISD::POST_INC
))
1331 ? CurDAG
->getTargetConstant(RHSC
* (1 << Shift
), SDLoc(N
), MVT::i32
)
1332 : CurDAG
->getTargetConstant(-RHSC
* (1 << Shift
), SDLoc(N
),
1339 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N
,
1341 SDValue
&OffReg
, SDValue
&ShImm
) {
1342 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1343 if (N
.getOpcode() != ISD::ADD
&& !CurDAG
->isBaseWithConstantOffset(N
))
1346 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1347 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1348 int RHSC
= (int)RHS
->getZExtValue();
1349 if (RHSC
>= 0 && RHSC
< 0x1000) // 12 bits (unsigned)
1351 else if (RHSC
< 0 && RHSC
>= -255) // 8 bits
1355 // Look for (R + R) or (R + (R << [1,2,3])).
1357 Base
= N
.getOperand(0);
1358 OffReg
= N
.getOperand(1);
1360 // Swap if it is ((R << c) + R).
1361 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(OffReg
.getOpcode());
1362 if (ShOpcVal
!= ARM_AM::lsl
) {
1363 ShOpcVal
= ARM_AM::getShiftOpcForNode(Base
.getOpcode());
1364 if (ShOpcVal
== ARM_AM::lsl
)
1365 std::swap(Base
, OffReg
);
1368 if (ShOpcVal
== ARM_AM::lsl
) {
1369 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1371 if (ConstantSDNode
*Sh
= dyn_cast
<ConstantSDNode
>(OffReg
.getOperand(1))) {
1372 ShAmt
= Sh
->getZExtValue();
1373 if (ShAmt
< 4 && isShifterOpProfitable(OffReg
, ShOpcVal
, ShAmt
))
1374 OffReg
= OffReg
.getOperand(0);
1381 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1382 // and use it in a shifted operand do so.
1383 if (OffReg
.getOpcode() == ISD::MUL
&& N
.hasOneUse()) {
1384 unsigned PowerOfTwo
= 0;
1385 SDValue NewMulConst
;
1386 if (canExtractShiftFromMul(OffReg
, 3, PowerOfTwo
, NewMulConst
)) {
1387 HandleSDNode
Handle(OffReg
);
1388 replaceDAGValue(OffReg
.getOperand(1), NewMulConst
);
1389 OffReg
= Handle
.getValue();
1394 ShImm
= CurDAG
->getTargetConstant(ShAmt
, SDLoc(N
), MVT::i32
);
1399 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N
, SDValue
&Base
,
1401 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1404 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1406 if (N
.getOpcode() != ISD::ADD
|| !CurDAG
->isBaseWithConstantOffset(N
))
1409 ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
1413 uint32_t RHSC
= (int)RHS
->getZExtValue();
1414 if (RHSC
> 1020 || RHSC
% 4 != 0)
1417 Base
= N
.getOperand(0);
1418 if (Base
.getOpcode() == ISD::FrameIndex
) {
1419 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1420 Base
= CurDAG
->getTargetFrameIndex(
1421 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1424 OffImm
= CurDAG
->getTargetConstant(RHSC
/4, SDLoc(N
), MVT::i32
);
1428 //===--------------------------------------------------------------------===//
1430 /// getAL - Returns a ARMCC::AL immediate node.
1431 static inline SDValue
getAL(SelectionDAG
*CurDAG
, const SDLoc
&dl
) {
1432 return CurDAG
->getTargetConstant((uint64_t)ARMCC::AL
, dl
, MVT::i32
);
1435 void ARMDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
1436 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
1437 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
1440 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode
*N
) {
1441 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1442 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1443 if (AM
== ISD::UNINDEXED
)
1446 EVT LoadedVT
= LD
->getMemoryVT();
1447 SDValue Offset
, AMOpc
;
1448 bool isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1449 unsigned Opcode
= 0;
1451 if (LoadedVT
== MVT::i32
&& isPre
&&
1452 SelectAddrMode2OffsetImmPre(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1453 Opcode
= ARM::LDR_PRE_IMM
;
1455 } else if (LoadedVT
== MVT::i32
&& !isPre
&&
1456 SelectAddrMode2OffsetImm(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1457 Opcode
= ARM::LDR_POST_IMM
;
1459 } else if (LoadedVT
== MVT::i32
&&
1460 SelectAddrMode2OffsetReg(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1461 Opcode
= isPre
? ARM::LDR_PRE_REG
: ARM::LDR_POST_REG
;
1464 } else if (LoadedVT
== MVT::i16
&&
1465 SelectAddrMode3Offset(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1467 Opcode
= (LD
->getExtensionType() == ISD::SEXTLOAD
)
1468 ? (isPre
? ARM::LDRSH_PRE
: ARM::LDRSH_POST
)
1469 : (isPre
? ARM::LDRH_PRE
: ARM::LDRH_POST
);
1470 } else if (LoadedVT
== MVT::i8
|| LoadedVT
== MVT::i1
) {
1471 if (LD
->getExtensionType() == ISD::SEXTLOAD
) {
1472 if (SelectAddrMode3Offset(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1474 Opcode
= isPre
? ARM::LDRSB_PRE
: ARM::LDRSB_POST
;
1478 SelectAddrMode2OffsetImmPre(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1480 Opcode
= ARM::LDRB_PRE_IMM
;
1481 } else if (!isPre
&&
1482 SelectAddrMode2OffsetImm(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1484 Opcode
= ARM::LDRB_POST_IMM
;
1485 } else if (SelectAddrMode2OffsetReg(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1487 Opcode
= isPre
? ARM::LDRB_PRE_REG
: ARM::LDRB_POST_REG
;
1493 if (Opcode
== ARM::LDR_PRE_IMM
|| Opcode
== ARM::LDRB_PRE_IMM
) {
1494 SDValue Chain
= LD
->getChain();
1495 SDValue Base
= LD
->getBasePtr();
1496 SDValue Ops
[]= { Base
, AMOpc
, getAL(CurDAG
, SDLoc(N
)),
1497 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1498 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1500 transferMemOperands(N
, New
);
1501 ReplaceNode(N
, New
);
1504 SDValue Chain
= LD
->getChain();
1505 SDValue Base
= LD
->getBasePtr();
1506 SDValue Ops
[]= { Base
, Offset
, AMOpc
, getAL(CurDAG
, SDLoc(N
)),
1507 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1508 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1510 transferMemOperands(N
, New
);
1511 ReplaceNode(N
, New
);
1519 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode
*N
) {
1520 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1521 EVT LoadedVT
= LD
->getMemoryVT();
1522 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1523 if (AM
!= ISD::POST_INC
|| LD
->getExtensionType() != ISD::NON_EXTLOAD
||
1524 LoadedVT
.getSimpleVT().SimpleTy
!= MVT::i32
)
1527 auto *COffs
= dyn_cast
<ConstantSDNode
>(LD
->getOffset());
1528 if (!COffs
|| COffs
->getZExtValue() != 4)
1531 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1532 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1533 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1535 SDValue Chain
= LD
->getChain();
1536 SDValue Base
= LD
->getBasePtr();
1537 SDValue Ops
[]= { Base
, getAL(CurDAG
, SDLoc(N
)),
1538 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1539 SDNode
*New
= CurDAG
->getMachineNode(ARM::tLDR_postidx
, SDLoc(N
), MVT::i32
,
1540 MVT::i32
, MVT::Other
, Ops
);
1541 transferMemOperands(N
, New
);
1542 ReplaceNode(N
, New
);
1546 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode
*N
) {
1547 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1548 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1549 if (AM
== ISD::UNINDEXED
)
1552 EVT LoadedVT
= LD
->getMemoryVT();
1553 bool isSExtLd
= LD
->getExtensionType() == ISD::SEXTLOAD
;
1555 bool isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1556 unsigned Opcode
= 0;
1558 if (SelectT2AddrModeImm8Offset(N
, LD
->getOffset(), Offset
)) {
1559 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
1561 Opcode
= isPre
? ARM::t2LDR_PRE
: ARM::t2LDR_POST
;
1565 Opcode
= isPre
? ARM::t2LDRSH_PRE
: ARM::t2LDRSH_POST
;
1567 Opcode
= isPre
? ARM::t2LDRH_PRE
: ARM::t2LDRH_POST
;
1572 Opcode
= isPre
? ARM::t2LDRSB_PRE
: ARM::t2LDRSB_POST
;
1574 Opcode
= isPre
? ARM::t2LDRB_PRE
: ARM::t2LDRB_POST
;
1583 SDValue Chain
= LD
->getChain();
1584 SDValue Base
= LD
->getBasePtr();
1585 SDValue Ops
[]= { Base
, Offset
, getAL(CurDAG
, SDLoc(N
)),
1586 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1587 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1589 transferMemOperands(N
, New
);
1590 ReplaceNode(N
, New
);
1597 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode
*N
) {
1598 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1599 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1600 if (AM
== ISD::UNINDEXED
)
1602 EVT LoadedVT
= LD
->getMemoryVT();
1603 if (!LoadedVT
.isVector())
1605 bool isSExtLd
= LD
->getExtensionType() == ISD::SEXTLOAD
;
1607 bool isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1608 unsigned Opcode
= 0;
1609 unsigned Align
= LD
->getAlignment();
1610 bool IsLE
= Subtarget
->isLittle();
1612 if (Align
>= 2 && LoadedVT
== MVT::v4i16
&&
1613 SelectT2AddrModeImm7Offset(N
, LD
->getOffset(), Offset
, 1)) {
1615 Opcode
= isPre
? ARM::MVE_VLDRHS32_pre
: ARM::MVE_VLDRHS32_post
;
1617 Opcode
= isPre
? ARM::MVE_VLDRHU32_pre
: ARM::MVE_VLDRHU32_post
;
1618 } else if (LoadedVT
== MVT::v8i8
&&
1619 SelectT2AddrModeImm7Offset(N
, LD
->getOffset(), Offset
, 0)) {
1621 Opcode
= isPre
? ARM::MVE_VLDRBS16_pre
: ARM::MVE_VLDRBS16_post
;
1623 Opcode
= isPre
? ARM::MVE_VLDRBU16_pre
: ARM::MVE_VLDRBU16_post
;
1624 } else if (LoadedVT
== MVT::v4i8
&&
1625 SelectT2AddrModeImm7Offset(N
, LD
->getOffset(), Offset
, 0)) {
1627 Opcode
= isPre
? ARM::MVE_VLDRBS32_pre
: ARM::MVE_VLDRBS32_post
;
1629 Opcode
= isPre
? ARM::MVE_VLDRBU32_pre
: ARM::MVE_VLDRBU32_post
;
1630 } else if (Align
>= 4 &&
1631 (IsLE
|| LoadedVT
== MVT::v4i32
|| LoadedVT
== MVT::v4f32
) &&
1632 SelectT2AddrModeImm7Offset(N
, LD
->getOffset(), Offset
, 2))
1633 Opcode
= isPre
? ARM::MVE_VLDRWU32_pre
: ARM::MVE_VLDRWU32_post
;
1634 else if (Align
>= 2 &&
1635 (IsLE
|| LoadedVT
== MVT::v8i16
|| LoadedVT
== MVT::v8f16
) &&
1636 SelectT2AddrModeImm7Offset(N
, LD
->getOffset(), Offset
, 1))
1637 Opcode
= isPre
? ARM::MVE_VLDRHU16_pre
: ARM::MVE_VLDRHU16_post
;
1638 else if ((IsLE
|| LoadedVT
== MVT::v16i8
) &&
1639 SelectT2AddrModeImm7Offset(N
, LD
->getOffset(), Offset
, 0))
1640 Opcode
= isPre
? ARM::MVE_VLDRBU8_pre
: ARM::MVE_VLDRBU8_post
;
1644 SDValue Chain
= LD
->getChain();
1645 SDValue Base
= LD
->getBasePtr();
1646 SDValue Ops
[] = {Base
, Offset
,
1647 CurDAG
->getTargetConstant(ARMVCC::None
, SDLoc(N
), MVT::i32
),
1648 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1649 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), LD
->getValueType(0),
1650 MVT::i32
, MVT::Other
, Ops
);
1651 transferMemOperands(N
, New
);
1652 ReplaceUses(SDValue(N
, 0), SDValue(New
, 1));
1653 ReplaceUses(SDValue(N
, 1), SDValue(New
, 0));
1654 ReplaceUses(SDValue(N
, 2), SDValue(New
, 2));
1655 CurDAG
->RemoveDeadNode(N
);
1659 /// Form a GPRPair pseudo register from a pair of GPR regs.
1660 SDNode
*ARMDAGToDAGISel::createGPRPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1661 SDLoc
dl(V0
.getNode());
1663 CurDAG
->getTargetConstant(ARM::GPRPairRegClassID
, dl
, MVT::i32
);
1664 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::gsub_0
, dl
, MVT::i32
);
1665 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::gsub_1
, dl
, MVT::i32
);
1666 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1667 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1670 /// Form a D register from a pair of S registers.
1671 SDNode
*ARMDAGToDAGISel::createSRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1672 SDLoc
dl(V0
.getNode());
1674 CurDAG
->getTargetConstant(ARM::DPR_VFP2RegClassID
, dl
, MVT::i32
);
1675 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::ssub_0
, dl
, MVT::i32
);
1676 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::ssub_1
, dl
, MVT::i32
);
1677 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1678 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1681 /// Form a quad register from a pair of D registers.
1682 SDNode
*ARMDAGToDAGISel::createDRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1683 SDLoc
dl(V0
.getNode());
1684 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QPRRegClassID
, dl
,
1686 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::dsub_0
, dl
, MVT::i32
);
1687 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::dsub_1
, dl
, MVT::i32
);
1688 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1689 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1692 /// Form 4 consecutive D registers from a pair of Q registers.
1693 SDNode
*ARMDAGToDAGISel::createQRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1694 SDLoc
dl(V0
.getNode());
1695 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQPRRegClassID
, dl
,
1697 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::qsub_0
, dl
, MVT::i32
);
1698 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::qsub_1
, dl
, MVT::i32
);
1699 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1700 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1703 /// Form 4 consecutive S registers.
1704 SDNode
*ARMDAGToDAGISel::createQuadSRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1705 SDValue V2
, SDValue V3
) {
1706 SDLoc
dl(V0
.getNode());
1708 CurDAG
->getTargetConstant(ARM::QPR_VFP2RegClassID
, dl
, MVT::i32
);
1709 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::ssub_0
, dl
, MVT::i32
);
1710 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::ssub_1
, dl
, MVT::i32
);
1711 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::ssub_2
, dl
, MVT::i32
);
1712 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::ssub_3
, dl
, MVT::i32
);
1713 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1714 V2
, SubReg2
, V3
, SubReg3
};
1715 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1718 /// Form 4 consecutive D registers.
1719 SDNode
*ARMDAGToDAGISel::createQuadDRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1720 SDValue V2
, SDValue V3
) {
1721 SDLoc
dl(V0
.getNode());
1722 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQPRRegClassID
, dl
,
1724 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::dsub_0
, dl
, MVT::i32
);
1725 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::dsub_1
, dl
, MVT::i32
);
1726 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::dsub_2
, dl
, MVT::i32
);
1727 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::dsub_3
, dl
, MVT::i32
);
1728 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1729 V2
, SubReg2
, V3
, SubReg3
};
1730 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1733 /// Form 4 consecutive Q registers.
1734 SDNode
*ARMDAGToDAGISel::createQuadQRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1735 SDValue V2
, SDValue V3
) {
1736 SDLoc
dl(V0
.getNode());
1737 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQQQPRRegClassID
, dl
,
1739 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::qsub_0
, dl
, MVT::i32
);
1740 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::qsub_1
, dl
, MVT::i32
);
1741 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::qsub_2
, dl
, MVT::i32
);
1742 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::qsub_3
, dl
, MVT::i32
);
1743 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1744 V2
, SubReg2
, V3
, SubReg3
};
1745 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1748 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1749 /// of a NEON VLD or VST instruction. The supported values depend on the
1750 /// number of registers being loaded.
1751 SDValue
ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align
, const SDLoc
&dl
,
1752 unsigned NumVecs
, bool is64BitVector
) {
1753 unsigned NumRegs
= NumVecs
;
1754 if (!is64BitVector
&& NumVecs
< 3)
1757 unsigned Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
1758 if (Alignment
>= 32 && NumRegs
== 4)
1760 else if (Alignment
>= 16 && (NumRegs
== 2 || NumRegs
== 4))
1762 else if (Alignment
>= 8)
1767 return CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
1770 static bool isVLDfixed(unsigned Opc
)
1773 default: return false;
1774 case ARM::VLD1d8wb_fixed
: return true;
1775 case ARM::VLD1d16wb_fixed
: return true;
1776 case ARM::VLD1d64Qwb_fixed
: return true;
1777 case ARM::VLD1d32wb_fixed
: return true;
1778 case ARM::VLD1d64wb_fixed
: return true;
1779 case ARM::VLD1d64TPseudoWB_fixed
: return true;
1780 case ARM::VLD1d64QPseudoWB_fixed
: return true;
1781 case ARM::VLD1q8wb_fixed
: return true;
1782 case ARM::VLD1q16wb_fixed
: return true;
1783 case ARM::VLD1q32wb_fixed
: return true;
1784 case ARM::VLD1q64wb_fixed
: return true;
1785 case ARM::VLD1DUPd8wb_fixed
: return true;
1786 case ARM::VLD1DUPd16wb_fixed
: return true;
1787 case ARM::VLD1DUPd32wb_fixed
: return true;
1788 case ARM::VLD1DUPq8wb_fixed
: return true;
1789 case ARM::VLD1DUPq16wb_fixed
: return true;
1790 case ARM::VLD1DUPq32wb_fixed
: return true;
1791 case ARM::VLD2d8wb_fixed
: return true;
1792 case ARM::VLD2d16wb_fixed
: return true;
1793 case ARM::VLD2d32wb_fixed
: return true;
1794 case ARM::VLD2q8PseudoWB_fixed
: return true;
1795 case ARM::VLD2q16PseudoWB_fixed
: return true;
1796 case ARM::VLD2q32PseudoWB_fixed
: return true;
1797 case ARM::VLD2DUPd8wb_fixed
: return true;
1798 case ARM::VLD2DUPd16wb_fixed
: return true;
1799 case ARM::VLD2DUPd32wb_fixed
: return true;
1803 static bool isVSTfixed(unsigned Opc
)
1806 default: return false;
1807 case ARM::VST1d8wb_fixed
: return true;
1808 case ARM::VST1d16wb_fixed
: return true;
1809 case ARM::VST1d32wb_fixed
: return true;
1810 case ARM::VST1d64wb_fixed
: return true;
1811 case ARM::VST1q8wb_fixed
: return true;
1812 case ARM::VST1q16wb_fixed
: return true;
1813 case ARM::VST1q32wb_fixed
: return true;
1814 case ARM::VST1q64wb_fixed
: return true;
1815 case ARM::VST1d64TPseudoWB_fixed
: return true;
1816 case ARM::VST1d64QPseudoWB_fixed
: return true;
1817 case ARM::VST2d8wb_fixed
: return true;
1818 case ARM::VST2d16wb_fixed
: return true;
1819 case ARM::VST2d32wb_fixed
: return true;
1820 case ARM::VST2q8PseudoWB_fixed
: return true;
1821 case ARM::VST2q16PseudoWB_fixed
: return true;
1822 case ARM::VST2q32PseudoWB_fixed
: return true;
1826 // Get the register stride update opcode of a VLD/VST instruction that
1827 // is otherwise equivalent to the given fixed stride updating instruction.
1828 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc
) {
1829 assert((isVLDfixed(Opc
) || isVSTfixed(Opc
))
1830 && "Incorrect fixed stride updating instruction.");
1833 case ARM::VLD1d8wb_fixed
: return ARM::VLD1d8wb_register
;
1834 case ARM::VLD1d16wb_fixed
: return ARM::VLD1d16wb_register
;
1835 case ARM::VLD1d32wb_fixed
: return ARM::VLD1d32wb_register
;
1836 case ARM::VLD1d64wb_fixed
: return ARM::VLD1d64wb_register
;
1837 case ARM::VLD1q8wb_fixed
: return ARM::VLD1q8wb_register
;
1838 case ARM::VLD1q16wb_fixed
: return ARM::VLD1q16wb_register
;
1839 case ARM::VLD1q32wb_fixed
: return ARM::VLD1q32wb_register
;
1840 case ARM::VLD1q64wb_fixed
: return ARM::VLD1q64wb_register
;
1841 case ARM::VLD1d64Twb_fixed
: return ARM::VLD1d64Twb_register
;
1842 case ARM::VLD1d64Qwb_fixed
: return ARM::VLD1d64Qwb_register
;
1843 case ARM::VLD1d64TPseudoWB_fixed
: return ARM::VLD1d64TPseudoWB_register
;
1844 case ARM::VLD1d64QPseudoWB_fixed
: return ARM::VLD1d64QPseudoWB_register
;
1845 case ARM::VLD1DUPd8wb_fixed
: return ARM::VLD1DUPd8wb_register
;
1846 case ARM::VLD1DUPd16wb_fixed
: return ARM::VLD1DUPd16wb_register
;
1847 case ARM::VLD1DUPd32wb_fixed
: return ARM::VLD1DUPd32wb_register
;
1848 case ARM::VLD1DUPq8wb_fixed
: return ARM::VLD1DUPq8wb_register
;
1849 case ARM::VLD1DUPq16wb_fixed
: return ARM::VLD1DUPq16wb_register
;
1850 case ARM::VLD1DUPq32wb_fixed
: return ARM::VLD1DUPq32wb_register
;
1852 case ARM::VST1d8wb_fixed
: return ARM::VST1d8wb_register
;
1853 case ARM::VST1d16wb_fixed
: return ARM::VST1d16wb_register
;
1854 case ARM::VST1d32wb_fixed
: return ARM::VST1d32wb_register
;
1855 case ARM::VST1d64wb_fixed
: return ARM::VST1d64wb_register
;
1856 case ARM::VST1q8wb_fixed
: return ARM::VST1q8wb_register
;
1857 case ARM::VST1q16wb_fixed
: return ARM::VST1q16wb_register
;
1858 case ARM::VST1q32wb_fixed
: return ARM::VST1q32wb_register
;
1859 case ARM::VST1q64wb_fixed
: return ARM::VST1q64wb_register
;
1860 case ARM::VST1d64TPseudoWB_fixed
: return ARM::VST1d64TPseudoWB_register
;
1861 case ARM::VST1d64QPseudoWB_fixed
: return ARM::VST1d64QPseudoWB_register
;
1863 case ARM::VLD2d8wb_fixed
: return ARM::VLD2d8wb_register
;
1864 case ARM::VLD2d16wb_fixed
: return ARM::VLD2d16wb_register
;
1865 case ARM::VLD2d32wb_fixed
: return ARM::VLD2d32wb_register
;
1866 case ARM::VLD2q8PseudoWB_fixed
: return ARM::VLD2q8PseudoWB_register
;
1867 case ARM::VLD2q16PseudoWB_fixed
: return ARM::VLD2q16PseudoWB_register
;
1868 case ARM::VLD2q32PseudoWB_fixed
: return ARM::VLD2q32PseudoWB_register
;
1870 case ARM::VST2d8wb_fixed
: return ARM::VST2d8wb_register
;
1871 case ARM::VST2d16wb_fixed
: return ARM::VST2d16wb_register
;
1872 case ARM::VST2d32wb_fixed
: return ARM::VST2d32wb_register
;
1873 case ARM::VST2q8PseudoWB_fixed
: return ARM::VST2q8PseudoWB_register
;
1874 case ARM::VST2q16PseudoWB_fixed
: return ARM::VST2q16PseudoWB_register
;
1875 case ARM::VST2q32PseudoWB_fixed
: return ARM::VST2q32PseudoWB_register
;
1877 case ARM::VLD2DUPd8wb_fixed
: return ARM::VLD2DUPd8wb_register
;
1878 case ARM::VLD2DUPd16wb_fixed
: return ARM::VLD2DUPd16wb_register
;
1879 case ARM::VLD2DUPd32wb_fixed
: return ARM::VLD2DUPd32wb_register
;
1881 return Opc
; // If not one we handle, return it unchanged.
1884 /// Returns true if the given increment is a Constant known to be equal to the
1885 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1887 static bool isPerfectIncrement(SDValue Inc
, EVT VecTy
, unsigned NumVecs
) {
1888 auto C
= dyn_cast
<ConstantSDNode
>(Inc
);
1889 return C
&& C
->getZExtValue() == VecTy
.getSizeInBits() / 8 * NumVecs
;
1892 void ARMDAGToDAGISel::SelectVLD(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
1893 const uint16_t *DOpcodes
,
1894 const uint16_t *QOpcodes0
,
1895 const uint16_t *QOpcodes1
) {
1896 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VLD NumVecs out-of-range");
1899 SDValue MemAddr
, Align
;
1900 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
1901 // nodes are not intrinsics.
1902 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
1903 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
1906 SDValue Chain
= N
->getOperand(0);
1907 EVT VT
= N
->getValueType(0);
1908 bool is64BitVector
= VT
.is64BitVector();
1909 Align
= GetVLDSTAlign(Align
, dl
, NumVecs
, is64BitVector
);
1911 unsigned OpcodeIndex
;
1912 switch (VT
.getSimpleVT().SimpleTy
) {
1913 default: llvm_unreachable("unhandled vld type");
1914 // Double-register operations:
1915 case MVT::v8i8
: OpcodeIndex
= 0; break;
1917 case MVT::v4i16
: OpcodeIndex
= 1; break;
1919 case MVT::v2i32
: OpcodeIndex
= 2; break;
1920 case MVT::v1i64
: OpcodeIndex
= 3; break;
1921 // Quad-register operations:
1922 case MVT::v16i8
: OpcodeIndex
= 0; break;
1924 case MVT::v8i16
: OpcodeIndex
= 1; break;
1926 case MVT::v4i32
: OpcodeIndex
= 2; break;
1928 case MVT::v2i64
: OpcodeIndex
= 3; break;
1935 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
1938 ResTy
= EVT::getVectorVT(*CurDAG
->getContext(), MVT::i64
, ResTyElts
);
1940 std::vector
<EVT
> ResTys
;
1941 ResTys
.push_back(ResTy
);
1943 ResTys
.push_back(MVT::i32
);
1944 ResTys
.push_back(MVT::Other
);
1946 SDValue Pred
= getAL(CurDAG
, dl
);
1947 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
1949 SmallVector
<SDValue
, 7> Ops
;
1951 // Double registers and VLD1/VLD2 quad registers are directly supported.
1952 if (is64BitVector
|| NumVecs
<= 2) {
1953 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
1954 QOpcodes0
[OpcodeIndex
]);
1955 Ops
.push_back(MemAddr
);
1956 Ops
.push_back(Align
);
1958 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
1959 bool IsImmUpdate
= isPerfectIncrement(Inc
, VT
, NumVecs
);
1961 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1962 // check for the opcode rather than the number of vector elements.
1963 if (isVLDfixed(Opc
))
1964 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
1966 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1967 // the operands if not such an opcode.
1968 } else if (!isVLDfixed(Opc
))
1969 Ops
.push_back(Reg0
);
1971 Ops
.push_back(Pred
);
1972 Ops
.push_back(Reg0
);
1973 Ops
.push_back(Chain
);
1974 VLd
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
1977 // Otherwise, quad registers are loaded with two separate instructions,
1978 // where one loads the even registers and the other loads the odd registers.
1979 EVT AddrTy
= MemAddr
.getValueType();
1981 // Load the even subregs. This is always an updating load, so that it
1982 // provides the address to the second load for the odd subregs.
1984 SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, ResTy
), 0);
1985 const SDValue OpsA
[] = { MemAddr
, Align
, Reg0
, ImplDef
, Pred
, Reg0
, Chain
};
1986 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
,
1987 ResTy
, AddrTy
, MVT::Other
, OpsA
);
1988 Chain
= SDValue(VLdA
, 2);
1990 // Load the odd subregs.
1991 Ops
.push_back(SDValue(VLdA
, 1));
1992 Ops
.push_back(Align
);
1994 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
1995 assert(isa
<ConstantSDNode
>(Inc
.getNode()) &&
1996 "only constant post-increment update allowed for VLD3/4");
1998 Ops
.push_back(Reg0
);
2000 Ops
.push_back(SDValue(VLdA
, 0));
2001 Ops
.push_back(Pred
);
2002 Ops
.push_back(Reg0
);
2003 Ops
.push_back(Chain
);
2004 VLd
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
, Ops
);
2007 // Transfer memoperands.
2008 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2009 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLd
), {MemOp
});
2012 ReplaceNode(N
, VLd
);
2016 // Extract out the subregisters.
2017 SDValue SuperReg
= SDValue(VLd
, 0);
2018 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7 &&
2019 ARM::qsub_3
== ARM::qsub_0
+ 3,
2020 "Unexpected subreg numbering");
2021 unsigned Sub0
= (is64BitVector
? ARM::dsub_0
: ARM::qsub_0
);
2022 for (unsigned Vec
= 0; Vec
< NumVecs
; ++Vec
)
2023 ReplaceUses(SDValue(N
, Vec
),
2024 CurDAG
->getTargetExtractSubreg(Sub0
+ Vec
, dl
, VT
, SuperReg
));
2025 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLd
, 1));
2027 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLd
, 2));
2028 CurDAG
->RemoveDeadNode(N
);
2031 void ARMDAGToDAGISel::SelectVST(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
2032 const uint16_t *DOpcodes
,
2033 const uint16_t *QOpcodes0
,
2034 const uint16_t *QOpcodes1
) {
2035 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VST NumVecs out-of-range");
2038 SDValue MemAddr
, Align
;
2039 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
2040 // nodes are not intrinsics.
2041 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2042 unsigned Vec0Idx
= 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2043 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2046 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2048 SDValue Chain
= N
->getOperand(0);
2049 EVT VT
= N
->getOperand(Vec0Idx
).getValueType();
2050 bool is64BitVector
= VT
.is64BitVector();
2051 Align
= GetVLDSTAlign(Align
, dl
, NumVecs
, is64BitVector
);
2053 unsigned OpcodeIndex
;
2054 switch (VT
.getSimpleVT().SimpleTy
) {
2055 default: llvm_unreachable("unhandled vst type");
2056 // Double-register operations:
2057 case MVT::v8i8
: OpcodeIndex
= 0; break;
2059 case MVT::v4i16
: OpcodeIndex
= 1; break;
2061 case MVT::v2i32
: OpcodeIndex
= 2; break;
2062 case MVT::v1i64
: OpcodeIndex
= 3; break;
2063 // Quad-register operations:
2064 case MVT::v16i8
: OpcodeIndex
= 0; break;
2066 case MVT::v8i16
: OpcodeIndex
= 1; break;
2068 case MVT::v4i32
: OpcodeIndex
= 2; break;
2070 case MVT::v2i64
: OpcodeIndex
= 3; break;
2073 std::vector
<EVT
> ResTys
;
2075 ResTys
.push_back(MVT::i32
);
2076 ResTys
.push_back(MVT::Other
);
2078 SDValue Pred
= getAL(CurDAG
, dl
);
2079 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2080 SmallVector
<SDValue
, 7> Ops
;
2082 // Double registers and VST1/VST2 quad registers are directly supported.
2083 if (is64BitVector
|| NumVecs
<= 2) {
2086 SrcReg
= N
->getOperand(Vec0Idx
);
2087 } else if (is64BitVector
) {
2088 // Form a REG_SEQUENCE to force register allocation.
2089 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2090 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2092 SrcReg
= SDValue(createDRegPairNode(MVT::v2i64
, V0
, V1
), 0);
2094 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2095 // If it's a vst3, form a quad D-register and leave the last part as
2097 SDValue V3
= (NumVecs
== 3)
2098 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
,dl
,VT
), 0)
2099 : N
->getOperand(Vec0Idx
+ 3);
2100 SrcReg
= SDValue(createQuadDRegsNode(MVT::v4i64
, V0
, V1
, V2
, V3
), 0);
2103 // Form a QQ register.
2104 SDValue Q0
= N
->getOperand(Vec0Idx
);
2105 SDValue Q1
= N
->getOperand(Vec0Idx
+ 1);
2106 SrcReg
= SDValue(createQRegPairNode(MVT::v4i64
, Q0
, Q1
), 0);
2109 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
2110 QOpcodes0
[OpcodeIndex
]);
2111 Ops
.push_back(MemAddr
);
2112 Ops
.push_back(Align
);
2114 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2115 bool IsImmUpdate
= isPerfectIncrement(Inc
, VT
, NumVecs
);
2117 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2118 // check for the opcode rather than the number of vector elements.
2119 if (isVSTfixed(Opc
))
2120 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
2123 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2124 // the operands if not such an opcode.
2125 else if (!isVSTfixed(Opc
))
2126 Ops
.push_back(Reg0
);
2128 Ops
.push_back(SrcReg
);
2129 Ops
.push_back(Pred
);
2130 Ops
.push_back(Reg0
);
2131 Ops
.push_back(Chain
);
2132 SDNode
*VSt
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2134 // Transfer memoperands.
2135 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VSt
), {MemOp
});
2137 ReplaceNode(N
, VSt
);
2141 // Otherwise, quad registers are stored with two separate instructions,
2142 // where one stores the even registers and the other stores the odd registers.
2144 // Form the QQQQ REG_SEQUENCE.
2145 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2146 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2147 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2148 SDValue V3
= (NumVecs
== 3)
2149 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, VT
), 0)
2150 : N
->getOperand(Vec0Idx
+ 3);
2151 SDValue RegSeq
= SDValue(createQuadQRegsNode(MVT::v8i64
, V0
, V1
, V2
, V3
), 0);
2153 // Store the even D registers. This is always an updating store, so that it
2154 // provides the address to the second store for the odd subregs.
2155 const SDValue OpsA
[] = { MemAddr
, Align
, Reg0
, RegSeq
, Pred
, Reg0
, Chain
};
2156 SDNode
*VStA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
,
2157 MemAddr
.getValueType(),
2159 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VStA
), {MemOp
});
2160 Chain
= SDValue(VStA
, 1);
2162 // Store the odd D registers.
2163 Ops
.push_back(SDValue(VStA
, 0));
2164 Ops
.push_back(Align
);
2166 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2167 assert(isa
<ConstantSDNode
>(Inc
.getNode()) &&
2168 "only constant post-increment update allowed for VST3/4");
2170 Ops
.push_back(Reg0
);
2172 Ops
.push_back(RegSeq
);
2173 Ops
.push_back(Pred
);
2174 Ops
.push_back(Reg0
);
2175 Ops
.push_back(Chain
);
2176 SDNode
*VStB
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
,
2178 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VStB
), {MemOp
});
2179 ReplaceNode(N
, VStB
);
2182 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode
*N
, bool IsLoad
, bool isUpdating
,
2184 const uint16_t *DOpcodes
,
2185 const uint16_t *QOpcodes
) {
2186 assert(NumVecs
>=2 && NumVecs
<= 4 && "VLDSTLane NumVecs out-of-range");
2189 SDValue MemAddr
, Align
;
2190 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
2191 // nodes are not intrinsics.
2192 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2193 unsigned Vec0Idx
= 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2194 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2197 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2199 SDValue Chain
= N
->getOperand(0);
2201 cast
<ConstantSDNode
>(N
->getOperand(Vec0Idx
+ NumVecs
))->getZExtValue();
2202 EVT VT
= N
->getOperand(Vec0Idx
).getValueType();
2203 bool is64BitVector
= VT
.is64BitVector();
2205 unsigned Alignment
= 0;
2207 Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
2208 unsigned NumBytes
= NumVecs
* VT
.getScalarSizeInBits() / 8;
2209 if (Alignment
> NumBytes
)
2210 Alignment
= NumBytes
;
2211 if (Alignment
< 8 && Alignment
< NumBytes
)
2213 // Alignment must be a power of two; make sure of that.
2214 Alignment
= (Alignment
& -Alignment
);
2218 Align
= CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
2220 unsigned OpcodeIndex
;
2221 switch (VT
.getSimpleVT().SimpleTy
) {
2222 default: llvm_unreachable("unhandled vld/vst lane type");
2223 // Double-register operations:
2224 case MVT::v8i8
: OpcodeIndex
= 0; break;
2226 case MVT::v4i16
: OpcodeIndex
= 1; break;
2228 case MVT::v2i32
: OpcodeIndex
= 2; break;
2229 // Quad-register operations:
2231 case MVT::v8i16
: OpcodeIndex
= 0; break;
2233 case MVT::v4i32
: OpcodeIndex
= 1; break;
2236 std::vector
<EVT
> ResTys
;
2238 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
2241 ResTys
.push_back(EVT::getVectorVT(*CurDAG
->getContext(),
2242 MVT::i64
, ResTyElts
));
2245 ResTys
.push_back(MVT::i32
);
2246 ResTys
.push_back(MVT::Other
);
2248 SDValue Pred
= getAL(CurDAG
, dl
);
2249 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2251 SmallVector
<SDValue
, 8> Ops
;
2252 Ops
.push_back(MemAddr
);
2253 Ops
.push_back(Align
);
2255 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2257 isPerfectIncrement(Inc
, VT
.getVectorElementType(), NumVecs
);
2258 Ops
.push_back(IsImmUpdate
? Reg0
: Inc
);
2262 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2263 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2266 SuperReg
= SDValue(createDRegPairNode(MVT::v2i64
, V0
, V1
), 0);
2268 SuperReg
= SDValue(createQRegPairNode(MVT::v4i64
, V0
, V1
), 0);
2270 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2271 SDValue V3
= (NumVecs
== 3)
2272 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, VT
), 0)
2273 : N
->getOperand(Vec0Idx
+ 3);
2275 SuperReg
= SDValue(createQuadDRegsNode(MVT::v4i64
, V0
, V1
, V2
, V3
), 0);
2277 SuperReg
= SDValue(createQuadQRegsNode(MVT::v8i64
, V0
, V1
, V2
, V3
), 0);
2279 Ops
.push_back(SuperReg
);
2280 Ops
.push_back(getI32Imm(Lane
, dl
));
2281 Ops
.push_back(Pred
);
2282 Ops
.push_back(Reg0
);
2283 Ops
.push_back(Chain
);
2285 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
2286 QOpcodes
[OpcodeIndex
]);
2287 SDNode
*VLdLn
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2288 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLdLn
), {MemOp
});
2290 ReplaceNode(N
, VLdLn
);
2294 // Extract the subregisters.
2295 SuperReg
= SDValue(VLdLn
, 0);
2296 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7 &&
2297 ARM::qsub_3
== ARM::qsub_0
+ 3,
2298 "Unexpected subreg numbering");
2299 unsigned Sub0
= is64BitVector
? ARM::dsub_0
: ARM::qsub_0
;
2300 for (unsigned Vec
= 0; Vec
< NumVecs
; ++Vec
)
2301 ReplaceUses(SDValue(N
, Vec
),
2302 CurDAG
->getTargetExtractSubreg(Sub0
+ Vec
, dl
, VT
, SuperReg
));
2303 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLdLn
, 1));
2305 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLdLn
, 2));
2306 CurDAG
->RemoveDeadNode(N
);
2309 void ARMDAGToDAGISel::SelectVLDDup(SDNode
*N
, bool IsIntrinsic
,
2310 bool isUpdating
, unsigned NumVecs
,
2311 const uint16_t *DOpcodes
,
2312 const uint16_t *QOpcodes0
,
2313 const uint16_t *QOpcodes1
) {
2314 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VLDDup NumVecs out-of-range");
2317 SDValue MemAddr
, Align
;
2318 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2319 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2322 SDValue Chain
= N
->getOperand(0);
2323 EVT VT
= N
->getValueType(0);
2324 bool is64BitVector
= VT
.is64BitVector();
2326 unsigned Alignment
= 0;
2328 Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
2329 unsigned NumBytes
= NumVecs
* VT
.getScalarSizeInBits() / 8;
2330 if (Alignment
> NumBytes
)
2331 Alignment
= NumBytes
;
2332 if (Alignment
< 8 && Alignment
< NumBytes
)
2334 // Alignment must be a power of two; make sure of that.
2335 Alignment
= (Alignment
& -Alignment
);
2339 Align
= CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
2341 unsigned OpcodeIndex
;
2342 switch (VT
.getSimpleVT().SimpleTy
) {
2343 default: llvm_unreachable("unhandled vld-dup type");
2345 case MVT::v16i8
: OpcodeIndex
= 0; break;
2350 OpcodeIndex
= 1; break;
2354 case MVT::v4i32
: OpcodeIndex
= 2; break;
2356 case MVT::v1i64
: OpcodeIndex
= 3; break;
2359 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
2362 EVT ResTy
= EVT::getVectorVT(*CurDAG
->getContext(), MVT::i64
, ResTyElts
);
2364 std::vector
<EVT
> ResTys
;
2365 ResTys
.push_back(ResTy
);
2367 ResTys
.push_back(MVT::i32
);
2368 ResTys
.push_back(MVT::Other
);
2370 SDValue Pred
= getAL(CurDAG
, dl
);
2371 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2374 if (is64BitVector
|| NumVecs
== 1) {
2375 SmallVector
<SDValue
, 6> Ops
;
2376 Ops
.push_back(MemAddr
);
2377 Ops
.push_back(Align
);
2378 unsigned Opc
= is64BitVector
? DOpcodes
[OpcodeIndex
] :
2379 QOpcodes0
[OpcodeIndex
];
2381 // fixed-stride update instructions don't have an explicit writeback
2382 // operand. It's implicit in the opcode itself.
2383 SDValue Inc
= N
->getOperand(2);
2385 isPerfectIncrement(Inc
, VT
.getVectorElementType(), NumVecs
);
2386 if (NumVecs
<= 2 && !IsImmUpdate
)
2387 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
2390 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2391 else if (NumVecs
> 2)
2392 Ops
.push_back(Reg0
);
2394 Ops
.push_back(Pred
);
2395 Ops
.push_back(Reg0
);
2396 Ops
.push_back(Chain
);
2397 VLdDup
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2398 } else if (NumVecs
== 2) {
2399 const SDValue OpsA
[] = { MemAddr
, Align
, Pred
, Reg0
, Chain
};
2400 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
],
2403 Chain
= SDValue(VLdA
, 1);
2404 const SDValue OpsB
[] = { MemAddr
, Align
, Pred
, Reg0
, Chain
};
2405 VLdDup
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
, OpsB
);
2408 SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, ResTy
), 0);
2409 const SDValue OpsA
[] = { MemAddr
, Align
, ImplDef
, Pred
, Reg0
, Chain
};
2410 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
],
2413 SDValue SuperReg
= SDValue(VLdA
, 0);
2414 Chain
= SDValue(VLdA
, 1);
2415 const SDValue OpsB
[] = { MemAddr
, Align
, SuperReg
, Pred
, Reg0
, Chain
};
2416 VLdDup
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
, OpsB
);
2419 // Transfer memoperands.
2420 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2421 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLdDup
), {MemOp
});
2423 // Extract the subregisters.
2425 ReplaceUses(SDValue(N
, 0), SDValue(VLdDup
, 0));
2427 SDValue SuperReg
= SDValue(VLdDup
, 0);
2428 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7, "Unexpected subreg numbering");
2429 unsigned SubIdx
= is64BitVector
? ARM::dsub_0
: ARM::qsub_0
;
2430 for (unsigned Vec
= 0; Vec
!= NumVecs
; ++Vec
) {
2431 ReplaceUses(SDValue(N
, Vec
),
2432 CurDAG
->getTargetExtractSubreg(SubIdx
+Vec
, dl
, VT
, SuperReg
));
2435 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLdDup
, 1));
2437 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLdDup
, 2));
2438 CurDAG
->RemoveDeadNode(N
);
2441 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode
*N
, bool isSigned
) {
2442 if (!Subtarget
->hasV6T2Ops())
2445 unsigned Opc
= isSigned
2446 ? (Subtarget
->isThumb() ? ARM::t2SBFX
: ARM::SBFX
)
2447 : (Subtarget
->isThumb() ? ARM::t2UBFX
: ARM::UBFX
);
2450 // For unsigned extracts, check for a shift right and mask
2451 unsigned And_imm
= 0;
2452 if (N
->getOpcode() == ISD::AND
) {
2453 if (isOpcWithIntImmediate(N
, ISD::AND
, And_imm
)) {
2455 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2456 if (And_imm
& (And_imm
+ 1))
2459 unsigned Srl_imm
= 0;
2460 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRL
,
2462 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
2464 // Mask off the unnecessary bits of the AND immediate; normally
2465 // DAGCombine will do this, but that might not happen if
2466 // targetShrinkDemandedConstant chooses a different immediate.
2467 And_imm
&= -1U >> Srl_imm
;
2469 // Note: The width operand is encoded as width-1.
2470 unsigned Width
= countTrailingOnes(And_imm
) - 1;
2471 unsigned LSB
= Srl_imm
;
2473 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2475 if ((LSB
+ Width
+ 1) == N
->getValueType(0).getSizeInBits()) {
2476 // It's cheaper to use a right shift to extract the top bits.
2477 if (Subtarget
->isThumb()) {
2478 Opc
= isSigned
? ARM::t2ASRri
: ARM::t2LSRri
;
2479 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2480 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2481 getAL(CurDAG
, dl
), Reg0
, Reg0
};
2482 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2486 // ARM models shift instructions as MOVsi with shifter operand.
2487 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(ISD::SRL
);
2489 CurDAG
->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal
, LSB
), dl
,
2491 SDValue Ops
[] = { N
->getOperand(0).getOperand(0), ShOpc
,
2492 getAL(CurDAG
, dl
), Reg0
, Reg0
};
2493 CurDAG
->SelectNodeTo(N
, ARM::MOVsi
, MVT::i32
, Ops
);
2497 assert(LSB
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
2498 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2499 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2500 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
2501 getAL(CurDAG
, dl
), Reg0
};
2502 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2509 // Otherwise, we're looking for a shift of a shift
2510 unsigned Shl_imm
= 0;
2511 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SHL
, Shl_imm
)) {
2512 assert(Shl_imm
> 0 && Shl_imm
< 32 && "bad amount in shift node!");
2513 unsigned Srl_imm
= 0;
2514 if (isInt32Immediate(N
->getOperand(1), Srl_imm
)) {
2515 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
2516 // Note: The width operand is encoded as width-1.
2517 unsigned Width
= 32 - Srl_imm
- 1;
2518 int LSB
= Srl_imm
- Shl_imm
;
2521 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2522 assert(LSB
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
2523 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2524 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2525 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
2526 getAL(CurDAG
, dl
), Reg0
};
2527 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2532 // Or we are looking for a shift of an and, with a mask operand
2533 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, And_imm
) &&
2534 isShiftedMask_32(And_imm
)) {
2535 unsigned Srl_imm
= 0;
2536 unsigned LSB
= countTrailingZeros(And_imm
);
2537 // Shift must be the same as the ands lsb
2538 if (isInt32Immediate(N
->getOperand(1), Srl_imm
) && Srl_imm
== LSB
) {
2539 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
2540 unsigned MSB
= 31 - countLeadingZeros(And_imm
);
2541 // Note: The width operand is encoded as width-1.
2542 unsigned Width
= MSB
- LSB
;
2543 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2544 assert(Srl_imm
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
2545 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2546 CurDAG
->getTargetConstant(Srl_imm
, dl
, MVT::i32
),
2547 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
2548 getAL(CurDAG
, dl
), Reg0
};
2549 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2554 if (N
->getOpcode() == ISD::SIGN_EXTEND_INREG
) {
2555 unsigned Width
= cast
<VTSDNode
>(N
->getOperand(1))->getVT().getSizeInBits();
2557 if (!isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRL
, LSB
) &&
2558 !isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRA
, LSB
))
2561 if (LSB
+ Width
> 32)
2564 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2565 assert(LSB
+ Width
<= 32 && "Shouldn't create an invalid ubfx");
2566 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2567 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2568 CurDAG
->getTargetConstant(Width
- 1, dl
, MVT::i32
),
2569 getAL(CurDAG
, dl
), Reg0
};
2570 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2577 /// Target-specific DAG combining for ISD::XOR.
2578 /// Target-independent combining lowers SELECT_CC nodes of the form
2579 /// select_cc setg[ge] X, 0, X, -X
2580 /// select_cc setgt X, -1, X, -X
2581 /// select_cc setl[te] X, 0, -X, X
2582 /// select_cc setlt X, 1, -X, X
2583 /// which represent Integer ABS into:
2584 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2585 /// ARM instruction selection detects the latter and matches it to
2586 /// ARM::ABS or ARM::t2ABS machine node.
2587 bool ARMDAGToDAGISel::tryABSOp(SDNode
*N
){
2588 SDValue XORSrc0
= N
->getOperand(0);
2589 SDValue XORSrc1
= N
->getOperand(1);
2590 EVT VT
= N
->getValueType(0);
2592 if (Subtarget
->isThumb1Only())
2595 if (XORSrc0
.getOpcode() != ISD::ADD
|| XORSrc1
.getOpcode() != ISD::SRA
)
2598 SDValue ADDSrc0
= XORSrc0
.getOperand(0);
2599 SDValue ADDSrc1
= XORSrc0
.getOperand(1);
2600 SDValue SRASrc0
= XORSrc1
.getOperand(0);
2601 SDValue SRASrc1
= XORSrc1
.getOperand(1);
2602 ConstantSDNode
*SRAConstant
= dyn_cast
<ConstantSDNode
>(SRASrc1
);
2603 EVT XType
= SRASrc0
.getValueType();
2604 unsigned Size
= XType
.getSizeInBits() - 1;
2606 if (ADDSrc1
== XORSrc1
&& ADDSrc0
== SRASrc0
&&
2607 XType
.isInteger() && SRAConstant
!= nullptr &&
2608 Size
== SRAConstant
->getZExtValue()) {
2609 unsigned Opcode
= Subtarget
->isThumb2() ? ARM::t2ABS
: ARM::ABS
;
2610 CurDAG
->SelectNodeTo(N
, Opcode
, VT
, ADDSrc0
);
2617 /// We've got special pseudo-instructions for these
2618 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode
*N
) {
2620 EVT MemTy
= cast
<MemSDNode
>(N
)->getMemoryVT();
2621 if (MemTy
== MVT::i8
)
2622 Opcode
= ARM::CMP_SWAP_8
;
2623 else if (MemTy
== MVT::i16
)
2624 Opcode
= ARM::CMP_SWAP_16
;
2625 else if (MemTy
== MVT::i32
)
2626 Opcode
= ARM::CMP_SWAP_32
;
2628 llvm_unreachable("Unknown AtomicCmpSwap type");
2630 SDValue Ops
[] = {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3),
2632 SDNode
*CmpSwap
= CurDAG
->getMachineNode(
2634 CurDAG
->getVTList(MVT::i32
, MVT::i32
, MVT::Other
), Ops
);
2636 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
2637 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(CmpSwap
), {MemOp
});
2639 ReplaceUses(SDValue(N
, 0), SDValue(CmpSwap
, 0));
2640 ReplaceUses(SDValue(N
, 1), SDValue(CmpSwap
, 2));
2641 CurDAG
->RemoveDeadNode(N
);
2644 static Optional
<std::pair
<unsigned, unsigned>>
2645 getContiguousRangeOfSetBits(const APInt
&A
) {
2646 unsigned FirstOne
= A
.getBitWidth() - A
.countLeadingZeros() - 1;
2647 unsigned LastOne
= A
.countTrailingZeros();
2648 if (A
.countPopulation() != (FirstOne
- LastOne
+ 1))
2649 return Optional
<std::pair
<unsigned,unsigned>>();
2650 return std::make_pair(FirstOne
, LastOne
);
2653 void ARMDAGToDAGISel::SelectCMPZ(SDNode
*N
, bool &SwitchEQNEToPLMI
) {
2654 assert(N
->getOpcode() == ARMISD::CMPZ
);
2655 SwitchEQNEToPLMI
= false;
2657 if (!Subtarget
->isThumb())
2658 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2659 // LSR don't exist as standalone instructions - they need the barrel shifter.
2662 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2663 SDValue And
= N
->getOperand(0);
2664 if (!And
->hasOneUse())
2667 SDValue Zero
= N
->getOperand(1);
2668 if (!isa
<ConstantSDNode
>(Zero
) || !cast
<ConstantSDNode
>(Zero
)->isNullValue() ||
2669 And
->getOpcode() != ISD::AND
)
2671 SDValue X
= And
.getOperand(0);
2672 auto C
= dyn_cast
<ConstantSDNode
>(And
.getOperand(1));
2676 auto Range
= getContiguousRangeOfSetBits(C
->getAPIntValue());
2680 // There are several ways to lower this:
2684 auto EmitShift
= [&](unsigned Opc
, SDValue Src
, unsigned Imm
) -> SDNode
* {
2685 if (Subtarget
->isThumb2()) {
2686 Opc
= (Opc
== ARM::tLSLri
) ? ARM::t2LSLri
: ARM::t2LSRri
;
2687 SDValue Ops
[] = { Src
, CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
),
2688 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
2689 CurDAG
->getRegister(0, MVT::i32
) };
2690 return CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
2692 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
), Src
,
2693 CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
),
2694 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
)};
2695 return CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
2699 if (Range
->second
== 0) {
2700 // 1. Mask includes the LSB -> Simply shift the top N bits off
2701 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
2702 ReplaceNode(And
.getNode(), NewN
);
2703 } else if (Range
->first
== 31) {
2704 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2705 NewN
= EmitShift(ARM::tLSRri
, X
, Range
->second
);
2706 ReplaceNode(And
.getNode(), NewN
);
2707 } else if (Range
->first
== Range
->second
) {
2708 // 3. Only one bit is set. We can shift this into the sign bit and use a
2709 // PL/MI comparison.
2710 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
2711 ReplaceNode(And
.getNode(), NewN
);
2713 SwitchEQNEToPLMI
= true;
2714 } else if (!Subtarget
->hasV6T2Ops()) {
2715 // 4. Do a double shift to clear bottom and top bits, but only in
2716 // thumb-1 mode as in thumb-2 we can use UBFX.
2717 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
2718 NewN
= EmitShift(ARM::tLSRri
, SDValue(NewN
, 0),
2719 Range
->second
+ (31 - Range
->first
));
2720 ReplaceNode(And
.getNode(), NewN
);
2725 void ARMDAGToDAGISel::Select(SDNode
*N
) {
2728 if (N
->isMachineOpcode()) {
2730 return; // Already selected.
2733 switch (N
->getOpcode()) {
2736 // For Thumb1, match an sp-relative store in C++. This is a little
2737 // unfortunate, but I don't think I can make the chain check work
2738 // otherwise. (The chain of the store has to be the same as the chain
2739 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2740 // a direct reference to "SP".)
2742 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2743 // a different addressing mode from other four-byte stores.
2745 // This pattern usually comes up with call arguments.
2746 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
2747 SDValue Ptr
= ST
->getBasePtr();
2748 if (Subtarget
->isThumb1Only() && ST
->isUnindexed()) {
2750 if (Ptr
.getOpcode() == ISD::ADD
&&
2751 isScaledConstantInRange(Ptr
.getOperand(1), /*Scale=*/4, 0, 256, RHSC
))
2752 Ptr
= Ptr
.getOperand(0);
2754 if (Ptr
.getOpcode() == ISD::CopyFromReg
&&
2755 cast
<RegisterSDNode
>(Ptr
.getOperand(1))->getReg() == ARM::SP
&&
2756 Ptr
.getOperand(0) == ST
->getChain()) {
2757 SDValue Ops
[] = {ST
->getValue(),
2758 CurDAG
->getRegister(ARM::SP
, MVT::i32
),
2759 CurDAG
->getTargetConstant(RHSC
, dl
, MVT::i32
),
2761 CurDAG
->getRegister(0, MVT::i32
),
2763 MachineSDNode
*ResNode
=
2764 CurDAG
->getMachineNode(ARM::tSTRspi
, dl
, MVT::Other
, Ops
);
2765 MachineMemOperand
*MemOp
= ST
->getMemOperand();
2766 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(ResNode
), {MemOp
});
2767 ReplaceNode(N
, ResNode
);
2773 case ISD::WRITE_REGISTER
:
2774 if (tryWriteRegister(N
))
2777 case ISD::READ_REGISTER
:
2778 if (tryReadRegister(N
))
2781 case ISD::INLINEASM
:
2782 case ISD::INLINEASM_BR
:
2783 if (tryInlineAsm(N
))
2787 // Select special operations if XOR node forms integer ABS pattern
2790 // Other cases are autogenerated.
2792 case ISD::Constant
: {
2793 unsigned Val
= cast
<ConstantSDNode
>(N
)->getZExtValue();
2794 // If we can't materialize the constant we need to use a literal pool
2795 if (ConstantMaterializationCost(Val
) > 2) {
2796 SDValue CPIdx
= CurDAG
->getTargetConstantPool(
2797 ConstantInt::get(Type::getInt32Ty(*CurDAG
->getContext()), Val
),
2798 TLI
->getPointerTy(CurDAG
->getDataLayout()));
2801 if (Subtarget
->isThumb()) {
2805 CurDAG
->getRegister(0, MVT::i32
),
2806 CurDAG
->getEntryNode()
2808 ResNode
= CurDAG
->getMachineNode(ARM::tLDRpci
, dl
, MVT::i32
, MVT::Other
,
2813 CurDAG
->getTargetConstant(0, dl
, MVT::i32
),
2815 CurDAG
->getRegister(0, MVT::i32
),
2816 CurDAG
->getEntryNode()
2818 ResNode
= CurDAG
->getMachineNode(ARM::LDRcp
, dl
, MVT::i32
, MVT::Other
,
2821 // Annotate the Node with memory operand information so that MachineInstr
2822 // queries work properly. This e.g. gives the register allocation the
2823 // required information for rematerialization.
2824 MachineFunction
& MF
= CurDAG
->getMachineFunction();
2825 MachineMemOperand
*MemOp
=
2826 MF
.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF
),
2827 MachineMemOperand::MOLoad
, 4, 4);
2829 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(ResNode
), {MemOp
});
2831 ReplaceNode(N
, ResNode
);
2835 // Other cases are autogenerated.
2838 case ISD::FrameIndex
: {
2839 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2840 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
2841 SDValue TFI
= CurDAG
->getTargetFrameIndex(
2842 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
2843 if (Subtarget
->isThumb1Only()) {
2844 // Set the alignment of the frame object to 4, to avoid having to generate
2845 // more than one ADD
2846 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
2847 if (MFI
.getObjectAlignment(FI
) < 4)
2848 MFI
.setObjectAlignment(FI
, 4);
2849 CurDAG
->SelectNodeTo(N
, ARM::tADDframe
, MVT::i32
, TFI
,
2850 CurDAG
->getTargetConstant(0, dl
, MVT::i32
));
2853 unsigned Opc
= ((Subtarget
->isThumb() && Subtarget
->hasThumb2()) ?
2854 ARM::t2ADDri
: ARM::ADDri
);
2855 SDValue Ops
[] = { TFI
, CurDAG
->getTargetConstant(0, dl
, MVT::i32
),
2856 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
2857 CurDAG
->getRegister(0, MVT::i32
) };
2858 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2863 if (tryV6T2BitfieldExtractOp(N
, false))
2866 case ISD::SIGN_EXTEND_INREG
:
2868 if (tryV6T2BitfieldExtractOp(N
, true))
2872 if (Subtarget
->isThumb1Only())
2874 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1))) {
2875 unsigned RHSV
= C
->getZExtValue();
2877 if (isPowerOf2_32(RHSV
-1)) { // 2^n+1?
2878 unsigned ShImm
= Log2_32(RHSV
-1);
2881 SDValue V
= N
->getOperand(0);
2882 ShImm
= ARM_AM::getSORegOpc(ARM_AM::lsl
, ShImm
);
2883 SDValue ShImmOp
= CurDAG
->getTargetConstant(ShImm
, dl
, MVT::i32
);
2884 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2885 if (Subtarget
->isThumb()) {
2886 SDValue Ops
[] = { V
, V
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
, Reg0
};
2887 CurDAG
->SelectNodeTo(N
, ARM::t2ADDrs
, MVT::i32
, Ops
);
2890 SDValue Ops
[] = { V
, V
, Reg0
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
,
2892 CurDAG
->SelectNodeTo(N
, ARM::ADDrsi
, MVT::i32
, Ops
);
2896 if (isPowerOf2_32(RHSV
+1)) { // 2^n-1?
2897 unsigned ShImm
= Log2_32(RHSV
+1);
2900 SDValue V
= N
->getOperand(0);
2901 ShImm
= ARM_AM::getSORegOpc(ARM_AM::lsl
, ShImm
);
2902 SDValue ShImmOp
= CurDAG
->getTargetConstant(ShImm
, dl
, MVT::i32
);
2903 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2904 if (Subtarget
->isThumb()) {
2905 SDValue Ops
[] = { V
, V
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
, Reg0
};
2906 CurDAG
->SelectNodeTo(N
, ARM::t2RSBrs
, MVT::i32
, Ops
);
2909 SDValue Ops
[] = { V
, V
, Reg0
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
,
2911 CurDAG
->SelectNodeTo(N
, ARM::RSBrsi
, MVT::i32
, Ops
);
2918 // Check for unsigned bitfield extract
2919 if (tryV6T2BitfieldExtractOp(N
, false))
2922 // If an immediate is used in an AND node, it is possible that the immediate
2923 // can be more optimally materialized when negated. If this is the case we
2924 // can negate the immediate and use a BIC instead.
2925 auto *N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
2926 if (N1C
&& N1C
->hasOneUse() && Subtarget
->isThumb()) {
2927 uint32_t Imm
= (uint32_t) N1C
->getZExtValue();
2929 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2930 // immediate can be negated and fit in the immediate operand of
2931 // a t2BIC, don't do any manual transform here as this can be
2932 // handled by the generic ISel machinery.
2933 bool PreferImmediateEncoding
=
2934 Subtarget
->hasThumb2() && (is_t2_so_imm(Imm
) || is_t2_so_imm_not(Imm
));
2935 if (!PreferImmediateEncoding
&&
2936 ConstantMaterializationCost(Imm
) >
2937 ConstantMaterializationCost(~Imm
)) {
2938 // The current immediate costs more to materialize than a negated
2939 // immediate, so negate the immediate and use a BIC.
2941 CurDAG
->getConstant(~N1C
->getZExtValue(), dl
, MVT::i32
);
2942 // If the new constant didn't exist before, reposition it in the topological
2943 // ordering so it is just before N. Otherwise, don't touch its location.
2944 if (NewImm
->getNodeId() == -1)
2945 CurDAG
->RepositionNode(N
->getIterator(), NewImm
.getNode());
2947 if (!Subtarget
->hasThumb2()) {
2948 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
),
2949 N
->getOperand(0), NewImm
, getAL(CurDAG
, dl
),
2950 CurDAG
->getRegister(0, MVT::i32
)};
2951 ReplaceNode(N
, CurDAG
->getMachineNode(ARM::tBIC
, dl
, MVT::i32
, Ops
));
2954 SDValue Ops
[] = {N
->getOperand(0), NewImm
, getAL(CurDAG
, dl
),
2955 CurDAG
->getRegister(0, MVT::i32
),
2956 CurDAG
->getRegister(0, MVT::i32
)};
2958 CurDAG
->getMachineNode(ARM::t2BICrr
, dl
, MVT::i32
, Ops
));
2964 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2965 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2966 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2967 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2968 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2969 EVT VT
= N
->getValueType(0);
2972 unsigned Opc
= (Subtarget
->isThumb() && Subtarget
->hasThumb2())
2974 : (Subtarget
->hasV6T2Ops() ? ARM::MOVTi16
: 0);
2977 SDValue N0
= N
->getOperand(0), N1
= N
->getOperand(1);
2978 N1C
= dyn_cast
<ConstantSDNode
>(N1
);
2981 if (N0
.getOpcode() == ISD::OR
&& N0
.getNode()->hasOneUse()) {
2982 SDValue N2
= N0
.getOperand(1);
2983 ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N2
);
2986 unsigned N1CVal
= N1C
->getZExtValue();
2987 unsigned N2CVal
= N2C
->getZExtValue();
2988 if ((N1CVal
& 0xffff0000U
) == (N2CVal
& 0xffff0000U
) &&
2989 (N1CVal
& 0xffffU
) == 0xffffU
&&
2990 (N2CVal
& 0xffffU
) == 0x0U
) {
2991 SDValue Imm16
= CurDAG
->getTargetConstant((N2CVal
& 0xFFFF0000U
) >> 16,
2993 SDValue Ops
[] = { N0
.getOperand(0), Imm16
,
2994 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
) };
2995 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, Ops
));
3002 case ARMISD::UMAAL
: {
3003 unsigned Opc
= Subtarget
->isThumb() ? ARM::t2UMAAL
: ARM::UMAAL
;
3004 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1),
3005 N
->getOperand(2), N
->getOperand(3),
3007 CurDAG
->getRegister(0, MVT::i32
) };
3008 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, MVT::i32
, Ops
));
3011 case ARMISD::UMLAL
:{
3012 if (Subtarget
->isThumb()) {
3013 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3014 N
->getOperand(3), getAL(CurDAG
, dl
),
3015 CurDAG
->getRegister(0, MVT::i32
)};
3017 N
, CurDAG
->getMachineNode(ARM::t2UMLAL
, dl
, MVT::i32
, MVT::i32
, Ops
));
3020 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3021 N
->getOperand(3), getAL(CurDAG
, dl
),
3022 CurDAG
->getRegister(0, MVT::i32
),
3023 CurDAG
->getRegister(0, MVT::i32
) };
3024 ReplaceNode(N
, CurDAG
->getMachineNode(
3025 Subtarget
->hasV6Ops() ? ARM::UMLAL
: ARM::UMLALv5
, dl
,
3026 MVT::i32
, MVT::i32
, Ops
));
3030 case ARMISD::SMLAL
:{
3031 if (Subtarget
->isThumb()) {
3032 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3033 N
->getOperand(3), getAL(CurDAG
, dl
),
3034 CurDAG
->getRegister(0, MVT::i32
)};
3036 N
, CurDAG
->getMachineNode(ARM::t2SMLAL
, dl
, MVT::i32
, MVT::i32
, Ops
));
3039 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
3040 N
->getOperand(3), getAL(CurDAG
, dl
),
3041 CurDAG
->getRegister(0, MVT::i32
),
3042 CurDAG
->getRegister(0, MVT::i32
) };
3043 ReplaceNode(N
, CurDAG
->getMachineNode(
3044 Subtarget
->hasV6Ops() ? ARM::SMLAL
: ARM::SMLALv5
, dl
,
3045 MVT::i32
, MVT::i32
, Ops
));
3049 case ARMISD::SUBE
: {
3050 if (!Subtarget
->hasV6Ops() || !Subtarget
->hasDSP())
3052 // Look for a pattern to match SMMLS
3053 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3054 if (N
->getOperand(1).getOpcode() != ISD::SMUL_LOHI
||
3055 N
->getOperand(2).getOpcode() != ARMISD::SUBC
||
3056 !SDValue(N
, 1).use_empty())
3059 if (Subtarget
->isThumb())
3060 assert(Subtarget
->hasThumb2() &&
3061 "This pattern should not be generated for Thumb");
3063 SDValue SmulLoHi
= N
->getOperand(1);
3064 SDValue Subc
= N
->getOperand(2);
3065 auto *Zero
= dyn_cast
<ConstantSDNode
>(Subc
.getOperand(0));
3067 if (!Zero
|| Zero
->getZExtValue() != 0 ||
3068 Subc
.getOperand(1) != SmulLoHi
.getValue(0) ||
3069 N
->getOperand(1) != SmulLoHi
.getValue(1) ||
3070 N
->getOperand(2) != Subc
.getValue(1))
3073 unsigned Opc
= Subtarget
->isThumb2() ? ARM::t2SMMLS
: ARM::SMMLS
;
3074 SDValue Ops
[] = { SmulLoHi
.getOperand(0), SmulLoHi
.getOperand(1),
3075 N
->getOperand(0), getAL(CurDAG
, dl
),
3076 CurDAG
->getRegister(0, MVT::i32
) };
3077 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
));
3081 if (Subtarget
->hasMVEIntegerOps() && tryMVEIndexedLoad(N
))
3083 if (Subtarget
->isThumb() && Subtarget
->hasThumb2()) {
3084 if (tryT2IndexedLoad(N
))
3086 } else if (Subtarget
->isThumb()) {
3087 if (tryT1IndexedLoad(N
))
3089 } else if (tryARMIndexedLoad(N
))
3091 // Other cases are autogenerated.
3096 SDValue Ops
[] = { N
->getOperand(1),
3099 unsigned Opc
= N
->getOpcode() == ARMISD::WLS
?
3100 ARM::t2WhileLoopStart
: ARM::t2LoopEnd
;
3101 SDNode
*New
= CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
, Ops
);
3102 ReplaceUses(N
, New
);
3103 CurDAG
->RemoveDeadNode(N
);
3106 case ARMISD::LOOP_DEC
: {
3107 SDValue Ops
[] = { N
->getOperand(1),
3111 CurDAG
->getMachineNode(ARM::t2LoopDec
, dl
,
3112 CurDAG
->getVTList(MVT::i32
, MVT::Other
), Ops
);
3113 ReplaceUses(N
, Dec
);
3114 CurDAG
->RemoveDeadNode(N
);
3117 case ARMISD::BRCOND
: {
3118 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3119 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3120 // Pattern complexity = 6 cost = 1 size = 0
3122 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3123 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3124 // Pattern complexity = 6 cost = 1 size = 0
3126 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3127 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3128 // Pattern complexity = 6 cost = 1 size = 0
3130 unsigned Opc
= Subtarget
->isThumb() ?
3131 ((Subtarget
->hasThumb2()) ? ARM::t2Bcc
: ARM::tBcc
) : ARM::Bcc
;
3132 SDValue Chain
= N
->getOperand(0);
3133 SDValue N1
= N
->getOperand(1);
3134 SDValue N2
= N
->getOperand(2);
3135 SDValue N3
= N
->getOperand(3);
3136 SDValue InFlag
= N
->getOperand(4);
3137 assert(N1
.getOpcode() == ISD::BasicBlock
);
3138 assert(N2
.getOpcode() == ISD::Constant
);
3139 assert(N3
.getOpcode() == ISD::Register
);
3141 unsigned CC
= (unsigned) cast
<ConstantSDNode
>(N2
)->getZExtValue();
3143 if (InFlag
.getOpcode() == ARMISD::CMPZ
) {
3144 if (InFlag
.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN
) {
3145 SDValue Int
= InFlag
.getOperand(0);
3146 uint64_t ID
= cast
<ConstantSDNode
>(Int
->getOperand(1))->getZExtValue();
3148 // Handle low-overhead loops.
3149 if (ID
== Intrinsic::loop_decrement_reg
) {
3150 SDValue Elements
= Int
.getOperand(2);
3151 SDValue Size
= CurDAG
->getTargetConstant(
3152 cast
<ConstantSDNode
>(Int
.getOperand(3))->getZExtValue(), dl
,
3155 SDValue Args
[] = { Elements
, Size
, Int
.getOperand(0) };
3157 CurDAG
->getMachineNode(ARM::t2LoopDec
, dl
,
3158 CurDAG
->getVTList(MVT::i32
, MVT::Other
),
3160 ReplaceUses(Int
.getNode(), LoopDec
);
3162 SDValue EndArgs
[] = { SDValue(LoopDec
, 0), N1
, Chain
};
3164 CurDAG
->getMachineNode(ARM::t2LoopEnd
, dl
, MVT::Other
, EndArgs
);
3166 ReplaceUses(N
, LoopEnd
);
3167 CurDAG
->RemoveDeadNode(N
);
3168 CurDAG
->RemoveDeadNode(InFlag
.getNode());
3169 CurDAG
->RemoveDeadNode(Int
.getNode());
3174 bool SwitchEQNEToPLMI
;
3175 SelectCMPZ(InFlag
.getNode(), SwitchEQNEToPLMI
);
3176 InFlag
= N
->getOperand(4);
3178 if (SwitchEQNEToPLMI
) {
3179 switch ((ARMCC::CondCodes
)CC
) {
3180 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3182 CC
= (unsigned)ARMCC::MI
;
3185 CC
= (unsigned)ARMCC::PL
;
3191 SDValue Tmp2
= CurDAG
->getTargetConstant(CC
, dl
, MVT::i32
);
3192 SDValue Ops
[] = { N1
, Tmp2
, N3
, Chain
, InFlag
};
3193 SDNode
*ResNode
= CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
,
3195 Chain
= SDValue(ResNode
, 0);
3196 if (N
->getNumValues() == 2) {
3197 InFlag
= SDValue(ResNode
, 1);
3198 ReplaceUses(SDValue(N
, 1), InFlag
);
3200 ReplaceUses(SDValue(N
, 0),
3201 SDValue(Chain
.getNode(), Chain
.getResNo()));
3202 CurDAG
->RemoveDeadNode(N
);
3206 case ARMISD::CMPZ
: {
3207 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3208 // This allows us to avoid materializing the expensive negative constant.
3209 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3210 // for its glue output.
3211 SDValue X
= N
->getOperand(0);
3212 auto *C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1).getNode());
3213 if (C
&& C
->getSExtValue() < 0 && Subtarget
->isThumb()) {
3214 int64_t Addend
= -C
->getSExtValue();
3216 SDNode
*Add
= nullptr;
3217 // ADDS can be better than CMN if the immediate fits in a
3218 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3219 // Outside that range we can just use a CMN which is 32-bit but has a
3220 // 12-bit immediate range.
3221 if (Addend
< 1<<8) {
3222 if (Subtarget
->isThumb2()) {
3223 SDValue Ops
[] = { X
, CurDAG
->getTargetConstant(Addend
, dl
, MVT::i32
),
3224 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
3225 CurDAG
->getRegister(0, MVT::i32
) };
3226 Add
= CurDAG
->getMachineNode(ARM::t2ADDri
, dl
, MVT::i32
, Ops
);
3228 unsigned Opc
= (Addend
< 1<<3) ? ARM::tADDi3
: ARM::tADDi8
;
3229 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
), X
,
3230 CurDAG
->getTargetConstant(Addend
, dl
, MVT::i32
),
3231 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
)};
3232 Add
= CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
3236 SDValue Ops2
[] = {SDValue(Add
, 0), CurDAG
->getConstant(0, dl
, MVT::i32
)};
3237 CurDAG
->MorphNodeTo(N
, ARMISD::CMPZ
, CurDAG
->getVTList(MVT::Glue
), Ops2
);
3240 // Other cases are autogenerated.
3244 case ARMISD::CMOV
: {
3245 SDValue InFlag
= N
->getOperand(4);
3247 if (InFlag
.getOpcode() == ARMISD::CMPZ
) {
3248 bool SwitchEQNEToPLMI
;
3249 SelectCMPZ(InFlag
.getNode(), SwitchEQNEToPLMI
);
3251 if (SwitchEQNEToPLMI
) {
3252 SDValue ARMcc
= N
->getOperand(2);
3253 ARMCC::CondCodes CC
=
3254 (ARMCC::CondCodes
)cast
<ConstantSDNode
>(ARMcc
)->getZExtValue();
3257 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3265 SDValue NewARMcc
= CurDAG
->getConstant((unsigned)CC
, dl
, MVT::i32
);
3266 SDValue Ops
[] = {N
->getOperand(0), N
->getOperand(1), NewARMcc
,
3267 N
->getOperand(3), N
->getOperand(4)};
3268 CurDAG
->MorphNodeTo(N
, ARMISD::CMOV
, N
->getVTList(), Ops
);
3272 // Other cases are autogenerated.
3276 case ARMISD::VZIP
: {
3278 EVT VT
= N
->getValueType(0);
3279 switch (VT
.getSimpleVT().SimpleTy
) {
3281 case MVT::v8i8
: Opc
= ARM::VZIPd8
; break;
3283 case MVT::v4i16
: Opc
= ARM::VZIPd16
; break;
3285 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3286 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
3287 case MVT::v16i8
: Opc
= ARM::VZIPq8
; break;
3289 case MVT::v8i16
: Opc
= ARM::VZIPq16
; break;
3291 case MVT::v4i32
: Opc
= ARM::VZIPq32
; break;
3293 SDValue Pred
= getAL(CurDAG
, dl
);
3294 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
3295 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
3296 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
3299 case ARMISD::VUZP
: {
3301 EVT VT
= N
->getValueType(0);
3302 switch (VT
.getSimpleVT().SimpleTy
) {
3304 case MVT::v8i8
: Opc
= ARM::VUZPd8
; break;
3306 case MVT::v4i16
: Opc
= ARM::VUZPd16
; break;
3308 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3309 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
3310 case MVT::v16i8
: Opc
= ARM::VUZPq8
; break;
3312 case MVT::v8i16
: Opc
= ARM::VUZPq16
; break;
3314 case MVT::v4i32
: Opc
= ARM::VUZPq32
; break;
3316 SDValue Pred
= getAL(CurDAG
, dl
);
3317 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
3318 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
3319 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
3322 case ARMISD::VTRN
: {
3324 EVT VT
= N
->getValueType(0);
3325 switch (VT
.getSimpleVT().SimpleTy
) {
3327 case MVT::v8i8
: Opc
= ARM::VTRNd8
; break;
3329 case MVT::v4i16
: Opc
= ARM::VTRNd16
; break;
3331 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
3332 case MVT::v16i8
: Opc
= ARM::VTRNq8
; break;
3334 case MVT::v8i16
: Opc
= ARM::VTRNq16
; break;
3336 case MVT::v4i32
: Opc
= ARM::VTRNq32
; break;
3338 SDValue Pred
= getAL(CurDAG
, dl
);
3339 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
3340 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
3341 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
3344 case ARMISD::BUILD_VECTOR
: {
3345 EVT VecVT
= N
->getValueType(0);
3346 EVT EltVT
= VecVT
.getVectorElementType();
3347 unsigned NumElts
= VecVT
.getVectorNumElements();
3348 if (EltVT
== MVT::f64
) {
3349 assert(NumElts
== 2 && "unexpected type for BUILD_VECTOR");
3351 N
, createDRegPairNode(VecVT
, N
->getOperand(0), N
->getOperand(1)));
3354 assert(EltVT
== MVT::f32
&& "unexpected type for BUILD_VECTOR");
3357 N
, createSRegPairNode(VecVT
, N
->getOperand(0), N
->getOperand(1)));
3360 assert(NumElts
== 4 && "unexpected type for BUILD_VECTOR");
3362 createQuadSRegsNode(VecVT
, N
->getOperand(0), N
->getOperand(1),
3363 N
->getOperand(2), N
->getOperand(3)));
3367 case ARMISD::VLD1DUP
: {
3368 static const uint16_t DOpcodes
[] = { ARM::VLD1DUPd8
, ARM::VLD1DUPd16
,
3370 static const uint16_t QOpcodes
[] = { ARM::VLD1DUPq8
, ARM::VLD1DUPq16
,
3372 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 1, DOpcodes
, QOpcodes
);
3376 case ARMISD::VLD2DUP
: {
3377 static const uint16_t Opcodes
[] = { ARM::VLD2DUPd8
, ARM::VLD2DUPd16
,
3379 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 2, Opcodes
);
3383 case ARMISD::VLD3DUP
: {
3384 static const uint16_t Opcodes
[] = { ARM::VLD3DUPd8Pseudo
,
3385 ARM::VLD3DUPd16Pseudo
,
3386 ARM::VLD3DUPd32Pseudo
};
3387 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 3, Opcodes
);
3391 case ARMISD::VLD4DUP
: {
3392 static const uint16_t Opcodes
[] = { ARM::VLD4DUPd8Pseudo
,
3393 ARM::VLD4DUPd16Pseudo
,
3394 ARM::VLD4DUPd32Pseudo
};
3395 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 4, Opcodes
);
3399 case ARMISD::VLD1DUP_UPD
: {
3400 static const uint16_t DOpcodes
[] = { ARM::VLD1DUPd8wb_fixed
,
3401 ARM::VLD1DUPd16wb_fixed
,
3402 ARM::VLD1DUPd32wb_fixed
};
3403 static const uint16_t QOpcodes
[] = { ARM::VLD1DUPq8wb_fixed
,
3404 ARM::VLD1DUPq16wb_fixed
,
3405 ARM::VLD1DUPq32wb_fixed
};
3406 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 1, DOpcodes
, QOpcodes
);
3410 case ARMISD::VLD2DUP_UPD
: {
3411 static const uint16_t Opcodes
[] = { ARM::VLD2DUPd8wb_fixed
,
3412 ARM::VLD2DUPd16wb_fixed
,
3413 ARM::VLD2DUPd32wb_fixed
};
3414 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 2, Opcodes
);
3418 case ARMISD::VLD3DUP_UPD
: {
3419 static const uint16_t Opcodes
[] = { ARM::VLD3DUPd8Pseudo_UPD
,
3420 ARM::VLD3DUPd16Pseudo_UPD
,
3421 ARM::VLD3DUPd32Pseudo_UPD
};
3422 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 3, Opcodes
);
3426 case ARMISD::VLD4DUP_UPD
: {
3427 static const uint16_t Opcodes
[] = { ARM::VLD4DUPd8Pseudo_UPD
,
3428 ARM::VLD4DUPd16Pseudo_UPD
,
3429 ARM::VLD4DUPd32Pseudo_UPD
};
3430 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 4, Opcodes
);
3434 case ARMISD::VLD1_UPD
: {
3435 static const uint16_t DOpcodes
[] = { ARM::VLD1d8wb_fixed
,
3436 ARM::VLD1d16wb_fixed
,
3437 ARM::VLD1d32wb_fixed
,
3438 ARM::VLD1d64wb_fixed
};
3439 static const uint16_t QOpcodes
[] = { ARM::VLD1q8wb_fixed
,
3440 ARM::VLD1q16wb_fixed
,
3441 ARM::VLD1q32wb_fixed
,
3442 ARM::VLD1q64wb_fixed
};
3443 SelectVLD(N
, true, 1, DOpcodes
, QOpcodes
, nullptr);
3447 case ARMISD::VLD2_UPD
: {
3448 static const uint16_t DOpcodes
[] = { ARM::VLD2d8wb_fixed
,
3449 ARM::VLD2d16wb_fixed
,
3450 ARM::VLD2d32wb_fixed
,
3451 ARM::VLD1q64wb_fixed
};
3452 static const uint16_t QOpcodes
[] = { ARM::VLD2q8PseudoWB_fixed
,
3453 ARM::VLD2q16PseudoWB_fixed
,
3454 ARM::VLD2q32PseudoWB_fixed
};
3455 SelectVLD(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
3459 case ARMISD::VLD3_UPD
: {
3460 static const uint16_t DOpcodes
[] = { ARM::VLD3d8Pseudo_UPD
,
3461 ARM::VLD3d16Pseudo_UPD
,
3462 ARM::VLD3d32Pseudo_UPD
,
3463 ARM::VLD1d64TPseudoWB_fixed
};
3464 static const uint16_t QOpcodes0
[] = { ARM::VLD3q8Pseudo_UPD
,
3465 ARM::VLD3q16Pseudo_UPD
,
3466 ARM::VLD3q32Pseudo_UPD
};
3467 static const uint16_t QOpcodes1
[] = { ARM::VLD3q8oddPseudo_UPD
,
3468 ARM::VLD3q16oddPseudo_UPD
,
3469 ARM::VLD3q32oddPseudo_UPD
};
3470 SelectVLD(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3474 case ARMISD::VLD4_UPD
: {
3475 static const uint16_t DOpcodes
[] = { ARM::VLD4d8Pseudo_UPD
,
3476 ARM::VLD4d16Pseudo_UPD
,
3477 ARM::VLD4d32Pseudo_UPD
,
3478 ARM::VLD1d64QPseudoWB_fixed
};
3479 static const uint16_t QOpcodes0
[] = { ARM::VLD4q8Pseudo_UPD
,
3480 ARM::VLD4q16Pseudo_UPD
,
3481 ARM::VLD4q32Pseudo_UPD
};
3482 static const uint16_t QOpcodes1
[] = { ARM::VLD4q8oddPseudo_UPD
,
3483 ARM::VLD4q16oddPseudo_UPD
,
3484 ARM::VLD4q32oddPseudo_UPD
};
3485 SelectVLD(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3489 case ARMISD::VLD2LN_UPD
: {
3490 static const uint16_t DOpcodes
[] = { ARM::VLD2LNd8Pseudo_UPD
,
3491 ARM::VLD2LNd16Pseudo_UPD
,
3492 ARM::VLD2LNd32Pseudo_UPD
};
3493 static const uint16_t QOpcodes
[] = { ARM::VLD2LNq16Pseudo_UPD
,
3494 ARM::VLD2LNq32Pseudo_UPD
};
3495 SelectVLDSTLane(N
, true, true, 2, DOpcodes
, QOpcodes
);
3499 case ARMISD::VLD3LN_UPD
: {
3500 static const uint16_t DOpcodes
[] = { ARM::VLD3LNd8Pseudo_UPD
,
3501 ARM::VLD3LNd16Pseudo_UPD
,
3502 ARM::VLD3LNd32Pseudo_UPD
};
3503 static const uint16_t QOpcodes
[] = { ARM::VLD3LNq16Pseudo_UPD
,
3504 ARM::VLD3LNq32Pseudo_UPD
};
3505 SelectVLDSTLane(N
, true, true, 3, DOpcodes
, QOpcodes
);
3509 case ARMISD::VLD4LN_UPD
: {
3510 static const uint16_t DOpcodes
[] = { ARM::VLD4LNd8Pseudo_UPD
,
3511 ARM::VLD4LNd16Pseudo_UPD
,
3512 ARM::VLD4LNd32Pseudo_UPD
};
3513 static const uint16_t QOpcodes
[] = { ARM::VLD4LNq16Pseudo_UPD
,
3514 ARM::VLD4LNq32Pseudo_UPD
};
3515 SelectVLDSTLane(N
, true, true, 4, DOpcodes
, QOpcodes
);
3519 case ARMISD::VST1_UPD
: {
3520 static const uint16_t DOpcodes
[] = { ARM::VST1d8wb_fixed
,
3521 ARM::VST1d16wb_fixed
,
3522 ARM::VST1d32wb_fixed
,
3523 ARM::VST1d64wb_fixed
};
3524 static const uint16_t QOpcodes
[] = { ARM::VST1q8wb_fixed
,
3525 ARM::VST1q16wb_fixed
,
3526 ARM::VST1q32wb_fixed
,
3527 ARM::VST1q64wb_fixed
};
3528 SelectVST(N
, true, 1, DOpcodes
, QOpcodes
, nullptr);
3532 case ARMISD::VST2_UPD
: {
3533 static const uint16_t DOpcodes
[] = { ARM::VST2d8wb_fixed
,
3534 ARM::VST2d16wb_fixed
,
3535 ARM::VST2d32wb_fixed
,
3536 ARM::VST1q64wb_fixed
};
3537 static const uint16_t QOpcodes
[] = { ARM::VST2q8PseudoWB_fixed
,
3538 ARM::VST2q16PseudoWB_fixed
,
3539 ARM::VST2q32PseudoWB_fixed
};
3540 SelectVST(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
3544 case ARMISD::VST3_UPD
: {
3545 static const uint16_t DOpcodes
[] = { ARM::VST3d8Pseudo_UPD
,
3546 ARM::VST3d16Pseudo_UPD
,
3547 ARM::VST3d32Pseudo_UPD
,
3548 ARM::VST1d64TPseudoWB_fixed
};
3549 static const uint16_t QOpcodes0
[] = { ARM::VST3q8Pseudo_UPD
,
3550 ARM::VST3q16Pseudo_UPD
,
3551 ARM::VST3q32Pseudo_UPD
};
3552 static const uint16_t QOpcodes1
[] = { ARM::VST3q8oddPseudo_UPD
,
3553 ARM::VST3q16oddPseudo_UPD
,
3554 ARM::VST3q32oddPseudo_UPD
};
3555 SelectVST(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3559 case ARMISD::VST4_UPD
: {
3560 static const uint16_t DOpcodes
[] = { ARM::VST4d8Pseudo_UPD
,
3561 ARM::VST4d16Pseudo_UPD
,
3562 ARM::VST4d32Pseudo_UPD
,
3563 ARM::VST1d64QPseudoWB_fixed
};
3564 static const uint16_t QOpcodes0
[] = { ARM::VST4q8Pseudo_UPD
,
3565 ARM::VST4q16Pseudo_UPD
,
3566 ARM::VST4q32Pseudo_UPD
};
3567 static const uint16_t QOpcodes1
[] = { ARM::VST4q8oddPseudo_UPD
,
3568 ARM::VST4q16oddPseudo_UPD
,
3569 ARM::VST4q32oddPseudo_UPD
};
3570 SelectVST(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3574 case ARMISD::VST2LN_UPD
: {
3575 static const uint16_t DOpcodes
[] = { ARM::VST2LNd8Pseudo_UPD
,
3576 ARM::VST2LNd16Pseudo_UPD
,
3577 ARM::VST2LNd32Pseudo_UPD
};
3578 static const uint16_t QOpcodes
[] = { ARM::VST2LNq16Pseudo_UPD
,
3579 ARM::VST2LNq32Pseudo_UPD
};
3580 SelectVLDSTLane(N
, false, true, 2, DOpcodes
, QOpcodes
);
3584 case ARMISD::VST3LN_UPD
: {
3585 static const uint16_t DOpcodes
[] = { ARM::VST3LNd8Pseudo_UPD
,
3586 ARM::VST3LNd16Pseudo_UPD
,
3587 ARM::VST3LNd32Pseudo_UPD
};
3588 static const uint16_t QOpcodes
[] = { ARM::VST3LNq16Pseudo_UPD
,
3589 ARM::VST3LNq32Pseudo_UPD
};
3590 SelectVLDSTLane(N
, false, true, 3, DOpcodes
, QOpcodes
);
3594 case ARMISD::VST4LN_UPD
: {
3595 static const uint16_t DOpcodes
[] = { ARM::VST4LNd8Pseudo_UPD
,
3596 ARM::VST4LNd16Pseudo_UPD
,
3597 ARM::VST4LNd32Pseudo_UPD
};
3598 static const uint16_t QOpcodes
[] = { ARM::VST4LNq16Pseudo_UPD
,
3599 ARM::VST4LNq32Pseudo_UPD
};
3600 SelectVLDSTLane(N
, false, true, 4, DOpcodes
, QOpcodes
);
3604 case ISD::INTRINSIC_VOID
:
3605 case ISD::INTRINSIC_W_CHAIN
: {
3606 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
3611 case Intrinsic::arm_mrrc
:
3612 case Intrinsic::arm_mrrc2
: {
3614 SDValue Chain
= N
->getOperand(0);
3617 if (Subtarget
->isThumb())
3618 Opc
= (IntNo
== Intrinsic::arm_mrrc
? ARM::t2MRRC
: ARM::t2MRRC2
);
3620 Opc
= (IntNo
== Intrinsic::arm_mrrc
? ARM::MRRC
: ARM::MRRC2
);
3622 SmallVector
<SDValue
, 5> Ops
;
3623 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(2))->getZExtValue(), dl
)); /* coproc */
3624 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(3))->getZExtValue(), dl
)); /* opc */
3625 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(4))->getZExtValue(), dl
)); /* CRm */
3627 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3628 // instruction will always be '1111' but it is possible in assembly language to specify
3629 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3630 if (Opc
!= ARM::MRRC2
) {
3631 Ops
.push_back(getAL(CurDAG
, dl
));
3632 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
3635 Ops
.push_back(Chain
);
3637 // Writes to two registers.
3638 const EVT RetType
[] = {MVT::i32
, MVT::i32
, MVT::Other
};
3640 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, RetType
, Ops
));
3643 case Intrinsic::arm_ldaexd
:
3644 case Intrinsic::arm_ldrexd
: {
3646 SDValue Chain
= N
->getOperand(0);
3647 SDValue MemAddr
= N
->getOperand(2);
3648 bool isThumb
= Subtarget
->isThumb() && Subtarget
->hasV8MBaselineOps();
3650 bool IsAcquire
= IntNo
== Intrinsic::arm_ldaexd
;
3651 unsigned NewOpc
= isThumb
? (IsAcquire
? ARM::t2LDAEXD
: ARM::t2LDREXD
)
3652 : (IsAcquire
? ARM::LDAEXD
: ARM::LDREXD
);
3654 // arm_ldrexd returns a i64 value in {i32, i32}
3655 std::vector
<EVT
> ResTys
;
3657 ResTys
.push_back(MVT::i32
);
3658 ResTys
.push_back(MVT::i32
);
3660 ResTys
.push_back(MVT::Untyped
);
3661 ResTys
.push_back(MVT::Other
);
3663 // Place arguments in the right order.
3664 SDValue Ops
[] = {MemAddr
, getAL(CurDAG
, dl
),
3665 CurDAG
->getRegister(0, MVT::i32
), Chain
};
3666 SDNode
*Ld
= CurDAG
->getMachineNode(NewOpc
, dl
, ResTys
, Ops
);
3667 // Transfer memoperands.
3668 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
3669 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Ld
), {MemOp
});
3672 SDValue OutChain
= isThumb
? SDValue(Ld
, 2) : SDValue(Ld
, 1);
3673 if (!SDValue(N
, 0).use_empty()) {
3676 Result
= SDValue(Ld
, 0);
3679 CurDAG
->getTargetConstant(ARM::gsub_0
, dl
, MVT::i32
);
3680 SDNode
*ResNode
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
3681 dl
, MVT::i32
, SDValue(Ld
, 0), SubRegIdx
);
3682 Result
= SDValue(ResNode
,0);
3684 ReplaceUses(SDValue(N
, 0), Result
);
3686 if (!SDValue(N
, 1).use_empty()) {
3689 Result
= SDValue(Ld
, 1);
3692 CurDAG
->getTargetConstant(ARM::gsub_1
, dl
, MVT::i32
);
3693 SDNode
*ResNode
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
3694 dl
, MVT::i32
, SDValue(Ld
, 0), SubRegIdx
);
3695 Result
= SDValue(ResNode
,0);
3697 ReplaceUses(SDValue(N
, 1), Result
);
3699 ReplaceUses(SDValue(N
, 2), OutChain
);
3700 CurDAG
->RemoveDeadNode(N
);
3703 case Intrinsic::arm_stlexd
:
3704 case Intrinsic::arm_strexd
: {
3706 SDValue Chain
= N
->getOperand(0);
3707 SDValue Val0
= N
->getOperand(2);
3708 SDValue Val1
= N
->getOperand(3);
3709 SDValue MemAddr
= N
->getOperand(4);
3711 // Store exclusive double return a i32 value which is the return status
3712 // of the issued store.
3713 const EVT ResTys
[] = {MVT::i32
, MVT::Other
};
3715 bool isThumb
= Subtarget
->isThumb() && Subtarget
->hasThumb2();
3716 // Place arguments in the right order.
3717 SmallVector
<SDValue
, 7> Ops
;
3719 Ops
.push_back(Val0
);
3720 Ops
.push_back(Val1
);
3722 // arm_strexd uses GPRPair.
3723 Ops
.push_back(SDValue(createGPRPairNode(MVT::Untyped
, Val0
, Val1
), 0));
3724 Ops
.push_back(MemAddr
);
3725 Ops
.push_back(getAL(CurDAG
, dl
));
3726 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
3727 Ops
.push_back(Chain
);
3729 bool IsRelease
= IntNo
== Intrinsic::arm_stlexd
;
3730 unsigned NewOpc
= isThumb
? (IsRelease
? ARM::t2STLEXD
: ARM::t2STREXD
)
3731 : (IsRelease
? ARM::STLEXD
: ARM::STREXD
);
3733 SDNode
*St
= CurDAG
->getMachineNode(NewOpc
, dl
, ResTys
, Ops
);
3734 // Transfer memoperands.
3735 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
3736 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(St
), {MemOp
});
3742 case Intrinsic::arm_neon_vld1
: {
3743 static const uint16_t DOpcodes
[] = { ARM::VLD1d8
, ARM::VLD1d16
,
3744 ARM::VLD1d32
, ARM::VLD1d64
};
3745 static const uint16_t QOpcodes
[] = { ARM::VLD1q8
, ARM::VLD1q16
,
3746 ARM::VLD1q32
, ARM::VLD1q64
};
3747 SelectVLD(N
, false, 1, DOpcodes
, QOpcodes
, nullptr);
3751 case Intrinsic::arm_neon_vld1x2
: {
3752 static const uint16_t DOpcodes
[] = { ARM::VLD1q8
, ARM::VLD1q16
,
3753 ARM::VLD1q32
, ARM::VLD1q64
};
3754 static const uint16_t QOpcodes
[] = { ARM::VLD1d8QPseudo
,
3755 ARM::VLD1d16QPseudo
,
3756 ARM::VLD1d32QPseudo
,
3757 ARM::VLD1d64QPseudo
};
3758 SelectVLD(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3762 case Intrinsic::arm_neon_vld1x3
: {
3763 static const uint16_t DOpcodes
[] = { ARM::VLD1d8TPseudo
,
3764 ARM::VLD1d16TPseudo
,
3765 ARM::VLD1d32TPseudo
,
3766 ARM::VLD1d64TPseudo
};
3767 static const uint16_t QOpcodes0
[] = { ARM::VLD1q8LowTPseudo_UPD
,
3768 ARM::VLD1q16LowTPseudo_UPD
,
3769 ARM::VLD1q32LowTPseudo_UPD
,
3770 ARM::VLD1q64LowTPseudo_UPD
};
3771 static const uint16_t QOpcodes1
[] = { ARM::VLD1q8HighTPseudo
,
3772 ARM::VLD1q16HighTPseudo
,
3773 ARM::VLD1q32HighTPseudo
,
3774 ARM::VLD1q64HighTPseudo
};
3775 SelectVLD(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3779 case Intrinsic::arm_neon_vld1x4
: {
3780 static const uint16_t DOpcodes
[] = { ARM::VLD1d8QPseudo
,
3781 ARM::VLD1d16QPseudo
,
3782 ARM::VLD1d32QPseudo
,
3783 ARM::VLD1d64QPseudo
};
3784 static const uint16_t QOpcodes0
[] = { ARM::VLD1q8LowQPseudo_UPD
,
3785 ARM::VLD1q16LowQPseudo_UPD
,
3786 ARM::VLD1q32LowQPseudo_UPD
,
3787 ARM::VLD1q64LowQPseudo_UPD
};
3788 static const uint16_t QOpcodes1
[] = { ARM::VLD1q8HighQPseudo
,
3789 ARM::VLD1q16HighQPseudo
,
3790 ARM::VLD1q32HighQPseudo
,
3791 ARM::VLD1q64HighQPseudo
};
3792 SelectVLD(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3796 case Intrinsic::arm_neon_vld2
: {
3797 static const uint16_t DOpcodes
[] = { ARM::VLD2d8
, ARM::VLD2d16
,
3798 ARM::VLD2d32
, ARM::VLD1q64
};
3799 static const uint16_t QOpcodes
[] = { ARM::VLD2q8Pseudo
, ARM::VLD2q16Pseudo
,
3800 ARM::VLD2q32Pseudo
};
3801 SelectVLD(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3805 case Intrinsic::arm_neon_vld3
: {
3806 static const uint16_t DOpcodes
[] = { ARM::VLD3d8Pseudo
,
3809 ARM::VLD1d64TPseudo
};
3810 static const uint16_t QOpcodes0
[] = { ARM::VLD3q8Pseudo_UPD
,
3811 ARM::VLD3q16Pseudo_UPD
,
3812 ARM::VLD3q32Pseudo_UPD
};
3813 static const uint16_t QOpcodes1
[] = { ARM::VLD3q8oddPseudo
,
3814 ARM::VLD3q16oddPseudo
,
3815 ARM::VLD3q32oddPseudo
};
3816 SelectVLD(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3820 case Intrinsic::arm_neon_vld4
: {
3821 static const uint16_t DOpcodes
[] = { ARM::VLD4d8Pseudo
,
3824 ARM::VLD1d64QPseudo
};
3825 static const uint16_t QOpcodes0
[] = { ARM::VLD4q8Pseudo_UPD
,
3826 ARM::VLD4q16Pseudo_UPD
,
3827 ARM::VLD4q32Pseudo_UPD
};
3828 static const uint16_t QOpcodes1
[] = { ARM::VLD4q8oddPseudo
,
3829 ARM::VLD4q16oddPseudo
,
3830 ARM::VLD4q32oddPseudo
};
3831 SelectVLD(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3835 case Intrinsic::arm_neon_vld2dup
: {
3836 static const uint16_t DOpcodes
[] = { ARM::VLD2DUPd8
, ARM::VLD2DUPd16
,
3837 ARM::VLD2DUPd32
, ARM::VLD1q64
};
3838 static const uint16_t QOpcodes0
[] = { ARM::VLD2DUPq8EvenPseudo
,
3839 ARM::VLD2DUPq16EvenPseudo
,
3840 ARM::VLD2DUPq32EvenPseudo
};
3841 static const uint16_t QOpcodes1
[] = { ARM::VLD2DUPq8OddPseudo
,
3842 ARM::VLD2DUPq16OddPseudo
,
3843 ARM::VLD2DUPq32OddPseudo
};
3844 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 2,
3845 DOpcodes
, QOpcodes0
, QOpcodes1
);
3849 case Intrinsic::arm_neon_vld3dup
: {
3850 static const uint16_t DOpcodes
[] = { ARM::VLD3DUPd8Pseudo
,
3851 ARM::VLD3DUPd16Pseudo
,
3852 ARM::VLD3DUPd32Pseudo
,
3853 ARM::VLD1d64TPseudo
};
3854 static const uint16_t QOpcodes0
[] = { ARM::VLD3DUPq8EvenPseudo
,
3855 ARM::VLD3DUPq16EvenPseudo
,
3856 ARM::VLD3DUPq32EvenPseudo
};
3857 static const uint16_t QOpcodes1
[] = { ARM::VLD3DUPq8OddPseudo
,
3858 ARM::VLD3DUPq16OddPseudo
,
3859 ARM::VLD3DUPq32OddPseudo
};
3860 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 3,
3861 DOpcodes
, QOpcodes0
, QOpcodes1
);
3865 case Intrinsic::arm_neon_vld4dup
: {
3866 static const uint16_t DOpcodes
[] = { ARM::VLD4DUPd8Pseudo
,
3867 ARM::VLD4DUPd16Pseudo
,
3868 ARM::VLD4DUPd32Pseudo
,
3869 ARM::VLD1d64QPseudo
};
3870 static const uint16_t QOpcodes0
[] = { ARM::VLD4DUPq8EvenPseudo
,
3871 ARM::VLD4DUPq16EvenPseudo
,
3872 ARM::VLD4DUPq32EvenPseudo
};
3873 static const uint16_t QOpcodes1
[] = { ARM::VLD4DUPq8OddPseudo
,
3874 ARM::VLD4DUPq16OddPseudo
,
3875 ARM::VLD4DUPq32OddPseudo
};
3876 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 4,
3877 DOpcodes
, QOpcodes0
, QOpcodes1
);
3881 case Intrinsic::arm_neon_vld2lane
: {
3882 static const uint16_t DOpcodes
[] = { ARM::VLD2LNd8Pseudo
,
3883 ARM::VLD2LNd16Pseudo
,
3884 ARM::VLD2LNd32Pseudo
};
3885 static const uint16_t QOpcodes
[] = { ARM::VLD2LNq16Pseudo
,
3886 ARM::VLD2LNq32Pseudo
};
3887 SelectVLDSTLane(N
, true, false, 2, DOpcodes
, QOpcodes
);
3891 case Intrinsic::arm_neon_vld3lane
: {
3892 static const uint16_t DOpcodes
[] = { ARM::VLD3LNd8Pseudo
,
3893 ARM::VLD3LNd16Pseudo
,
3894 ARM::VLD3LNd32Pseudo
};
3895 static const uint16_t QOpcodes
[] = { ARM::VLD3LNq16Pseudo
,
3896 ARM::VLD3LNq32Pseudo
};
3897 SelectVLDSTLane(N
, true, false, 3, DOpcodes
, QOpcodes
);
3901 case Intrinsic::arm_neon_vld4lane
: {
3902 static const uint16_t DOpcodes
[] = { ARM::VLD4LNd8Pseudo
,
3903 ARM::VLD4LNd16Pseudo
,
3904 ARM::VLD4LNd32Pseudo
};
3905 static const uint16_t QOpcodes
[] = { ARM::VLD4LNq16Pseudo
,
3906 ARM::VLD4LNq32Pseudo
};
3907 SelectVLDSTLane(N
, true, false, 4, DOpcodes
, QOpcodes
);
3911 case Intrinsic::arm_neon_vst1
: {
3912 static const uint16_t DOpcodes
[] = { ARM::VST1d8
, ARM::VST1d16
,
3913 ARM::VST1d32
, ARM::VST1d64
};
3914 static const uint16_t QOpcodes
[] = { ARM::VST1q8
, ARM::VST1q16
,
3915 ARM::VST1q32
, ARM::VST1q64
};
3916 SelectVST(N
, false, 1, DOpcodes
, QOpcodes
, nullptr);
3920 case Intrinsic::arm_neon_vst1x2
: {
3921 static const uint16_t DOpcodes
[] = { ARM::VST1q8
, ARM::VST1q16
,
3922 ARM::VST1q32
, ARM::VST1q64
};
3923 static const uint16_t QOpcodes
[] = { ARM::VST1d8QPseudo
,
3924 ARM::VST1d16QPseudo
,
3925 ARM::VST1d32QPseudo
,
3926 ARM::VST1d64QPseudo
};
3927 SelectVST(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3931 case Intrinsic::arm_neon_vst1x3
: {
3932 static const uint16_t DOpcodes
[] = { ARM::VST1d8TPseudo
,
3933 ARM::VST1d16TPseudo
,
3934 ARM::VST1d32TPseudo
,
3935 ARM::VST1d64TPseudo
};
3936 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowTPseudo_UPD
,
3937 ARM::VST1q16LowTPseudo_UPD
,
3938 ARM::VST1q32LowTPseudo_UPD
,
3939 ARM::VST1q64LowTPseudo_UPD
};
3940 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighTPseudo
,
3941 ARM::VST1q16HighTPseudo
,
3942 ARM::VST1q32HighTPseudo
,
3943 ARM::VST1q64HighTPseudo
};
3944 SelectVST(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3948 case Intrinsic::arm_neon_vst1x4
: {
3949 static const uint16_t DOpcodes
[] = { ARM::VST1d8QPseudo
,
3950 ARM::VST1d16QPseudo
,
3951 ARM::VST1d32QPseudo
,
3952 ARM::VST1d64QPseudo
};
3953 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowQPseudo_UPD
,
3954 ARM::VST1q16LowQPseudo_UPD
,
3955 ARM::VST1q32LowQPseudo_UPD
,
3956 ARM::VST1q64LowQPseudo_UPD
};
3957 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighQPseudo
,
3958 ARM::VST1q16HighQPseudo
,
3959 ARM::VST1q32HighQPseudo
,
3960 ARM::VST1q64HighQPseudo
};
3961 SelectVST(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3965 case Intrinsic::arm_neon_vst2
: {
3966 static const uint16_t DOpcodes
[] = { ARM::VST2d8
, ARM::VST2d16
,
3967 ARM::VST2d32
, ARM::VST1q64
};
3968 static const uint16_t QOpcodes
[] = { ARM::VST2q8Pseudo
, ARM::VST2q16Pseudo
,
3969 ARM::VST2q32Pseudo
};
3970 SelectVST(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3974 case Intrinsic::arm_neon_vst3
: {
3975 static const uint16_t DOpcodes
[] = { ARM::VST3d8Pseudo
,
3978 ARM::VST1d64TPseudo
};
3979 static const uint16_t QOpcodes0
[] = { ARM::VST3q8Pseudo_UPD
,
3980 ARM::VST3q16Pseudo_UPD
,
3981 ARM::VST3q32Pseudo_UPD
};
3982 static const uint16_t QOpcodes1
[] = { ARM::VST3q8oddPseudo
,
3983 ARM::VST3q16oddPseudo
,
3984 ARM::VST3q32oddPseudo
};
3985 SelectVST(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3989 case Intrinsic::arm_neon_vst4
: {
3990 static const uint16_t DOpcodes
[] = { ARM::VST4d8Pseudo
,
3993 ARM::VST1d64QPseudo
};
3994 static const uint16_t QOpcodes0
[] = { ARM::VST4q8Pseudo_UPD
,
3995 ARM::VST4q16Pseudo_UPD
,
3996 ARM::VST4q32Pseudo_UPD
};
3997 static const uint16_t QOpcodes1
[] = { ARM::VST4q8oddPseudo
,
3998 ARM::VST4q16oddPseudo
,
3999 ARM::VST4q32oddPseudo
};
4000 SelectVST(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
4004 case Intrinsic::arm_neon_vst2lane
: {
4005 static const uint16_t DOpcodes
[] = { ARM::VST2LNd8Pseudo
,
4006 ARM::VST2LNd16Pseudo
,
4007 ARM::VST2LNd32Pseudo
};
4008 static const uint16_t QOpcodes
[] = { ARM::VST2LNq16Pseudo
,
4009 ARM::VST2LNq32Pseudo
};
4010 SelectVLDSTLane(N
, false, false, 2, DOpcodes
, QOpcodes
);
4014 case Intrinsic::arm_neon_vst3lane
: {
4015 static const uint16_t DOpcodes
[] = { ARM::VST3LNd8Pseudo
,
4016 ARM::VST3LNd16Pseudo
,
4017 ARM::VST3LNd32Pseudo
};
4018 static const uint16_t QOpcodes
[] = { ARM::VST3LNq16Pseudo
,
4019 ARM::VST3LNq32Pseudo
};
4020 SelectVLDSTLane(N
, false, false, 3, DOpcodes
, QOpcodes
);
4024 case Intrinsic::arm_neon_vst4lane
: {
4025 static const uint16_t DOpcodes
[] = { ARM::VST4LNd8Pseudo
,
4026 ARM::VST4LNd16Pseudo
,
4027 ARM::VST4LNd32Pseudo
};
4028 static const uint16_t QOpcodes
[] = { ARM::VST4LNq16Pseudo
,
4029 ARM::VST4LNq32Pseudo
};
4030 SelectVLDSTLane(N
, false, false, 4, DOpcodes
, QOpcodes
);
4037 case ISD::ATOMIC_CMP_SWAP
:
4045 // Inspect a register string of the form
4046 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4047 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4048 // and obtain the integer operands from them, adding these operands to the
4050 static void getIntOperandsFromRegisterString(StringRef RegString
,
4051 SelectionDAG
*CurDAG
,
4053 std::vector
<SDValue
> &Ops
) {
4054 SmallVector
<StringRef
, 5> Fields
;
4055 RegString
.split(Fields
, ':');
4057 if (Fields
.size() > 1) {
4058 bool AllIntFields
= true;
4060 for (StringRef Field
: Fields
) {
4061 // Need to trim out leading 'cp' characters and get the integer field.
4063 AllIntFields
&= !Field
.trim("CPcp").getAsInteger(10, IntField
);
4064 Ops
.push_back(CurDAG
->getTargetConstant(IntField
, DL
, MVT::i32
));
4067 assert(AllIntFields
&&
4068 "Unexpected non-integer value in special register string.");
4072 // Maps a Banked Register string to its mask value. The mask value returned is
4073 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4074 // mask operand, which expresses which register is to be used, e.g. r8, and in
4075 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4077 static inline int getBankedRegisterMask(StringRef RegString
) {
4078 auto TheReg
= ARMBankedReg::lookupBankedRegByName(RegString
.lower());
4081 return TheReg
->Encoding
;
4084 // The flags here are common to those allowed for apsr in the A class cores and
4085 // those allowed for the special registers in the M class cores. Returns a
4086 // value representing which flags were present, -1 if invalid.
4087 static inline int getMClassFlagsMask(StringRef Flags
) {
4088 return StringSwitch
<int>(Flags
)
4089 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4090 // correct when flags are not permitted
4093 .Case("nzcvqg", 0x3)
4097 // Maps MClass special registers string to its value for use in the
4098 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4099 // Returns -1 to signify that the string was invalid.
4100 static int getMClassRegisterMask(StringRef Reg
, const ARMSubtarget
*Subtarget
) {
4101 auto TheReg
= ARMSysReg::lookupMClassSysRegByName(Reg
);
4102 const FeatureBitset
&FeatureBits
= Subtarget
->getFeatureBits();
4103 if (!TheReg
|| !TheReg
->hasRequiredFeatures(FeatureBits
))
4105 return (int)(TheReg
->Encoding
& 0xFFF); // SYSm value
4108 static int getARClassRegisterMask(StringRef Reg
, StringRef Flags
) {
4109 // The mask operand contains the special register (R Bit) in bit 4, whether
4110 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4111 // bits 3-0 contains the fields to be accessed in the special register, set by
4112 // the flags provided with the register.
4114 if (Reg
== "apsr") {
4115 // The flags permitted for apsr are the same flags that are allowed in
4116 // M class registers. We get the flag value and then shift the flags into
4117 // the correct place to combine with the mask.
4118 Mask
= getMClassFlagsMask(Flags
);
4124 if (Reg
!= "cpsr" && Reg
!= "spsr") {
4128 // This is the same as if the flags were "fc"
4129 if (Flags
.empty() || Flags
== "all")
4132 // Inspect the supplied flags string and set the bits in the mask for
4133 // the relevant and valid flags allowed for cpsr and spsr.
4134 for (char Flag
: Flags
) {
4153 // This avoids allowing strings where the same flag bit appears twice.
4154 if (!FlagVal
|| (Mask
& FlagVal
))
4159 // If the register is spsr then we need to set the R bit.
4166 // Lower the read_register intrinsic to ARM specific DAG nodes
4167 // using the supplied metadata string to select the instruction node to use
4168 // and the registers/masks to construct as operands for the node.
4169 bool ARMDAGToDAGISel::tryReadRegister(SDNode
*N
){
4170 const MDNodeSDNode
*MD
= dyn_cast
<MDNodeSDNode
>(N
->getOperand(1));
4171 const MDString
*RegString
= dyn_cast
<MDString
>(MD
->getMD()->getOperand(0));
4172 bool IsThumb2
= Subtarget
->isThumb2();
4175 std::vector
<SDValue
> Ops
;
4176 getIntOperandsFromRegisterString(RegString
->getString(), CurDAG
, DL
, Ops
);
4179 // If the special register string was constructed of fields (as defined
4180 // in the ACLE) then need to lower to MRC node (32 bit) or
4181 // MRRC node(64 bit), we can make the distinction based on the number of
4182 // operands we have.
4184 SmallVector
<EVT
, 3> ResTypes
;
4185 if (Ops
.size() == 5){
4186 Opcode
= IsThumb2
? ARM::t2MRC
: ARM::MRC
;
4187 ResTypes
.append({ MVT::i32
, MVT::Other
});
4189 assert(Ops
.size() == 3 &&
4190 "Invalid number of fields in special register string.");
4191 Opcode
= IsThumb2
? ARM::t2MRRC
: ARM::MRRC
;
4192 ResTypes
.append({ MVT::i32
, MVT::i32
, MVT::Other
});
4195 Ops
.push_back(getAL(CurDAG
, DL
));
4196 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
4197 Ops
.push_back(N
->getOperand(0));
4198 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, ResTypes
, Ops
));
4202 std::string SpecialReg
= RegString
->getString().lower();
4204 int BankedReg
= getBankedRegisterMask(SpecialReg
);
4205 if (BankedReg
!= -1) {
4206 Ops
= { CurDAG
->getTargetConstant(BankedReg
, DL
, MVT::i32
),
4207 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4210 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRSbanked
: ARM::MRSbanked
,
4211 DL
, MVT::i32
, MVT::Other
, Ops
));
4215 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4216 // corresponding to the register that is being read from. So we switch on the
4217 // string to find which opcode we need to use.
4218 unsigned Opcode
= StringSwitch
<unsigned>(SpecialReg
)
4219 .Case("fpscr", ARM::VMRS
)
4220 .Case("fpexc", ARM::VMRS_FPEXC
)
4221 .Case("fpsid", ARM::VMRS_FPSID
)
4222 .Case("mvfr0", ARM::VMRS_MVFR0
)
4223 .Case("mvfr1", ARM::VMRS_MVFR1
)
4224 .Case("mvfr2", ARM::VMRS_MVFR2
)
4225 .Case("fpinst", ARM::VMRS_FPINST
)
4226 .Case("fpinst2", ARM::VMRS_FPINST2
)
4229 // If an opcode was found then we can lower the read to a VFP instruction.
4231 if (!Subtarget
->hasVFP2Base())
4233 if (Opcode
== ARM::VMRS_MVFR2
&& !Subtarget
->hasFPARMv8Base())
4236 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4239 CurDAG
->getMachineNode(Opcode
, DL
, MVT::i32
, MVT::Other
, Ops
));
4243 // If the target is M Class then need to validate that the register string
4244 // is an acceptable value, so check that a mask can be constructed from the
4246 if (Subtarget
->isMClass()) {
4247 int SYSmValue
= getMClassRegisterMask(SpecialReg
, Subtarget
);
4248 if (SYSmValue
== -1)
4251 SDValue Ops
[] = { CurDAG
->getTargetConstant(SYSmValue
, DL
, MVT::i32
),
4252 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4255 N
, CurDAG
->getMachineNode(ARM::t2MRS_M
, DL
, MVT::i32
, MVT::Other
, Ops
));
4259 // Here we know the target is not M Class so we need to check if it is one
4260 // of the remaining possible values which are apsr, cpsr or spsr.
4261 if (SpecialReg
== "apsr" || SpecialReg
== "cpsr") {
4262 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4264 ReplaceNode(N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRS_AR
: ARM::MRS
,
4265 DL
, MVT::i32
, MVT::Other
, Ops
));
4269 if (SpecialReg
== "spsr") {
4270 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4273 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRSsys_AR
: ARM::MRSsys
, DL
,
4274 MVT::i32
, MVT::Other
, Ops
));
4281 // Lower the write_register intrinsic to ARM specific DAG nodes
4282 // using the supplied metadata string to select the instruction node to use
4283 // and the registers/masks to use in the nodes
4284 bool ARMDAGToDAGISel::tryWriteRegister(SDNode
*N
){
4285 const MDNodeSDNode
*MD
= dyn_cast
<MDNodeSDNode
>(N
->getOperand(1));
4286 const MDString
*RegString
= dyn_cast
<MDString
>(MD
->getMD()->getOperand(0));
4287 bool IsThumb2
= Subtarget
->isThumb2();
4290 std::vector
<SDValue
> Ops
;
4291 getIntOperandsFromRegisterString(RegString
->getString(), CurDAG
, DL
, Ops
);
4294 // If the special register string was constructed of fields (as defined
4295 // in the ACLE) then need to lower to MCR node (32 bit) or
4296 // MCRR node(64 bit), we can make the distinction based on the number of
4297 // operands we have.
4299 if (Ops
.size() == 5) {
4300 Opcode
= IsThumb2
? ARM::t2MCR
: ARM::MCR
;
4301 Ops
.insert(Ops
.begin()+2, N
->getOperand(2));
4303 assert(Ops
.size() == 3 &&
4304 "Invalid number of fields in special register string.");
4305 Opcode
= IsThumb2
? ARM::t2MCRR
: ARM::MCRR
;
4306 SDValue WriteValue
[] = { N
->getOperand(2), N
->getOperand(3) };
4307 Ops
.insert(Ops
.begin()+2, WriteValue
, WriteValue
+2);
4310 Ops
.push_back(getAL(CurDAG
, DL
));
4311 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
4312 Ops
.push_back(N
->getOperand(0));
4314 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, MVT::Other
, Ops
));
4318 std::string SpecialReg
= RegString
->getString().lower();
4319 int BankedReg
= getBankedRegisterMask(SpecialReg
);
4320 if (BankedReg
!= -1) {
4321 Ops
= { CurDAG
->getTargetConstant(BankedReg
, DL
, MVT::i32
), N
->getOperand(2),
4322 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4325 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MSRbanked
: ARM::MSRbanked
,
4326 DL
, MVT::Other
, Ops
));
4330 // The VFP registers are written to by creating SelectionDAG nodes with
4331 // opcodes corresponding to the register that is being written. So we switch
4332 // on the string to find which opcode we need to use.
4333 unsigned Opcode
= StringSwitch
<unsigned>(SpecialReg
)
4334 .Case("fpscr", ARM::VMSR
)
4335 .Case("fpexc", ARM::VMSR_FPEXC
)
4336 .Case("fpsid", ARM::VMSR_FPSID
)
4337 .Case("fpinst", ARM::VMSR_FPINST
)
4338 .Case("fpinst2", ARM::VMSR_FPINST2
)
4342 if (!Subtarget
->hasVFP2Base())
4344 Ops
= { N
->getOperand(2), getAL(CurDAG
, DL
),
4345 CurDAG
->getRegister(0, MVT::i32
), N
->getOperand(0) };
4346 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, MVT::Other
, Ops
));
4350 std::pair
<StringRef
, StringRef
> Fields
;
4351 Fields
= StringRef(SpecialReg
).rsplit('_');
4352 std::string Reg
= Fields
.first
.str();
4353 StringRef Flags
= Fields
.second
;
4355 // If the target was M Class then need to validate the special register value
4356 // and retrieve the mask for use in the instruction node.
4357 if (Subtarget
->isMClass()) {
4358 int SYSmValue
= getMClassRegisterMask(SpecialReg
, Subtarget
);
4359 if (SYSmValue
== -1)
4362 SDValue Ops
[] = { CurDAG
->getTargetConstant(SYSmValue
, DL
, MVT::i32
),
4363 N
->getOperand(2), getAL(CurDAG
, DL
),
4364 CurDAG
->getRegister(0, MVT::i32
), N
->getOperand(0) };
4365 ReplaceNode(N
, CurDAG
->getMachineNode(ARM::t2MSR_M
, DL
, MVT::Other
, Ops
));
4369 // We then check to see if a valid mask can be constructed for one of the
4370 // register string values permitted for the A and R class cores. These values
4371 // are apsr, spsr and cpsr; these are also valid on older cores.
4372 int Mask
= getARClassRegisterMask(Reg
, Flags
);
4374 Ops
= { CurDAG
->getTargetConstant(Mask
, DL
, MVT::i32
), N
->getOperand(2),
4375 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4377 ReplaceNode(N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MSR_AR
: ARM::MSR
,
4378 DL
, MVT::Other
, Ops
));
4385 bool ARMDAGToDAGISel::tryInlineAsm(SDNode
*N
){
4386 std::vector
<SDValue
> AsmNodeOperands
;
4387 unsigned Flag
, Kind
;
4388 bool Changed
= false;
4389 unsigned NumOps
= N
->getNumOperands();
4391 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4392 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4393 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4394 // respectively. Since there is no constraint to explicitly specify a
4395 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4396 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4397 // them into a GPRPair.
4400 SDValue Glue
= N
->getGluedNode() ? N
->getOperand(NumOps
-1)
4401 : SDValue(nullptr,0);
4403 SmallVector
<bool, 8> OpChanged
;
4404 // Glue node will be appended late.
4405 for(unsigned i
= 0, e
= N
->getGluedNode() ? NumOps
- 1 : NumOps
; i
< e
; ++i
) {
4406 SDValue op
= N
->getOperand(i
);
4407 AsmNodeOperands
.push_back(op
);
4409 if (i
< InlineAsm::Op_FirstOperand
)
4412 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(i
))) {
4413 Flag
= C
->getZExtValue();
4414 Kind
= InlineAsm::getKind(Flag
);
4419 // Immediate operands to inline asm in the SelectionDAG are modeled with
4420 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4421 // the second is a constant with the value of the immediate. If we get here
4422 // and we have a Kind_Imm, skip the next operand, and continue.
4423 if (Kind
== InlineAsm::Kind_Imm
) {
4424 SDValue op
= N
->getOperand(++i
);
4425 AsmNodeOperands
.push_back(op
);
4429 unsigned NumRegs
= InlineAsm::getNumOperandRegisters(Flag
);
4431 OpChanged
.push_back(false);
4433 unsigned DefIdx
= 0;
4434 bool IsTiedToChangedOp
= false;
4435 // If it's a use that is tied with a previous def, it has no
4436 // reg class constraint.
4437 if (Changed
&& InlineAsm::isUseOperandTiedToDef(Flag
, DefIdx
))
4438 IsTiedToChangedOp
= OpChanged
[DefIdx
];
4440 // Memory operands to inline asm in the SelectionDAG are modeled with two
4441 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4442 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4443 // it doesn't get misinterpreted), and continue. We do this here because
4444 // it's important to update the OpChanged array correctly before moving on.
4445 if (Kind
== InlineAsm::Kind_Mem
) {
4446 SDValue op
= N
->getOperand(++i
);
4447 AsmNodeOperands
.push_back(op
);
4451 if (Kind
!= InlineAsm::Kind_RegUse
&& Kind
!= InlineAsm::Kind_RegDef
4452 && Kind
!= InlineAsm::Kind_RegDefEarlyClobber
)
4456 bool HasRC
= InlineAsm::hasRegClassConstraint(Flag
, RC
);
4457 if ((!IsTiedToChangedOp
&& (!HasRC
|| RC
!= ARM::GPRRegClassID
))
4461 assert((i
+2 < NumOps
) && "Invalid number of operands in inline asm");
4462 SDValue V0
= N
->getOperand(i
+1);
4463 SDValue V1
= N
->getOperand(i
+2);
4464 unsigned Reg0
= cast
<RegisterSDNode
>(V0
)->getReg();
4465 unsigned Reg1
= cast
<RegisterSDNode
>(V1
)->getReg();
4467 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
4469 if (Kind
== InlineAsm::Kind_RegDef
||
4470 Kind
== InlineAsm::Kind_RegDefEarlyClobber
) {
4471 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4472 // the original GPRs.
4474 Register GPVR
= MRI
.createVirtualRegister(&ARM::GPRPairRegClass
);
4475 PairedReg
= CurDAG
->getRegister(GPVR
, MVT::Untyped
);
4476 SDValue Chain
= SDValue(N
,0);
4478 SDNode
*GU
= N
->getGluedUser();
4479 SDValue RegCopy
= CurDAG
->getCopyFromReg(Chain
, dl
, GPVR
, MVT::Untyped
,
4482 // Extract values from a GPRPair reg and copy to the original GPR reg.
4483 SDValue Sub0
= CurDAG
->getTargetExtractSubreg(ARM::gsub_0
, dl
, MVT::i32
,
4485 SDValue Sub1
= CurDAG
->getTargetExtractSubreg(ARM::gsub_1
, dl
, MVT::i32
,
4487 SDValue T0
= CurDAG
->getCopyToReg(Sub0
, dl
, Reg0
, Sub0
,
4488 RegCopy
.getValue(1));
4489 SDValue T1
= CurDAG
->getCopyToReg(Sub1
, dl
, Reg1
, Sub1
, T0
.getValue(1));
4491 // Update the original glue user.
4492 std::vector
<SDValue
> Ops(GU
->op_begin(), GU
->op_end()-1);
4493 Ops
.push_back(T1
.getValue(1));
4494 CurDAG
->UpdateNodeOperands(GU
, Ops
);
4497 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4498 // GPRPair and then pass the GPRPair to the inline asm.
4499 SDValue Chain
= AsmNodeOperands
[InlineAsm::Op_InputChain
];
4501 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4502 SDValue T0
= CurDAG
->getCopyFromReg(Chain
, dl
, Reg0
, MVT::i32
,
4504 SDValue T1
= CurDAG
->getCopyFromReg(Chain
, dl
, Reg1
, MVT::i32
,
4506 SDValue Pair
= SDValue(createGPRPairNode(MVT::Untyped
, T0
, T1
), 0);
4508 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4509 // i32 VRs of inline asm with it.
4510 Register GPVR
= MRI
.createVirtualRegister(&ARM::GPRPairRegClass
);
4511 PairedReg
= CurDAG
->getRegister(GPVR
, MVT::Untyped
);
4512 Chain
= CurDAG
->getCopyToReg(T1
, dl
, GPVR
, Pair
, T1
.getValue(1));
4514 AsmNodeOperands
[InlineAsm::Op_InputChain
] = Chain
;
4515 Glue
= Chain
.getValue(1);
4520 if(PairedReg
.getNode()) {
4521 OpChanged
[OpChanged
.size() -1 ] = true;
4522 Flag
= InlineAsm::getFlagWord(Kind
, 1 /* RegNum*/);
4523 if (IsTiedToChangedOp
)
4524 Flag
= InlineAsm::getFlagWordForMatchingOp(Flag
, DefIdx
);
4526 Flag
= InlineAsm::getFlagWordForRegClass(Flag
, ARM::GPRPairRegClassID
);
4527 // Replace the current flag.
4528 AsmNodeOperands
[AsmNodeOperands
.size() -1] = CurDAG
->getTargetConstant(
4529 Flag
, dl
, MVT::i32
);
4530 // Add the new register node and skip the original two GPRs.
4531 AsmNodeOperands
.push_back(PairedReg
);
4532 // Skip the next two GPRs.
4538 AsmNodeOperands
.push_back(Glue
);
4542 SDValue New
= CurDAG
->getNode(N
->getOpcode(), SDLoc(N
),
4543 CurDAG
->getVTList(MVT::Other
, MVT::Glue
), AsmNodeOperands
);
4545 ReplaceNode(N
, New
.getNode());
4550 bool ARMDAGToDAGISel::
4551 SelectInlineAsmMemoryOperand(const SDValue
&Op
, unsigned ConstraintID
,
4552 std::vector
<SDValue
> &OutOps
) {
4553 switch(ConstraintID
) {
4555 llvm_unreachable("Unexpected asm memory constraint");
4556 case InlineAsm::Constraint_i
:
4557 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4558 // be an immediate and not a memory constraint.
4560 case InlineAsm::Constraint_m
:
4561 case InlineAsm::Constraint_o
:
4562 case InlineAsm::Constraint_Q
:
4563 case InlineAsm::Constraint_Um
:
4564 case InlineAsm::Constraint_Un
:
4565 case InlineAsm::Constraint_Uq
:
4566 case InlineAsm::Constraint_Us
:
4567 case InlineAsm::Constraint_Ut
:
4568 case InlineAsm::Constraint_Uv
:
4569 case InlineAsm::Constraint_Uy
:
4570 // Require the address to be in a register. That is safe for all ARM
4571 // variants and it is hard to do anything much smarter without knowing
4572 // how the operand is used.
4573 OutOps
.push_back(Op
);
4579 /// createARMISelDag - This pass converts a legalized DAG into a
4580 /// ARM-specific DAG, ready for instruction scheduling.
4582 FunctionPass
*llvm::createARMISelDag(ARMBaseTargetMachine
&TM
,
4583 CodeGenOpt::Level OptLevel
) {
4584 return new ARMDAGToDAGISel(TM
, OptLevel
);