1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines an instruction selector for the ARM target.
11 //===----------------------------------------------------------------------===//
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
39 #define DEBUG_TYPE "arm-isel"
42 DisableShifterOp("disable-shifter-op", cl::Hidden
,
43 cl::desc("Disable isel of shifter-op"),
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
52 class ARMDAGToDAGISel
: public SelectionDAGISel
{
53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54 /// make the right decision when generating code for different targets.
55 const ARMSubtarget
*Subtarget
;
58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
59 : SelectionDAGISel(tm
, OptLevel
) {}
61 bool runOnMachineFunction(MachineFunction
&MF
) override
{
62 // Reset the subtarget each time through.
63 Subtarget
= &MF
.getSubtarget
<ARMSubtarget
>();
64 SelectionDAGISel::runOnMachineFunction(MF
);
68 StringRef
getPassName() const override
{ return "ARM Instruction Selection"; }
70 void PreprocessISelDAG() override
;
72 /// getI32Imm - Return a target constant of type i32 with the specified
74 inline SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
75 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
78 void Select(SDNode
*N
) override
;
80 bool hasNoVMLxHazardUse(SDNode
*N
) const;
81 bool isShifterOpProfitable(const SDValue
&Shift
,
82 ARM_AM::ShiftOpc ShOpcVal
, unsigned ShAmt
);
83 bool SelectRegShifterOperand(SDValue N
, SDValue
&A
,
84 SDValue
&B
, SDValue
&C
,
85 bool CheckProfitability
= true);
86 bool SelectImmShifterOperand(SDValue N
, SDValue
&A
,
87 SDValue
&B
, bool CheckProfitability
= true);
88 bool SelectShiftRegShifterOperand(SDValue N
, SDValue
&A
,
89 SDValue
&B
, SDValue
&C
) {
90 // Don't apply the profitability check
91 return SelectRegShifterOperand(N
, A
, B
, C
, false);
93 bool SelectShiftImmShifterOperand(SDValue N
, SDValue
&A
,
95 // Don't apply the profitability check
96 return SelectImmShifterOperand(N
, A
, B
, false);
99 bool SelectAddLikeOr(SDNode
*Parent
, SDValue N
, SDValue
&Out
);
101 bool SelectAddrModeImm12(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
102 bool SelectLdStSOReg(SDValue N
, SDValue
&Base
, SDValue
&Offset
, SDValue
&Opc
);
104 bool SelectCMOVPred(SDValue N
, SDValue
&Pred
, SDValue
&Reg
) {
105 const ConstantSDNode
*CN
= cast
<ConstantSDNode
>(N
);
106 Pred
= CurDAG
->getTargetConstant(CN
->getZExtValue(), SDLoc(N
), MVT::i32
);
107 Reg
= CurDAG
->getRegister(ARM::CPSR
, MVT::i32
);
111 bool SelectAddrMode2OffsetReg(SDNode
*Op
, SDValue N
,
112 SDValue
&Offset
, SDValue
&Opc
);
113 bool SelectAddrMode2OffsetImm(SDNode
*Op
, SDValue N
,
114 SDValue
&Offset
, SDValue
&Opc
);
115 bool SelectAddrMode2OffsetImmPre(SDNode
*Op
, SDValue N
,
116 SDValue
&Offset
, SDValue
&Opc
);
117 bool SelectAddrOffsetNone(SDValue N
, SDValue
&Base
);
118 bool SelectAddrMode3(SDValue N
, SDValue
&Base
,
119 SDValue
&Offset
, SDValue
&Opc
);
120 bool SelectAddrMode3Offset(SDNode
*Op
, SDValue N
,
121 SDValue
&Offset
, SDValue
&Opc
);
122 bool IsAddressingMode5(SDValue N
, SDValue
&Base
, SDValue
&Offset
, bool FP16
);
123 bool SelectAddrMode5(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
124 bool SelectAddrMode5FP16(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
125 bool SelectAddrMode6(SDNode
*Parent
, SDValue N
, SDValue
&Addr
,SDValue
&Align
);
126 bool SelectAddrMode6Offset(SDNode
*Op
, SDValue N
, SDValue
&Offset
);
128 bool SelectAddrModePC(SDValue N
, SDValue
&Offset
, SDValue
&Label
);
130 // Thumb Addressing Modes:
131 bool SelectThumbAddrModeRR(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
132 bool SelectThumbAddrModeRRSext(SDValue N
, SDValue
&Base
, SDValue
&Offset
);
133 bool SelectThumbAddrModeImm5S(SDValue N
, unsigned Scale
, SDValue
&Base
,
135 bool SelectThumbAddrModeImm5S1(SDValue N
, SDValue
&Base
,
137 bool SelectThumbAddrModeImm5S2(SDValue N
, SDValue
&Base
,
139 bool SelectThumbAddrModeImm5S4(SDValue N
, SDValue
&Base
,
141 bool SelectThumbAddrModeSP(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
143 // Thumb 2 Addressing Modes:
144 bool SelectT2AddrModeImm12(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
145 bool SelectT2AddrModeImm8(SDValue N
, SDValue
&Base
,
147 bool SelectT2AddrModeImm8Offset(SDNode
*Op
, SDValue N
,
149 bool SelectT2AddrModeSoReg(SDValue N
, SDValue
&Base
,
150 SDValue
&OffReg
, SDValue
&ShImm
);
151 bool SelectT2AddrModeExclusive(SDValue N
, SDValue
&Base
, SDValue
&OffImm
);
153 inline bool is_so_imm(unsigned Imm
) const {
154 return ARM_AM::getSOImmVal(Imm
) != -1;
157 inline bool is_so_imm_not(unsigned Imm
) const {
158 return ARM_AM::getSOImmVal(~Imm
) != -1;
161 inline bool is_t2_so_imm(unsigned Imm
) const {
162 return ARM_AM::getT2SOImmVal(Imm
) != -1;
165 inline bool is_t2_so_imm_not(unsigned Imm
) const {
166 return ARM_AM::getT2SOImmVal(~Imm
) != -1;
169 // Include the pieces autogenerated from the target description.
170 #include "ARMGenDAGISel.inc"
173 void transferMemOperands(SDNode
*Src
, SDNode
*Dst
);
175 /// Indexed (pre/post inc/dec) load matching code for ARM.
176 bool tryARMIndexedLoad(SDNode
*N
);
177 bool tryT1IndexedLoad(SDNode
*N
);
178 bool tryT2IndexedLoad(SDNode
*N
);
180 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
181 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
182 /// loads of D registers and even subregs and odd subregs of Q registers.
183 /// For NumVecs <= 2, QOpcodes1 is not used.
184 void SelectVLD(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
185 const uint16_t *DOpcodes
, const uint16_t *QOpcodes0
,
186 const uint16_t *QOpcodes1
);
188 /// SelectVST - Select NEON store intrinsics. NumVecs should
189 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
190 /// stores of D registers and even subregs and odd subregs of Q registers.
191 /// For NumVecs <= 2, QOpcodes1 is not used.
192 void SelectVST(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
193 const uint16_t *DOpcodes
, const uint16_t *QOpcodes0
,
194 const uint16_t *QOpcodes1
);
196 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
197 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
198 /// load/store of D registers and Q registers.
199 void SelectVLDSTLane(SDNode
*N
, bool IsLoad
, bool isUpdating
,
200 unsigned NumVecs
, const uint16_t *DOpcodes
,
201 const uint16_t *QOpcodes
);
203 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
204 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
205 /// for loading D registers.
206 void SelectVLDDup(SDNode
*N
, bool IsIntrinsic
, bool isUpdating
,
207 unsigned NumVecs
, const uint16_t *DOpcodes
,
208 const uint16_t *QOpcodes0
= nullptr,
209 const uint16_t *QOpcodes1
= nullptr);
211 /// Try to select SBFX/UBFX instructions for ARM.
212 bool tryV6T2BitfieldExtractOp(SDNode
*N
, bool isSigned
);
214 // Select special operations if node forms integer ABS pattern
215 bool tryABSOp(SDNode
*N
);
217 bool tryReadRegister(SDNode
*N
);
218 bool tryWriteRegister(SDNode
*N
);
220 bool tryInlineAsm(SDNode
*N
);
222 void SelectCMPZ(SDNode
*N
, bool &SwitchEQNEToPLMI
);
224 void SelectCMP_SWAP(SDNode
*N
);
226 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
227 /// inline asm expressions.
228 bool SelectInlineAsmMemoryOperand(const SDValue
&Op
, unsigned ConstraintID
,
229 std::vector
<SDValue
> &OutOps
) override
;
231 // Form pairs of consecutive R, S, D, or Q registers.
232 SDNode
*createGPRPairNode(EVT VT
, SDValue V0
, SDValue V1
);
233 SDNode
*createSRegPairNode(EVT VT
, SDValue V0
, SDValue V1
);
234 SDNode
*createDRegPairNode(EVT VT
, SDValue V0
, SDValue V1
);
235 SDNode
*createQRegPairNode(EVT VT
, SDValue V0
, SDValue V1
);
237 // Form sequences of 4 consecutive S, D, or Q registers.
238 SDNode
*createQuadSRegsNode(EVT VT
, SDValue V0
, SDValue V1
, SDValue V2
, SDValue V3
);
239 SDNode
*createQuadDRegsNode(EVT VT
, SDValue V0
, SDValue V1
, SDValue V2
, SDValue V3
);
240 SDNode
*createQuadQRegsNode(EVT VT
, SDValue V0
, SDValue V1
, SDValue V2
, SDValue V3
);
242 // Get the alignment operand for a NEON VLD or VST instruction.
243 SDValue
GetVLDSTAlign(SDValue Align
, const SDLoc
&dl
, unsigned NumVecs
,
246 /// Returns the number of instructions required to materialize the given
247 /// constant in a register, or 3 if a literal pool load is needed.
248 unsigned ConstantMaterializationCost(unsigned Val
) const;
250 /// Checks if N is a multiplication by a constant where we can extract out a
251 /// power of two from the constant so that it can be used in a shift, but only
252 /// if it simplifies the materialization of the constant. Returns true if it
253 /// is, and assigns to PowerOfTwo the power of two that should be extracted
254 /// out and to NewMulConst the new constant to be multiplied by.
255 bool canExtractShiftFromMul(const SDValue
&N
, unsigned MaxShift
,
256 unsigned &PowerOfTwo
, SDValue
&NewMulConst
) const;
258 /// Replace N with M in CurDAG, in a way that also ensures that M gets
259 /// selected when N would have been selected.
260 void replaceDAGValue(const SDValue
&N
, SDValue M
);
264 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
265 /// operand. If so Imm will receive the 32-bit value.
266 static bool isInt32Immediate(SDNode
*N
, unsigned &Imm
) {
267 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i32
) {
268 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
274 // isInt32Immediate - This method tests to see if a constant operand.
275 // If so Imm will receive the 32 bit value.
276 static bool isInt32Immediate(SDValue N
, unsigned &Imm
) {
277 return isInt32Immediate(N
.getNode(), Imm
);
280 // isOpcWithIntImmediate - This method tests to see if the node is a specific
281 // opcode and that it has a immediate integer right operand.
282 // If so Imm will receive the 32 bit value.
283 static bool isOpcWithIntImmediate(SDNode
*N
, unsigned Opc
, unsigned& Imm
) {
284 return N
->getOpcode() == Opc
&&
285 isInt32Immediate(N
->getOperand(1).getNode(), Imm
);
288 /// Check whether a particular node is a constant value representable as
289 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
291 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
292 static bool isScaledConstantInRange(SDValue Node
, int Scale
,
293 int RangeMin
, int RangeMax
,
294 int &ScaledConstant
) {
295 assert(Scale
> 0 && "Invalid scale!");
297 // Check that this is a constant.
298 const ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Node
);
302 ScaledConstant
= (int) C
->getZExtValue();
303 if ((ScaledConstant
% Scale
) != 0)
306 ScaledConstant
/= Scale
;
307 return ScaledConstant
>= RangeMin
&& ScaledConstant
< RangeMax
;
310 void ARMDAGToDAGISel::PreprocessISelDAG() {
311 if (!Subtarget
->hasV6T2Ops())
314 bool isThumb2
= Subtarget
->isThumb();
315 for (SelectionDAG::allnodes_iterator I
= CurDAG
->allnodes_begin(),
316 E
= CurDAG
->allnodes_end(); I
!= E
; ) {
317 SDNode
*N
= &*I
++; // Preincrement iterator to avoid invalidation issues.
319 if (N
->getOpcode() != ISD::ADD
)
322 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
323 // leading zeros, followed by consecutive set bits, followed by 1 or 2
324 // trailing zeros, e.g. 1020.
325 // Transform the expression to
326 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
327 // of trailing zeros of c2. The left shift would be folded as an shifter
328 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
331 SDValue N0
= N
->getOperand(0);
332 SDValue N1
= N
->getOperand(1);
333 unsigned And_imm
= 0;
334 if (!isOpcWithIntImmediate(N1
.getNode(), ISD::AND
, And_imm
)) {
335 if (isOpcWithIntImmediate(N0
.getNode(), ISD::AND
, And_imm
))
341 // Check if the AND mask is an immediate of the form: 000.....1111111100
342 unsigned TZ
= countTrailingZeros(And_imm
);
343 if (TZ
!= 1 && TZ
!= 2)
344 // Be conservative here. Shifter operands aren't always free. e.g. On
345 // Swift, left shifter operand of 1 / 2 for free but others are not.
347 // ubfx r3, r1, #16, #8
348 // ldr.w r3, [r0, r3, lsl #2]
351 // and.w r2, r9, r1, lsr #14
355 if (And_imm
& (And_imm
+ 1))
358 // Look for (and (srl X, c1), c2).
359 SDValue Srl
= N1
.getOperand(0);
360 unsigned Srl_imm
= 0;
361 if (!isOpcWithIntImmediate(Srl
.getNode(), ISD::SRL
, Srl_imm
) ||
365 // Make sure first operand is not a shifter operand which would prevent
366 // folding of the left shift.
371 if (SelectImmShifterOperand(N0
, CPTmp0
, CPTmp1
))
374 if (SelectImmShifterOperand(N0
, CPTmp0
, CPTmp1
) ||
375 SelectRegShifterOperand(N0
, CPTmp0
, CPTmp1
, CPTmp2
))
379 // Now make the transformation.
380 Srl
= CurDAG
->getNode(ISD::SRL
, SDLoc(Srl
), MVT::i32
,
382 CurDAG
->getConstant(Srl_imm
+ TZ
, SDLoc(Srl
),
384 N1
= CurDAG
->getNode(ISD::AND
, SDLoc(N1
), MVT::i32
,
386 CurDAG
->getConstant(And_imm
, SDLoc(Srl
), MVT::i32
));
387 N1
= CurDAG
->getNode(ISD::SHL
, SDLoc(N1
), MVT::i32
,
388 N1
, CurDAG
->getConstant(TZ
, SDLoc(Srl
), MVT::i32
));
389 CurDAG
->UpdateNodeOperands(N
, N0
, N1
);
393 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
394 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
395 /// least on current ARM implementations) which should be avoidded.
396 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode
*N
) const {
397 if (OptLevel
== CodeGenOpt::None
)
400 if (!Subtarget
->hasVMLxHazards())
406 SDNode
*Use
= *N
->use_begin();
407 if (Use
->getOpcode() == ISD::CopyToReg
)
409 if (Use
->isMachineOpcode()) {
410 const ARMBaseInstrInfo
*TII
= static_cast<const ARMBaseInstrInfo
*>(
411 CurDAG
->getSubtarget().getInstrInfo());
413 const MCInstrDesc
&MCID
= TII
->get(Use
->getMachineOpcode());
416 unsigned Opcode
= MCID
.getOpcode();
417 if (Opcode
== ARM::VMOVRS
|| Opcode
== ARM::VMOVRRD
)
419 // vmlx feeding into another vmlx. We actually want to unfold
420 // the use later in the MLxExpansion pass. e.g.
422 // vmla (stall 8 cycles)
427 // This adds up to about 18 - 19 cycles.
430 // vmul (stall 4 cycles)
431 // vadd adds up to about 14 cycles.
432 return TII
->isFpMLxInstruction(Opcode
);
438 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue
&Shift
,
439 ARM_AM::ShiftOpc ShOpcVal
,
441 if (!Subtarget
->isLikeA9() && !Subtarget
->isSwift())
443 if (Shift
.hasOneUse())
446 return ShOpcVal
== ARM_AM::lsl
&&
447 (ShAmt
== 2 || (Subtarget
->isSwift() && ShAmt
== 1));
450 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val
) const {
451 if (Subtarget
->isThumb()) {
452 if (Val
<= 255) return 1; // MOV
453 if (Subtarget
->hasV6T2Ops() &&
454 (Val
<= 0xffff || // MOV
455 ARM_AM::getT2SOImmVal(Val
) != -1 || // MOVW
456 ARM_AM::getT2SOImmVal(~Val
) != -1)) // MVN
458 if (Val
<= 510) return 2; // MOV + ADDi8
459 if (~Val
<= 255) return 2; // MOV + MVN
460 if (ARM_AM::isThumbImmShiftedVal(Val
)) return 2; // MOV + LSL
462 if (ARM_AM::getSOImmVal(Val
) != -1) return 1; // MOV
463 if (ARM_AM::getSOImmVal(~Val
) != -1) return 1; // MVN
464 if (Subtarget
->hasV6T2Ops() && Val
<= 0xffff) return 1; // MOVW
465 if (ARM_AM::isSOImmTwoPartVal(Val
)) return 2; // two instrs
467 if (Subtarget
->useMovt()) return 2; // MOVW + MOVT
468 return 3; // Literal pool load
471 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue
&N
,
473 unsigned &PowerOfTwo
,
474 SDValue
&NewMulConst
) const {
475 assert(N
.getOpcode() == ISD::MUL
);
476 assert(MaxShift
> 0);
478 // If the multiply is used in more than one place then changing the constant
479 // will make other uses incorrect, so don't.
480 if (!N
.hasOneUse()) return false;
481 // Check if the multiply is by a constant
482 ConstantSDNode
*MulConst
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
483 if (!MulConst
) return false;
484 // If the constant is used in more than one place then modifying it will mean
485 // we need to materialize two constants instead of one, which is a bad idea.
486 if (!MulConst
->hasOneUse()) return false;
487 unsigned MulConstVal
= MulConst
->getZExtValue();
488 if (MulConstVal
== 0) return false;
490 // Find the largest power of 2 that MulConstVal is a multiple of
491 PowerOfTwo
= MaxShift
;
492 while ((MulConstVal
% (1 << PowerOfTwo
)) != 0) {
494 if (PowerOfTwo
== 0) return false;
497 // Only optimise if the new cost is better
498 unsigned NewMulConstVal
= MulConstVal
/ (1 << PowerOfTwo
);
499 NewMulConst
= CurDAG
->getConstant(NewMulConstVal
, SDLoc(N
), MVT::i32
);
500 unsigned OldCost
= ConstantMaterializationCost(MulConstVal
);
501 unsigned NewCost
= ConstantMaterializationCost(NewMulConstVal
);
502 return NewCost
< OldCost
;
505 void ARMDAGToDAGISel::replaceDAGValue(const SDValue
&N
, SDValue M
) {
506 CurDAG
->RepositionNode(N
.getNode()->getIterator(), M
.getNode());
510 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N
,
513 bool CheckProfitability
) {
514 if (DisableShifterOp
)
517 // If N is a multiply-by-constant and it's profitable to extract a shift and
518 // use it in a shifted operand do so.
519 if (N
.getOpcode() == ISD::MUL
) {
520 unsigned PowerOfTwo
= 0;
522 if (canExtractShiftFromMul(N
, 31, PowerOfTwo
, NewMulConst
)) {
523 HandleSDNode
Handle(N
);
525 replaceDAGValue(N
.getOperand(1), NewMulConst
);
526 BaseReg
= Handle
.getValue();
527 Opc
= CurDAG
->getTargetConstant(
528 ARM_AM::getSORegOpc(ARM_AM::lsl
, PowerOfTwo
), Loc
, MVT::i32
);
533 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOpcode());
535 // Don't match base register only case. That is matched to a separate
536 // lower complexity pattern with explicit register operand.
537 if (ShOpcVal
== ARM_AM::no_shift
) return false;
539 BaseReg
= N
.getOperand(0);
540 unsigned ShImmVal
= 0;
541 ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
542 if (!RHS
) return false;
543 ShImmVal
= RHS
->getZExtValue() & 31;
544 Opc
= CurDAG
->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal
, ShImmVal
),
549 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N
,
553 bool CheckProfitability
) {
554 if (DisableShifterOp
)
557 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOpcode());
559 // Don't match base register only case. That is matched to a separate
560 // lower complexity pattern with explicit register operand.
561 if (ShOpcVal
== ARM_AM::no_shift
) return false;
563 BaseReg
= N
.getOperand(0);
564 unsigned ShImmVal
= 0;
565 ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
566 if (RHS
) return false;
568 ShReg
= N
.getOperand(1);
569 if (CheckProfitability
&& !isShifterOpProfitable(N
, ShOpcVal
, ShImmVal
))
571 Opc
= CurDAG
->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal
, ShImmVal
),
576 // Determine whether an ISD::OR's operands are suitable to turn the operation
577 // into an addition, which often has more compact encodings.
578 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode
*Parent
, SDValue N
, SDValue
&Out
) {
579 assert(Parent
->getOpcode() == ISD::OR
&& "unexpected parent");
581 return CurDAG
->haveNoCommonBitsSet(N
, Parent
->getOperand(1));
585 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N
,
588 // Match simple R + imm12 operands.
591 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
592 !CurDAG
->isBaseWithConstantOffset(N
)) {
593 if (N
.getOpcode() == ISD::FrameIndex
) {
594 // Match frame index.
595 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
596 Base
= CurDAG
->getTargetFrameIndex(
597 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
598 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
602 if (N
.getOpcode() == ARMISD::Wrapper
&&
603 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
604 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
605 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
606 Base
= N
.getOperand(0);
609 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
613 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
614 int RHSC
= (int)RHS
->getSExtValue();
615 if (N
.getOpcode() == ISD::SUB
)
618 if (RHSC
> -0x1000 && RHSC
< 0x1000) { // 12 bits
619 Base
= N
.getOperand(0);
620 if (Base
.getOpcode() == ISD::FrameIndex
) {
621 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
622 Base
= CurDAG
->getTargetFrameIndex(
623 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
625 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
632 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
638 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N
, SDValue
&Base
, SDValue
&Offset
,
640 if (N
.getOpcode() == ISD::MUL
&&
641 ((!Subtarget
->isLikeA9() && !Subtarget
->isSwift()) || N
.hasOneUse())) {
642 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
643 // X * [3,5,9] -> X + X * [2,4,8] etc.
644 int RHSC
= (int)RHS
->getZExtValue();
647 ARM_AM::AddrOpc AddSub
= ARM_AM::add
;
649 AddSub
= ARM_AM::sub
;
652 if (isPowerOf2_32(RHSC
)) {
653 unsigned ShAmt
= Log2_32(RHSC
);
654 Base
= Offset
= N
.getOperand(0);
655 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, ShAmt
,
664 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
665 // ISD::OR that is equivalent to an ISD::ADD.
666 !CurDAG
->isBaseWithConstantOffset(N
))
669 // Leave simple R +/- imm12 operands for LDRi12
670 if (N
.getOpcode() == ISD::ADD
|| N
.getOpcode() == ISD::OR
) {
672 if (isScaledConstantInRange(N
.getOperand(1), /*Scale=*/1,
673 -0x1000+1, 0x1000, RHSC
)) // 12 bits.
677 // Otherwise this is R +/- [possibly shifted] R.
678 ARM_AM::AddrOpc AddSub
= N
.getOpcode() == ISD::SUB
? ARM_AM::sub
:ARM_AM::add
;
679 ARM_AM::ShiftOpc ShOpcVal
=
680 ARM_AM::getShiftOpcForNode(N
.getOperand(1).getOpcode());
683 Base
= N
.getOperand(0);
684 Offset
= N
.getOperand(1);
686 if (ShOpcVal
!= ARM_AM::no_shift
) {
687 // Check to see if the RHS of the shift is a constant, if not, we can't fold
689 if (ConstantSDNode
*Sh
=
690 dyn_cast
<ConstantSDNode
>(N
.getOperand(1).getOperand(1))) {
691 ShAmt
= Sh
->getZExtValue();
692 if (isShifterOpProfitable(Offset
, ShOpcVal
, ShAmt
))
693 Offset
= N
.getOperand(1).getOperand(0);
696 ShOpcVal
= ARM_AM::no_shift
;
699 ShOpcVal
= ARM_AM::no_shift
;
703 // Try matching (R shl C) + (R).
704 if (N
.getOpcode() != ISD::SUB
&& ShOpcVal
== ARM_AM::no_shift
&&
705 !(Subtarget
->isLikeA9() || Subtarget
->isSwift() ||
706 N
.getOperand(0).hasOneUse())) {
707 ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOperand(0).getOpcode());
708 if (ShOpcVal
!= ARM_AM::no_shift
) {
709 // Check to see if the RHS of the shift is a constant, if not, we can't
711 if (ConstantSDNode
*Sh
=
712 dyn_cast
<ConstantSDNode
>(N
.getOperand(0).getOperand(1))) {
713 ShAmt
= Sh
->getZExtValue();
714 if (isShifterOpProfitable(N
.getOperand(0), ShOpcVal
, ShAmt
)) {
715 Offset
= N
.getOperand(0).getOperand(0);
716 Base
= N
.getOperand(1);
719 ShOpcVal
= ARM_AM::no_shift
;
722 ShOpcVal
= ARM_AM::no_shift
;
727 // If Offset is a multiply-by-constant and it's profitable to extract a shift
728 // and use it in a shifted operand do so.
729 if (Offset
.getOpcode() == ISD::MUL
&& N
.hasOneUse()) {
730 unsigned PowerOfTwo
= 0;
732 if (canExtractShiftFromMul(Offset
, 31, PowerOfTwo
, NewMulConst
)) {
733 HandleSDNode
Handle(Offset
);
734 replaceDAGValue(Offset
.getOperand(1), NewMulConst
);
735 Offset
= Handle
.getValue();
737 ShOpcVal
= ARM_AM::lsl
;
741 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, ShAmt
, ShOpcVal
),
746 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode
*Op
, SDValue N
,
747 SDValue
&Offset
, SDValue
&Opc
) {
748 unsigned Opcode
= Op
->getOpcode();
749 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
750 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
751 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
752 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
753 ? ARM_AM::add
: ARM_AM::sub
;
755 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x1000, Val
))
759 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(N
.getOpcode());
761 if (ShOpcVal
!= ARM_AM::no_shift
) {
762 // Check to see if the RHS of the shift is a constant, if not, we can't fold
764 if (ConstantSDNode
*Sh
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
765 ShAmt
= Sh
->getZExtValue();
766 if (isShifterOpProfitable(N
, ShOpcVal
, ShAmt
))
767 Offset
= N
.getOperand(0);
770 ShOpcVal
= ARM_AM::no_shift
;
773 ShOpcVal
= ARM_AM::no_shift
;
777 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, ShAmt
, ShOpcVal
),
782 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode
*Op
, SDValue N
,
783 SDValue
&Offset
, SDValue
&Opc
) {
784 unsigned Opcode
= Op
->getOpcode();
785 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
786 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
787 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
788 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
789 ? ARM_AM::add
: ARM_AM::sub
;
791 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x1000, Val
)) { // 12 bits.
792 if (AddSub
== ARM_AM::sub
) Val
*= -1;
793 Offset
= CurDAG
->getRegister(0, MVT::i32
);
794 Opc
= CurDAG
->getTargetConstant(Val
, SDLoc(Op
), MVT::i32
);
802 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode
*Op
, SDValue N
,
803 SDValue
&Offset
, SDValue
&Opc
) {
804 unsigned Opcode
= Op
->getOpcode();
805 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
806 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
807 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
808 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
809 ? ARM_AM::add
: ARM_AM::sub
;
811 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x1000, Val
)) { // 12 bits.
812 Offset
= CurDAG
->getRegister(0, MVT::i32
);
813 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM2Opc(AddSub
, Val
,
815 SDLoc(Op
), MVT::i32
);
822 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N
, SDValue
&Base
) {
827 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N
,
828 SDValue
&Base
, SDValue
&Offset
,
830 if (N
.getOpcode() == ISD::SUB
) {
831 // X - C is canonicalize to X + -C, no need to handle it here.
832 Base
= N
.getOperand(0);
833 Offset
= N
.getOperand(1);
834 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub
, 0), SDLoc(N
),
839 if (!CurDAG
->isBaseWithConstantOffset(N
)) {
841 if (N
.getOpcode() == ISD::FrameIndex
) {
842 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
843 Base
= CurDAG
->getTargetFrameIndex(
844 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
846 Offset
= CurDAG
->getRegister(0, MVT::i32
);
847 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add
, 0), SDLoc(N
),
852 // If the RHS is +/- imm8, fold into addr mode.
854 if (isScaledConstantInRange(N
.getOperand(1), /*Scale=*/1,
855 -256 + 1, 256, RHSC
)) { // 8 bits.
856 Base
= N
.getOperand(0);
857 if (Base
.getOpcode() == ISD::FrameIndex
) {
858 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
859 Base
= CurDAG
->getTargetFrameIndex(
860 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
862 Offset
= CurDAG
->getRegister(0, MVT::i32
);
864 ARM_AM::AddrOpc AddSub
= ARM_AM::add
;
866 AddSub
= ARM_AM::sub
;
869 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(AddSub
, RHSC
), SDLoc(N
),
874 Base
= N
.getOperand(0);
875 Offset
= N
.getOperand(1);
876 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add
, 0), SDLoc(N
),
881 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode
*Op
, SDValue N
,
882 SDValue
&Offset
, SDValue
&Opc
) {
883 unsigned Opcode
= Op
->getOpcode();
884 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
885 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
886 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
887 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
888 ? ARM_AM::add
: ARM_AM::sub
;
890 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 256, Val
)) { // 12 bits.
891 Offset
= CurDAG
->getRegister(0, MVT::i32
);
892 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(AddSub
, Val
), SDLoc(Op
),
898 Opc
= CurDAG
->getTargetConstant(ARM_AM::getAM3Opc(AddSub
, 0), SDLoc(Op
),
903 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N
, SDValue
&Base
, SDValue
&Offset
,
905 if (!CurDAG
->isBaseWithConstantOffset(N
)) {
907 if (N
.getOpcode() == ISD::FrameIndex
) {
908 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
909 Base
= CurDAG
->getTargetFrameIndex(
910 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
911 } else if (N
.getOpcode() == ARMISD::Wrapper
&&
912 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
913 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
914 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
915 Base
= N
.getOperand(0);
917 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add
, 0),
922 // If the RHS is +/- imm8, fold into addr mode.
924 const int Scale
= FP16
? 2 : 4;
926 if (isScaledConstantInRange(N
.getOperand(1), Scale
, -255, 256, RHSC
)) {
927 Base
= N
.getOperand(0);
928 if (Base
.getOpcode() == ISD::FrameIndex
) {
929 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
930 Base
= CurDAG
->getTargetFrameIndex(
931 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
934 ARM_AM::AddrOpc AddSub
= ARM_AM::add
;
936 AddSub
= ARM_AM::sub
;
941 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub
, RHSC
),
944 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5Opc(AddSub
, RHSC
),
953 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add
, 0),
956 Offset
= CurDAG
->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add
, 0),
962 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N
,
963 SDValue
&Base
, SDValue
&Offset
) {
964 return IsAddressingMode5(N
, Base
, Offset
, /*FP16=*/ false);
967 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N
,
968 SDValue
&Base
, SDValue
&Offset
) {
969 return IsAddressingMode5(N
, Base
, Offset
, /*FP16=*/ true);
972 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode
*Parent
, SDValue N
, SDValue
&Addr
,
976 unsigned Alignment
= 0;
978 MemSDNode
*MemN
= cast
<MemSDNode
>(Parent
);
980 if (isa
<LSBaseSDNode
>(MemN
) ||
981 ((MemN
->getOpcode() == ARMISD::VST1_UPD
||
982 MemN
->getOpcode() == ARMISD::VLD1_UPD
) &&
983 MemN
->getConstantOperandVal(MemN
->getNumOperands() - 1) == 1)) {
984 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
985 // The maximum alignment is equal to the memory size being referenced.
986 unsigned MMOAlign
= MemN
->getAlignment();
987 unsigned MemSize
= MemN
->getMemoryVT().getSizeInBits() / 8;
988 if (MMOAlign
>= MemSize
&& MemSize
> 1)
991 // All other uses of addrmode6 are for intrinsics. For now just record
992 // the raw alignment value; it will be refined later based on the legal
993 // alignment operands for the intrinsic.
994 Alignment
= MemN
->getAlignment();
997 Align
= CurDAG
->getTargetConstant(Alignment
, SDLoc(N
), MVT::i32
);
1001 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode
*Op
, SDValue N
,
1003 LSBaseSDNode
*LdSt
= cast
<LSBaseSDNode
>(Op
);
1004 ISD::MemIndexedMode AM
= LdSt
->getAddressingMode();
1005 if (AM
!= ISD::POST_INC
)
1008 if (ConstantSDNode
*NC
= dyn_cast
<ConstantSDNode
>(N
)) {
1009 if (NC
->getZExtValue() * 8 == LdSt
->getMemoryVT().getSizeInBits())
1010 Offset
= CurDAG
->getRegister(0, MVT::i32
);
1015 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N
,
1016 SDValue
&Offset
, SDValue
&Label
) {
1017 if (N
.getOpcode() == ARMISD::PIC_ADD
&& N
.hasOneUse()) {
1018 Offset
= N
.getOperand(0);
1019 SDValue N1
= N
.getOperand(1);
1020 Label
= CurDAG
->getTargetConstant(cast
<ConstantSDNode
>(N1
)->getZExtValue(),
1021 SDLoc(N
), MVT::i32
);
//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//
1033 static bool shouldUseZeroOffsetLdSt(SDValue N
) {
1034 // Negative numbers are difficult to materialise in thumb1. If we are
1035 // selecting the add of a negative, instead try to select ri with a zero
1036 // offset, so create the add node directly which will become a sub.
1037 if (N
.getOpcode() != ISD::ADD
)
1040 // Look for an imm which is not legal for ld/st, but is legal for sub.
1041 if (auto C
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1)))
1042 return C
->getSExtValue() < 0 && C
->getSExtValue() >= -255;
1047 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N
, SDValue
&Base
,
1049 if (N
.getOpcode() != ISD::ADD
&& !CurDAG
->isBaseWithConstantOffset(N
)) {
1050 ConstantSDNode
*NC
= dyn_cast
<ConstantSDNode
>(N
);
1051 if (!NC
|| !NC
->isNullValue())
1058 Base
= N
.getOperand(0);
1059 Offset
= N
.getOperand(1);
1063 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N
, SDValue
&Base
,
1065 if (shouldUseZeroOffsetLdSt(N
))
1066 return false; // Select ri instead
1067 return SelectThumbAddrModeRRSext(N
, Base
, Offset
);
1071 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N
, unsigned Scale
,
1072 SDValue
&Base
, SDValue
&OffImm
) {
1073 if (shouldUseZeroOffsetLdSt(N
)) {
1075 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1079 if (!CurDAG
->isBaseWithConstantOffset(N
)) {
1080 if (N
.getOpcode() == ISD::ADD
) {
1081 return false; // We want to select register offset instead
1082 } else if (N
.getOpcode() == ARMISD::Wrapper
&&
1083 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
1084 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
1085 N
.getOperand(0).getOpcode() != ISD::TargetConstantPool
&&
1086 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
1087 Base
= N
.getOperand(0);
1092 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1096 // If the RHS is + imm5 * scale, fold into addr mode.
1098 if (isScaledConstantInRange(N
.getOperand(1), Scale
, 0, 32, RHSC
)) {
1099 Base
= N
.getOperand(0);
1100 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1104 // Offset is too large, so use register offset instead.
1109 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N
, SDValue
&Base
,
1111 return SelectThumbAddrModeImm5S(N
, 4, Base
, OffImm
);
1115 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N
, SDValue
&Base
,
1117 return SelectThumbAddrModeImm5S(N
, 2, Base
, OffImm
);
1121 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N
, SDValue
&Base
,
1123 return SelectThumbAddrModeImm5S(N
, 1, Base
, OffImm
);
1126 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N
,
1127 SDValue
&Base
, SDValue
&OffImm
) {
1128 if (N
.getOpcode() == ISD::FrameIndex
) {
1129 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
1130 // Only multiples of 4 are allowed for the offset, so the frame object
1131 // alignment must be at least 4.
1132 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
1133 if (MFI
.getObjectAlignment(FI
) < 4)
1134 MFI
.setObjectAlignment(FI
, 4);
1135 Base
= CurDAG
->getTargetFrameIndex(
1136 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1137 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1141 if (!CurDAG
->isBaseWithConstantOffset(N
))
1144 if (N
.getOperand(0).getOpcode() == ISD::FrameIndex
) {
1145 // If the RHS is + imm8 * scale, fold into addr mode.
1147 if (isScaledConstantInRange(N
.getOperand(1), /*Scale=*/4, 0, 256, RHSC
)) {
1148 Base
= N
.getOperand(0);
1149 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1150 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1151 // indexed by the LHS must be 4-byte aligned.
1152 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
1153 if (MFI
.getObjectAlignment(FI
) < 4)
1154 MFI
.setObjectAlignment(FI
, 4);
1155 Base
= CurDAG
->getTargetFrameIndex(
1156 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1157 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//
1171 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N
,
1172 SDValue
&Base
, SDValue
&OffImm
) {
1173 // Match simple R + imm12 operands.
1176 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
1177 !CurDAG
->isBaseWithConstantOffset(N
)) {
1178 if (N
.getOpcode() == ISD::FrameIndex
) {
1179 // Match frame index.
1180 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
1181 Base
= CurDAG
->getTargetFrameIndex(
1182 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1183 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1187 if (N
.getOpcode() == ARMISD::Wrapper
&&
1188 N
.getOperand(0).getOpcode() != ISD::TargetGlobalAddress
&&
1189 N
.getOperand(0).getOpcode() != ISD::TargetExternalSymbol
&&
1190 N
.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress
) {
1191 Base
= N
.getOperand(0);
1192 if (Base
.getOpcode() == ISD::TargetConstantPool
)
1193 return false; // We want to select t2LDRpci instead.
1196 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1200 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1201 if (SelectT2AddrModeImm8(N
, Base
, OffImm
))
1202 // Let t2LDRi8 handle (R - imm8).
1205 int RHSC
= (int)RHS
->getZExtValue();
1206 if (N
.getOpcode() == ISD::SUB
)
1209 if (RHSC
>= 0 && RHSC
< 0x1000) { // 12 bits (unsigned)
1210 Base
= N
.getOperand(0);
1211 if (Base
.getOpcode() == ISD::FrameIndex
) {
1212 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1213 Base
= CurDAG
->getTargetFrameIndex(
1214 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1216 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1223 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1227 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N
,
1228 SDValue
&Base
, SDValue
&OffImm
) {
1229 // Match simple R - imm8 operands.
1230 if (N
.getOpcode() != ISD::ADD
&& N
.getOpcode() != ISD::SUB
&&
1231 !CurDAG
->isBaseWithConstantOffset(N
))
1234 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1235 int RHSC
= (int)RHS
->getSExtValue();
1236 if (N
.getOpcode() == ISD::SUB
)
1239 if ((RHSC
>= -255) && (RHSC
< 0)) { // 8 bits (always negative)
1240 Base
= N
.getOperand(0);
1241 if (Base
.getOpcode() == ISD::FrameIndex
) {
1242 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1243 Base
= CurDAG
->getTargetFrameIndex(
1244 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1246 OffImm
= CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
);
1254 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode
*Op
, SDValue N
,
1256 unsigned Opcode
= Op
->getOpcode();
1257 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
1258 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
1259 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
1261 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x100, RHSC
)) { // 8 bits.
1262 OffImm
= ((AM
== ISD::PRE_INC
) || (AM
== ISD::POST_INC
))
1263 ? CurDAG
->getTargetConstant(RHSC
, SDLoc(N
), MVT::i32
)
1264 : CurDAG
->getTargetConstant(-RHSC
, SDLoc(N
), MVT::i32
);
1271 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N
,
1273 SDValue
&OffReg
, SDValue
&ShImm
) {
1274 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1275 if (N
.getOpcode() != ISD::ADD
&& !CurDAG
->isBaseWithConstantOffset(N
))
1278 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1279 if (ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1280 int RHSC
= (int)RHS
->getZExtValue();
1281 if (RHSC
>= 0 && RHSC
< 0x1000) // 12 bits (unsigned)
1283 else if (RHSC
< 0 && RHSC
>= -255) // 8 bits
1287 // Look for (R + R) or (R + (R << [1,2,3])).
1289 Base
= N
.getOperand(0);
1290 OffReg
= N
.getOperand(1);
1292 // Swap if it is ((R << c) + R).
1293 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(OffReg
.getOpcode());
1294 if (ShOpcVal
!= ARM_AM::lsl
) {
1295 ShOpcVal
= ARM_AM::getShiftOpcForNode(Base
.getOpcode());
1296 if (ShOpcVal
== ARM_AM::lsl
)
1297 std::swap(Base
, OffReg
);
1300 if (ShOpcVal
== ARM_AM::lsl
) {
1301 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1303 if (ConstantSDNode
*Sh
= dyn_cast
<ConstantSDNode
>(OffReg
.getOperand(1))) {
1304 ShAmt
= Sh
->getZExtValue();
1305 if (ShAmt
< 4 && isShifterOpProfitable(OffReg
, ShOpcVal
, ShAmt
))
1306 OffReg
= OffReg
.getOperand(0);
1313 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1314 // and use it in a shifted operand do so.
1315 if (OffReg
.getOpcode() == ISD::MUL
&& N
.hasOneUse()) {
1316 unsigned PowerOfTwo
= 0;
1317 SDValue NewMulConst
;
1318 if (canExtractShiftFromMul(OffReg
, 3, PowerOfTwo
, NewMulConst
)) {
1319 HandleSDNode
Handle(OffReg
);
1320 replaceDAGValue(OffReg
.getOperand(1), NewMulConst
);
1321 OffReg
= Handle
.getValue();
1326 ShImm
= CurDAG
->getTargetConstant(ShAmt
, SDLoc(N
), MVT::i32
);
1331 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N
, SDValue
&Base
,
1333 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1336 OffImm
= CurDAG
->getTargetConstant(0, SDLoc(N
), MVT::i32
);
1338 if (N
.getOpcode() != ISD::ADD
|| !CurDAG
->isBaseWithConstantOffset(N
))
1341 ConstantSDNode
*RHS
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
1345 uint32_t RHSC
= (int)RHS
->getZExtValue();
1346 if (RHSC
> 1020 || RHSC
% 4 != 0)
1349 Base
= N
.getOperand(0);
1350 if (Base
.getOpcode() == ISD::FrameIndex
) {
1351 int FI
= cast
<FrameIndexSDNode
>(Base
)->getIndex();
1352 Base
= CurDAG
->getTargetFrameIndex(
1353 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
1356 OffImm
= CurDAG
->getTargetConstant(RHSC
/4, SDLoc(N
), MVT::i32
);
1360 //===--------------------------------------------------------------------===//
1362 /// getAL - Returns a ARMCC::AL immediate node.
1363 static inline SDValue
getAL(SelectionDAG
*CurDAG
, const SDLoc
&dl
) {
1364 return CurDAG
->getTargetConstant((uint64_t)ARMCC::AL
, dl
, MVT::i32
);
1367 void ARMDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
1368 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
1369 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
1372 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode
*N
) {
1373 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1374 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1375 if (AM
== ISD::UNINDEXED
)
1378 EVT LoadedVT
= LD
->getMemoryVT();
1379 SDValue Offset
, AMOpc
;
1380 bool isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1381 unsigned Opcode
= 0;
1383 if (LoadedVT
== MVT::i32
&& isPre
&&
1384 SelectAddrMode2OffsetImmPre(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1385 Opcode
= ARM::LDR_PRE_IMM
;
1387 } else if (LoadedVT
== MVT::i32
&& !isPre
&&
1388 SelectAddrMode2OffsetImm(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1389 Opcode
= ARM::LDR_POST_IMM
;
1391 } else if (LoadedVT
== MVT::i32
&&
1392 SelectAddrMode2OffsetReg(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1393 Opcode
= isPre
? ARM::LDR_PRE_REG
: ARM::LDR_POST_REG
;
1396 } else if (LoadedVT
== MVT::i16
&&
1397 SelectAddrMode3Offset(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1399 Opcode
= (LD
->getExtensionType() == ISD::SEXTLOAD
)
1400 ? (isPre
? ARM::LDRSH_PRE
: ARM::LDRSH_POST
)
1401 : (isPre
? ARM::LDRH_PRE
: ARM::LDRH_POST
);
1402 } else if (LoadedVT
== MVT::i8
|| LoadedVT
== MVT::i1
) {
1403 if (LD
->getExtensionType() == ISD::SEXTLOAD
) {
1404 if (SelectAddrMode3Offset(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1406 Opcode
= isPre
? ARM::LDRSB_PRE
: ARM::LDRSB_POST
;
1410 SelectAddrMode2OffsetImmPre(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1412 Opcode
= ARM::LDRB_PRE_IMM
;
1413 } else if (!isPre
&&
1414 SelectAddrMode2OffsetImm(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1416 Opcode
= ARM::LDRB_POST_IMM
;
1417 } else if (SelectAddrMode2OffsetReg(N
, LD
->getOffset(), Offset
, AMOpc
)) {
1419 Opcode
= isPre
? ARM::LDRB_PRE_REG
: ARM::LDRB_POST_REG
;
1425 if (Opcode
== ARM::LDR_PRE_IMM
|| Opcode
== ARM::LDRB_PRE_IMM
) {
1426 SDValue Chain
= LD
->getChain();
1427 SDValue Base
= LD
->getBasePtr();
1428 SDValue Ops
[]= { Base
, AMOpc
, getAL(CurDAG
, SDLoc(N
)),
1429 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1430 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1432 transferMemOperands(N
, New
);
1433 ReplaceNode(N
, New
);
1436 SDValue Chain
= LD
->getChain();
1437 SDValue Base
= LD
->getBasePtr();
1438 SDValue Ops
[]= { Base
, Offset
, AMOpc
, getAL(CurDAG
, SDLoc(N
)),
1439 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1440 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1442 transferMemOperands(N
, New
);
1443 ReplaceNode(N
, New
);
1451 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode
*N
) {
1452 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1453 EVT LoadedVT
= LD
->getMemoryVT();
1454 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1455 if (AM
!= ISD::POST_INC
|| LD
->getExtensionType() != ISD::NON_EXTLOAD
||
1456 LoadedVT
.getSimpleVT().SimpleTy
!= MVT::i32
)
1459 auto *COffs
= dyn_cast
<ConstantSDNode
>(LD
->getOffset());
1460 if (!COffs
|| COffs
->getZExtValue() != 4)
1463 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1464 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1465 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1467 SDValue Chain
= LD
->getChain();
1468 SDValue Base
= LD
->getBasePtr();
1469 SDValue Ops
[]= { Base
, getAL(CurDAG
, SDLoc(N
)),
1470 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1471 SDNode
*New
= CurDAG
->getMachineNode(ARM::tLDR_postidx
, SDLoc(N
), MVT::i32
,
1472 MVT::i32
, MVT::Other
, Ops
);
1473 transferMemOperands(N
, New
);
1474 ReplaceNode(N
, New
);
1478 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode
*N
) {
1479 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1480 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1481 if (AM
== ISD::UNINDEXED
)
1484 EVT LoadedVT
= LD
->getMemoryVT();
1485 bool isSExtLd
= LD
->getExtensionType() == ISD::SEXTLOAD
;
1487 bool isPre
= (AM
== ISD::PRE_INC
) || (AM
== ISD::PRE_DEC
);
1488 unsigned Opcode
= 0;
1490 if (SelectT2AddrModeImm8Offset(N
, LD
->getOffset(), Offset
)) {
1491 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
1493 Opcode
= isPre
? ARM::t2LDR_PRE
: ARM::t2LDR_POST
;
1497 Opcode
= isPre
? ARM::t2LDRSH_PRE
: ARM::t2LDRSH_POST
;
1499 Opcode
= isPre
? ARM::t2LDRH_PRE
: ARM::t2LDRH_POST
;
1504 Opcode
= isPre
? ARM::t2LDRSB_PRE
: ARM::t2LDRSB_POST
;
1506 Opcode
= isPre
? ARM::t2LDRB_PRE
: ARM::t2LDRB_POST
;
1515 SDValue Chain
= LD
->getChain();
1516 SDValue Base
= LD
->getBasePtr();
1517 SDValue Ops
[]= { Base
, Offset
, getAL(CurDAG
, SDLoc(N
)),
1518 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1519 SDNode
*New
= CurDAG
->getMachineNode(Opcode
, SDLoc(N
), MVT::i32
, MVT::i32
,
1521 transferMemOperands(N
, New
);
1522 ReplaceNode(N
, New
);
1529 /// Form a GPRPair pseudo register from a pair of GPR regs.
1530 SDNode
*ARMDAGToDAGISel::createGPRPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1531 SDLoc
dl(V0
.getNode());
1533 CurDAG
->getTargetConstant(ARM::GPRPairRegClassID
, dl
, MVT::i32
);
1534 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::gsub_0
, dl
, MVT::i32
);
1535 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::gsub_1
, dl
, MVT::i32
);
1536 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1537 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1540 /// Form a D register from a pair of S registers.
1541 SDNode
*ARMDAGToDAGISel::createSRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1542 SDLoc
dl(V0
.getNode());
1544 CurDAG
->getTargetConstant(ARM::DPR_VFP2RegClassID
, dl
, MVT::i32
);
1545 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::ssub_0
, dl
, MVT::i32
);
1546 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::ssub_1
, dl
, MVT::i32
);
1547 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1548 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1551 /// Form a quad register from a pair of D registers.
1552 SDNode
*ARMDAGToDAGISel::createDRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1553 SDLoc
dl(V0
.getNode());
1554 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QPRRegClassID
, dl
,
1556 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::dsub_0
, dl
, MVT::i32
);
1557 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::dsub_1
, dl
, MVT::i32
);
1558 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1559 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1562 /// Form 4 consecutive D registers from a pair of Q registers.
1563 SDNode
*ARMDAGToDAGISel::createQRegPairNode(EVT VT
, SDValue V0
, SDValue V1
) {
1564 SDLoc
dl(V0
.getNode());
1565 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQPRRegClassID
, dl
,
1567 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::qsub_0
, dl
, MVT::i32
);
1568 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::qsub_1
, dl
, MVT::i32
);
1569 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
};
1570 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1573 /// Form 4 consecutive S registers.
1574 SDNode
*ARMDAGToDAGISel::createQuadSRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1575 SDValue V2
, SDValue V3
) {
1576 SDLoc
dl(V0
.getNode());
1578 CurDAG
->getTargetConstant(ARM::QPR_VFP2RegClassID
, dl
, MVT::i32
);
1579 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::ssub_0
, dl
, MVT::i32
);
1580 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::ssub_1
, dl
, MVT::i32
);
1581 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::ssub_2
, dl
, MVT::i32
);
1582 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::ssub_3
, dl
, MVT::i32
);
1583 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1584 V2
, SubReg2
, V3
, SubReg3
};
1585 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1588 /// Form 4 consecutive D registers.
1589 SDNode
*ARMDAGToDAGISel::createQuadDRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1590 SDValue V2
, SDValue V3
) {
1591 SDLoc
dl(V0
.getNode());
1592 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQPRRegClassID
, dl
,
1594 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::dsub_0
, dl
, MVT::i32
);
1595 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::dsub_1
, dl
, MVT::i32
);
1596 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::dsub_2
, dl
, MVT::i32
);
1597 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::dsub_3
, dl
, MVT::i32
);
1598 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1599 V2
, SubReg2
, V3
, SubReg3
};
1600 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1603 /// Form 4 consecutive Q registers.
1604 SDNode
*ARMDAGToDAGISel::createQuadQRegsNode(EVT VT
, SDValue V0
, SDValue V1
,
1605 SDValue V2
, SDValue V3
) {
1606 SDLoc
dl(V0
.getNode());
1607 SDValue RegClass
= CurDAG
->getTargetConstant(ARM::QQQQPRRegClassID
, dl
,
1609 SDValue SubReg0
= CurDAG
->getTargetConstant(ARM::qsub_0
, dl
, MVT::i32
);
1610 SDValue SubReg1
= CurDAG
->getTargetConstant(ARM::qsub_1
, dl
, MVT::i32
);
1611 SDValue SubReg2
= CurDAG
->getTargetConstant(ARM::qsub_2
, dl
, MVT::i32
);
1612 SDValue SubReg3
= CurDAG
->getTargetConstant(ARM::qsub_3
, dl
, MVT::i32
);
1613 const SDValue Ops
[] = { RegClass
, V0
, SubReg0
, V1
, SubReg1
,
1614 V2
, SubReg2
, V3
, SubReg3
};
1615 return CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, dl
, VT
, Ops
);
1618 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1619 /// of a NEON VLD or VST instruction. The supported values depend on the
1620 /// number of registers being loaded.
1621 SDValue
ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align
, const SDLoc
&dl
,
1622 unsigned NumVecs
, bool is64BitVector
) {
1623 unsigned NumRegs
= NumVecs
;
1624 if (!is64BitVector
&& NumVecs
< 3)
1627 unsigned Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
1628 if (Alignment
>= 32 && NumRegs
== 4)
1630 else if (Alignment
>= 16 && (NumRegs
== 2 || NumRegs
== 4))
1632 else if (Alignment
>= 8)
1637 return CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
1640 static bool isVLDfixed(unsigned Opc
)
1643 default: return false;
1644 case ARM::VLD1d8wb_fixed
: return true;
1645 case ARM::VLD1d16wb_fixed
: return true;
1646 case ARM::VLD1d64Qwb_fixed
: return true;
1647 case ARM::VLD1d32wb_fixed
: return true;
1648 case ARM::VLD1d64wb_fixed
: return true;
1649 case ARM::VLD1d64TPseudoWB_fixed
: return true;
1650 case ARM::VLD1d64QPseudoWB_fixed
: return true;
1651 case ARM::VLD1q8wb_fixed
: return true;
1652 case ARM::VLD1q16wb_fixed
: return true;
1653 case ARM::VLD1q32wb_fixed
: return true;
1654 case ARM::VLD1q64wb_fixed
: return true;
1655 case ARM::VLD1DUPd8wb_fixed
: return true;
1656 case ARM::VLD1DUPd16wb_fixed
: return true;
1657 case ARM::VLD1DUPd32wb_fixed
: return true;
1658 case ARM::VLD1DUPq8wb_fixed
: return true;
1659 case ARM::VLD1DUPq16wb_fixed
: return true;
1660 case ARM::VLD1DUPq32wb_fixed
: return true;
1661 case ARM::VLD2d8wb_fixed
: return true;
1662 case ARM::VLD2d16wb_fixed
: return true;
1663 case ARM::VLD2d32wb_fixed
: return true;
1664 case ARM::VLD2q8PseudoWB_fixed
: return true;
1665 case ARM::VLD2q16PseudoWB_fixed
: return true;
1666 case ARM::VLD2q32PseudoWB_fixed
: return true;
1667 case ARM::VLD2DUPd8wb_fixed
: return true;
1668 case ARM::VLD2DUPd16wb_fixed
: return true;
1669 case ARM::VLD2DUPd32wb_fixed
: return true;
1673 static bool isVSTfixed(unsigned Opc
)
1676 default: return false;
1677 case ARM::VST1d8wb_fixed
: return true;
1678 case ARM::VST1d16wb_fixed
: return true;
1679 case ARM::VST1d32wb_fixed
: return true;
1680 case ARM::VST1d64wb_fixed
: return true;
1681 case ARM::VST1q8wb_fixed
: return true;
1682 case ARM::VST1q16wb_fixed
: return true;
1683 case ARM::VST1q32wb_fixed
: return true;
1684 case ARM::VST1q64wb_fixed
: return true;
1685 case ARM::VST1d64TPseudoWB_fixed
: return true;
1686 case ARM::VST1d64QPseudoWB_fixed
: return true;
1687 case ARM::VST2d8wb_fixed
: return true;
1688 case ARM::VST2d16wb_fixed
: return true;
1689 case ARM::VST2d32wb_fixed
: return true;
1690 case ARM::VST2q8PseudoWB_fixed
: return true;
1691 case ARM::VST2q16PseudoWB_fixed
: return true;
1692 case ARM::VST2q32PseudoWB_fixed
: return true;
1696 // Get the register stride update opcode of a VLD/VST instruction that
1697 // is otherwise equivalent to the given fixed stride updating instruction.
1698 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc
) {
1699 assert((isVLDfixed(Opc
) || isVSTfixed(Opc
))
1700 && "Incorrect fixed stride updating instruction.");
1703 case ARM::VLD1d8wb_fixed
: return ARM::VLD1d8wb_register
;
1704 case ARM::VLD1d16wb_fixed
: return ARM::VLD1d16wb_register
;
1705 case ARM::VLD1d32wb_fixed
: return ARM::VLD1d32wb_register
;
1706 case ARM::VLD1d64wb_fixed
: return ARM::VLD1d64wb_register
;
1707 case ARM::VLD1q8wb_fixed
: return ARM::VLD1q8wb_register
;
1708 case ARM::VLD1q16wb_fixed
: return ARM::VLD1q16wb_register
;
1709 case ARM::VLD1q32wb_fixed
: return ARM::VLD1q32wb_register
;
1710 case ARM::VLD1q64wb_fixed
: return ARM::VLD1q64wb_register
;
1711 case ARM::VLD1d64Twb_fixed
: return ARM::VLD1d64Twb_register
;
1712 case ARM::VLD1d64Qwb_fixed
: return ARM::VLD1d64Qwb_register
;
1713 case ARM::VLD1d64TPseudoWB_fixed
: return ARM::VLD1d64TPseudoWB_register
;
1714 case ARM::VLD1d64QPseudoWB_fixed
: return ARM::VLD1d64QPseudoWB_register
;
1715 case ARM::VLD1DUPd8wb_fixed
: return ARM::VLD1DUPd8wb_register
;
1716 case ARM::VLD1DUPd16wb_fixed
: return ARM::VLD1DUPd16wb_register
;
1717 case ARM::VLD1DUPd32wb_fixed
: return ARM::VLD1DUPd32wb_register
;
1718 case ARM::VLD1DUPq8wb_fixed
: return ARM::VLD1DUPq8wb_register
;
1719 case ARM::VLD1DUPq16wb_fixed
: return ARM::VLD1DUPq16wb_register
;
1720 case ARM::VLD1DUPq32wb_fixed
: return ARM::VLD1DUPq32wb_register
;
1722 case ARM::VST1d8wb_fixed
: return ARM::VST1d8wb_register
;
1723 case ARM::VST1d16wb_fixed
: return ARM::VST1d16wb_register
;
1724 case ARM::VST1d32wb_fixed
: return ARM::VST1d32wb_register
;
1725 case ARM::VST1d64wb_fixed
: return ARM::VST1d64wb_register
;
1726 case ARM::VST1q8wb_fixed
: return ARM::VST1q8wb_register
;
1727 case ARM::VST1q16wb_fixed
: return ARM::VST1q16wb_register
;
1728 case ARM::VST1q32wb_fixed
: return ARM::VST1q32wb_register
;
1729 case ARM::VST1q64wb_fixed
: return ARM::VST1q64wb_register
;
1730 case ARM::VST1d64TPseudoWB_fixed
: return ARM::VST1d64TPseudoWB_register
;
1731 case ARM::VST1d64QPseudoWB_fixed
: return ARM::VST1d64QPseudoWB_register
;
1733 case ARM::VLD2d8wb_fixed
: return ARM::VLD2d8wb_register
;
1734 case ARM::VLD2d16wb_fixed
: return ARM::VLD2d16wb_register
;
1735 case ARM::VLD2d32wb_fixed
: return ARM::VLD2d32wb_register
;
1736 case ARM::VLD2q8PseudoWB_fixed
: return ARM::VLD2q8PseudoWB_register
;
1737 case ARM::VLD2q16PseudoWB_fixed
: return ARM::VLD2q16PseudoWB_register
;
1738 case ARM::VLD2q32PseudoWB_fixed
: return ARM::VLD2q32PseudoWB_register
;
1740 case ARM::VST2d8wb_fixed
: return ARM::VST2d8wb_register
;
1741 case ARM::VST2d16wb_fixed
: return ARM::VST2d16wb_register
;
1742 case ARM::VST2d32wb_fixed
: return ARM::VST2d32wb_register
;
1743 case ARM::VST2q8PseudoWB_fixed
: return ARM::VST2q8PseudoWB_register
;
1744 case ARM::VST2q16PseudoWB_fixed
: return ARM::VST2q16PseudoWB_register
;
1745 case ARM::VST2q32PseudoWB_fixed
: return ARM::VST2q32PseudoWB_register
;
1747 case ARM::VLD2DUPd8wb_fixed
: return ARM::VLD2DUPd8wb_register
;
1748 case ARM::VLD2DUPd16wb_fixed
: return ARM::VLD2DUPd16wb_register
;
1749 case ARM::VLD2DUPd32wb_fixed
: return ARM::VLD2DUPd32wb_register
;
1751 return Opc
; // If not one we handle, return it unchanged.
1754 /// Returns true if the given increment is a Constant known to be equal to the
1755 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1757 static bool isPerfectIncrement(SDValue Inc
, EVT VecTy
, unsigned NumVecs
) {
1758 auto C
= dyn_cast
<ConstantSDNode
>(Inc
);
1759 return C
&& C
->getZExtValue() == VecTy
.getSizeInBits() / 8 * NumVecs
;
1762 void ARMDAGToDAGISel::SelectVLD(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
1763 const uint16_t *DOpcodes
,
1764 const uint16_t *QOpcodes0
,
1765 const uint16_t *QOpcodes1
) {
1766 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VLD NumVecs out-of-range");
1769 SDValue MemAddr
, Align
;
1770 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
1771 // nodes are not intrinsics.
1772 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
1773 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
1776 SDValue Chain
= N
->getOperand(0);
1777 EVT VT
= N
->getValueType(0);
1778 bool is64BitVector
= VT
.is64BitVector();
1779 Align
= GetVLDSTAlign(Align
, dl
, NumVecs
, is64BitVector
);
1781 unsigned OpcodeIndex
;
1782 switch (VT
.getSimpleVT().SimpleTy
) {
1783 default: llvm_unreachable("unhandled vld type");
1784 // Double-register operations:
1785 case MVT::v8i8
: OpcodeIndex
= 0; break;
1787 case MVT::v4i16
: OpcodeIndex
= 1; break;
1789 case MVT::v2i32
: OpcodeIndex
= 2; break;
1790 case MVT::v1i64
: OpcodeIndex
= 3; break;
1791 // Quad-register operations:
1792 case MVT::v16i8
: OpcodeIndex
= 0; break;
1794 case MVT::v8i16
: OpcodeIndex
= 1; break;
1796 case MVT::v4i32
: OpcodeIndex
= 2; break;
1798 case MVT::v2i64
: OpcodeIndex
= 3; break;
1805 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
1808 ResTy
= EVT::getVectorVT(*CurDAG
->getContext(), MVT::i64
, ResTyElts
);
1810 std::vector
<EVT
> ResTys
;
1811 ResTys
.push_back(ResTy
);
1813 ResTys
.push_back(MVT::i32
);
1814 ResTys
.push_back(MVT::Other
);
1816 SDValue Pred
= getAL(CurDAG
, dl
);
1817 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
1819 SmallVector
<SDValue
, 7> Ops
;
1821 // Double registers and VLD1/VLD2 quad registers are directly supported.
1822 if (is64BitVector
|| NumVecs
<= 2) {
1823 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
1824 QOpcodes0
[OpcodeIndex
]);
1825 Ops
.push_back(MemAddr
);
1826 Ops
.push_back(Align
);
1828 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
1829 bool IsImmUpdate
= isPerfectIncrement(Inc
, VT
, NumVecs
);
1831 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1832 // check for the opcode rather than the number of vector elements.
1833 if (isVLDfixed(Opc
))
1834 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
1836 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1837 // the operands if not such an opcode.
1838 } else if (!isVLDfixed(Opc
))
1839 Ops
.push_back(Reg0
);
1841 Ops
.push_back(Pred
);
1842 Ops
.push_back(Reg0
);
1843 Ops
.push_back(Chain
);
1844 VLd
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
1847 // Otherwise, quad registers are loaded with two separate instructions,
1848 // where one loads the even registers and the other loads the odd registers.
1849 EVT AddrTy
= MemAddr
.getValueType();
1851 // Load the even subregs. This is always an updating load, so that it
1852 // provides the address to the second load for the odd subregs.
1854 SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, ResTy
), 0);
1855 const SDValue OpsA
[] = { MemAddr
, Align
, Reg0
, ImplDef
, Pred
, Reg0
, Chain
};
1856 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
,
1857 ResTy
, AddrTy
, MVT::Other
, OpsA
);
1858 Chain
= SDValue(VLdA
, 2);
1860 // Load the odd subregs.
1861 Ops
.push_back(SDValue(VLdA
, 1));
1862 Ops
.push_back(Align
);
1864 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
1865 assert(isa
<ConstantSDNode
>(Inc
.getNode()) &&
1866 "only constant post-increment update allowed for VLD3/4");
1868 Ops
.push_back(Reg0
);
1870 Ops
.push_back(SDValue(VLdA
, 0));
1871 Ops
.push_back(Pred
);
1872 Ops
.push_back(Reg0
);
1873 Ops
.push_back(Chain
);
1874 VLd
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
, Ops
);
1877 // Transfer memoperands.
1878 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
1879 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLd
), {MemOp
});
1882 ReplaceNode(N
, VLd
);
1886 // Extract out the subregisters.
1887 SDValue SuperReg
= SDValue(VLd
, 0);
1888 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7 &&
1889 ARM::qsub_3
== ARM::qsub_0
+ 3,
1890 "Unexpected subreg numbering");
1891 unsigned Sub0
= (is64BitVector
? ARM::dsub_0
: ARM::qsub_0
);
1892 for (unsigned Vec
= 0; Vec
< NumVecs
; ++Vec
)
1893 ReplaceUses(SDValue(N
, Vec
),
1894 CurDAG
->getTargetExtractSubreg(Sub0
+ Vec
, dl
, VT
, SuperReg
));
1895 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLd
, 1));
1897 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLd
, 2));
1898 CurDAG
->RemoveDeadNode(N
);
1901 void ARMDAGToDAGISel::SelectVST(SDNode
*N
, bool isUpdating
, unsigned NumVecs
,
1902 const uint16_t *DOpcodes
,
1903 const uint16_t *QOpcodes0
,
1904 const uint16_t *QOpcodes1
) {
1905 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VST NumVecs out-of-range");
1908 SDValue MemAddr
, Align
;
1909 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
1910 // nodes are not intrinsics.
1911 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
1912 unsigned Vec0Idx
= 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1913 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
1916 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
1918 SDValue Chain
= N
->getOperand(0);
1919 EVT VT
= N
->getOperand(Vec0Idx
).getValueType();
1920 bool is64BitVector
= VT
.is64BitVector();
1921 Align
= GetVLDSTAlign(Align
, dl
, NumVecs
, is64BitVector
);
1923 unsigned OpcodeIndex
;
1924 switch (VT
.getSimpleVT().SimpleTy
) {
1925 default: llvm_unreachable("unhandled vst type");
1926 // Double-register operations:
1927 case MVT::v8i8
: OpcodeIndex
= 0; break;
1929 case MVT::v4i16
: OpcodeIndex
= 1; break;
1931 case MVT::v2i32
: OpcodeIndex
= 2; break;
1932 case MVT::v1i64
: OpcodeIndex
= 3; break;
1933 // Quad-register operations:
1934 case MVT::v16i8
: OpcodeIndex
= 0; break;
1936 case MVT::v8i16
: OpcodeIndex
= 1; break;
1938 case MVT::v4i32
: OpcodeIndex
= 2; break;
1940 case MVT::v2i64
: OpcodeIndex
= 3; break;
1943 std::vector
<EVT
> ResTys
;
1945 ResTys
.push_back(MVT::i32
);
1946 ResTys
.push_back(MVT::Other
);
1948 SDValue Pred
= getAL(CurDAG
, dl
);
1949 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
1950 SmallVector
<SDValue
, 7> Ops
;
1952 // Double registers and VST1/VST2 quad registers are directly supported.
1953 if (is64BitVector
|| NumVecs
<= 2) {
1956 SrcReg
= N
->getOperand(Vec0Idx
);
1957 } else if (is64BitVector
) {
1958 // Form a REG_SEQUENCE to force register allocation.
1959 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
1960 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
1962 SrcReg
= SDValue(createDRegPairNode(MVT::v2i64
, V0
, V1
), 0);
1964 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
1965 // If it's a vst3, form a quad D-register and leave the last part as
1967 SDValue V3
= (NumVecs
== 3)
1968 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
,dl
,VT
), 0)
1969 : N
->getOperand(Vec0Idx
+ 3);
1970 SrcReg
= SDValue(createQuadDRegsNode(MVT::v4i64
, V0
, V1
, V2
, V3
), 0);
1973 // Form a QQ register.
1974 SDValue Q0
= N
->getOperand(Vec0Idx
);
1975 SDValue Q1
= N
->getOperand(Vec0Idx
+ 1);
1976 SrcReg
= SDValue(createQRegPairNode(MVT::v4i64
, Q0
, Q1
), 0);
1979 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
1980 QOpcodes0
[OpcodeIndex
]);
1981 Ops
.push_back(MemAddr
);
1982 Ops
.push_back(Align
);
1984 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
1985 bool IsImmUpdate
= isPerfectIncrement(Inc
, VT
, NumVecs
);
1987 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
1988 // check for the opcode rather than the number of vector elements.
1989 if (isVSTfixed(Opc
))
1990 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
1993 // VST1/VST2 fixed increment does not need Reg0 so only include it in
1994 // the operands if not such an opcode.
1995 else if (!isVSTfixed(Opc
))
1996 Ops
.push_back(Reg0
);
1998 Ops
.push_back(SrcReg
);
1999 Ops
.push_back(Pred
);
2000 Ops
.push_back(Reg0
);
2001 Ops
.push_back(Chain
);
2002 SDNode
*VSt
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2004 // Transfer memoperands.
2005 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VSt
), {MemOp
});
2007 ReplaceNode(N
, VSt
);
2011 // Otherwise, quad registers are stored with two separate instructions,
2012 // where one stores the even registers and the other stores the odd registers.
2014 // Form the QQQQ REG_SEQUENCE.
2015 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2016 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2017 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2018 SDValue V3
= (NumVecs
== 3)
2019 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, VT
), 0)
2020 : N
->getOperand(Vec0Idx
+ 3);
2021 SDValue RegSeq
= SDValue(createQuadQRegsNode(MVT::v8i64
, V0
, V1
, V2
, V3
), 0);
2023 // Store the even D registers. This is always an updating store, so that it
2024 // provides the address to the second store for the odd subregs.
2025 const SDValue OpsA
[] = { MemAddr
, Align
, Reg0
, RegSeq
, Pred
, Reg0
, Chain
};
2026 SDNode
*VStA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
], dl
,
2027 MemAddr
.getValueType(),
2029 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VStA
), {MemOp
});
2030 Chain
= SDValue(VStA
, 1);
2032 // Store the odd D registers.
2033 Ops
.push_back(SDValue(VStA
, 0));
2034 Ops
.push_back(Align
);
2036 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2037 assert(isa
<ConstantSDNode
>(Inc
.getNode()) &&
2038 "only constant post-increment update allowed for VST3/4");
2040 Ops
.push_back(Reg0
);
2042 Ops
.push_back(RegSeq
);
2043 Ops
.push_back(Pred
);
2044 Ops
.push_back(Reg0
);
2045 Ops
.push_back(Chain
);
2046 SDNode
*VStB
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
,
2048 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VStB
), {MemOp
});
2049 ReplaceNode(N
, VStB
);
2052 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode
*N
, bool IsLoad
, bool isUpdating
,
2054 const uint16_t *DOpcodes
,
2055 const uint16_t *QOpcodes
) {
2056 assert(NumVecs
>=2 && NumVecs
<= 4 && "VLDSTLane NumVecs out-of-range");
2059 SDValue MemAddr
, Align
;
2060 bool IsIntrinsic
= !isUpdating
; // By coincidence, all supported updating
2061 // nodes are not intrinsics.
2062 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2063 unsigned Vec0Idx
= 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2064 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2067 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2069 SDValue Chain
= N
->getOperand(0);
2071 cast
<ConstantSDNode
>(N
->getOperand(Vec0Idx
+ NumVecs
))->getZExtValue();
2072 EVT VT
= N
->getOperand(Vec0Idx
).getValueType();
2073 bool is64BitVector
= VT
.is64BitVector();
2075 unsigned Alignment
= 0;
2077 Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
2078 unsigned NumBytes
= NumVecs
* VT
.getScalarSizeInBits() / 8;
2079 if (Alignment
> NumBytes
)
2080 Alignment
= NumBytes
;
2081 if (Alignment
< 8 && Alignment
< NumBytes
)
2083 // Alignment must be a power of two; make sure of that.
2084 Alignment
= (Alignment
& -Alignment
);
2088 Align
= CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
2090 unsigned OpcodeIndex
;
2091 switch (VT
.getSimpleVT().SimpleTy
) {
2092 default: llvm_unreachable("unhandled vld/vst lane type");
2093 // Double-register operations:
2094 case MVT::v8i8
: OpcodeIndex
= 0; break;
2096 case MVT::v4i16
: OpcodeIndex
= 1; break;
2098 case MVT::v2i32
: OpcodeIndex
= 2; break;
2099 // Quad-register operations:
2101 case MVT::v8i16
: OpcodeIndex
= 0; break;
2103 case MVT::v4i32
: OpcodeIndex
= 1; break;
2106 std::vector
<EVT
> ResTys
;
2108 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
2111 ResTys
.push_back(EVT::getVectorVT(*CurDAG
->getContext(),
2112 MVT::i64
, ResTyElts
));
2115 ResTys
.push_back(MVT::i32
);
2116 ResTys
.push_back(MVT::Other
);
2118 SDValue Pred
= getAL(CurDAG
, dl
);
2119 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2121 SmallVector
<SDValue
, 8> Ops
;
2122 Ops
.push_back(MemAddr
);
2123 Ops
.push_back(Align
);
2125 SDValue Inc
= N
->getOperand(AddrOpIdx
+ 1);
2127 isPerfectIncrement(Inc
, VT
.getVectorElementType(), NumVecs
);
2128 Ops
.push_back(IsImmUpdate
? Reg0
: Inc
);
2132 SDValue V0
= N
->getOperand(Vec0Idx
+ 0);
2133 SDValue V1
= N
->getOperand(Vec0Idx
+ 1);
2136 SuperReg
= SDValue(createDRegPairNode(MVT::v2i64
, V0
, V1
), 0);
2138 SuperReg
= SDValue(createQRegPairNode(MVT::v4i64
, V0
, V1
), 0);
2140 SDValue V2
= N
->getOperand(Vec0Idx
+ 2);
2141 SDValue V3
= (NumVecs
== 3)
2142 ? SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, VT
), 0)
2143 : N
->getOperand(Vec0Idx
+ 3);
2145 SuperReg
= SDValue(createQuadDRegsNode(MVT::v4i64
, V0
, V1
, V2
, V3
), 0);
2147 SuperReg
= SDValue(createQuadQRegsNode(MVT::v8i64
, V0
, V1
, V2
, V3
), 0);
2149 Ops
.push_back(SuperReg
);
2150 Ops
.push_back(getI32Imm(Lane
, dl
));
2151 Ops
.push_back(Pred
);
2152 Ops
.push_back(Reg0
);
2153 Ops
.push_back(Chain
);
2155 unsigned Opc
= (is64BitVector
? DOpcodes
[OpcodeIndex
] :
2156 QOpcodes
[OpcodeIndex
]);
2157 SDNode
*VLdLn
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2158 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLdLn
), {MemOp
});
2160 ReplaceNode(N
, VLdLn
);
2164 // Extract the subregisters.
2165 SuperReg
= SDValue(VLdLn
, 0);
2166 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7 &&
2167 ARM::qsub_3
== ARM::qsub_0
+ 3,
2168 "Unexpected subreg numbering");
2169 unsigned Sub0
= is64BitVector
? ARM::dsub_0
: ARM::qsub_0
;
2170 for (unsigned Vec
= 0; Vec
< NumVecs
; ++Vec
)
2171 ReplaceUses(SDValue(N
, Vec
),
2172 CurDAG
->getTargetExtractSubreg(Sub0
+ Vec
, dl
, VT
, SuperReg
));
2173 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLdLn
, 1));
2175 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLdLn
, 2));
2176 CurDAG
->RemoveDeadNode(N
);
2179 void ARMDAGToDAGISel::SelectVLDDup(SDNode
*N
, bool IsIntrinsic
,
2180 bool isUpdating
, unsigned NumVecs
,
2181 const uint16_t *DOpcodes
,
2182 const uint16_t *QOpcodes0
,
2183 const uint16_t *QOpcodes1
) {
2184 assert(NumVecs
>= 1 && NumVecs
<= 4 && "VLDDup NumVecs out-of-range");
2187 SDValue MemAddr
, Align
;
2188 unsigned AddrOpIdx
= IsIntrinsic
? 2 : 1;
2189 if (!SelectAddrMode6(N
, N
->getOperand(AddrOpIdx
), MemAddr
, Align
))
2192 SDValue Chain
= N
->getOperand(0);
2193 EVT VT
= N
->getValueType(0);
2194 bool is64BitVector
= VT
.is64BitVector();
2196 unsigned Alignment
= 0;
2198 Alignment
= cast
<ConstantSDNode
>(Align
)->getZExtValue();
2199 unsigned NumBytes
= NumVecs
* VT
.getScalarSizeInBits() / 8;
2200 if (Alignment
> NumBytes
)
2201 Alignment
= NumBytes
;
2202 if (Alignment
< 8 && Alignment
< NumBytes
)
2204 // Alignment must be a power of two; make sure of that.
2205 Alignment
= (Alignment
& -Alignment
);
2209 Align
= CurDAG
->getTargetConstant(Alignment
, dl
, MVT::i32
);
2211 unsigned OpcodeIndex
;
2212 switch (VT
.getSimpleVT().SimpleTy
) {
2213 default: llvm_unreachable("unhandled vld-dup type");
2215 case MVT::v16i8
: OpcodeIndex
= 0; break;
2220 OpcodeIndex
= 1; break;
2224 case MVT::v4i32
: OpcodeIndex
= 2; break;
2226 case MVT::v1i64
: OpcodeIndex
= 3; break;
2229 unsigned ResTyElts
= (NumVecs
== 3) ? 4 : NumVecs
;
2232 EVT ResTy
= EVT::getVectorVT(*CurDAG
->getContext(), MVT::i64
, ResTyElts
);
2234 std::vector
<EVT
> ResTys
;
2235 ResTys
.push_back(ResTy
);
2237 ResTys
.push_back(MVT::i32
);
2238 ResTys
.push_back(MVT::Other
);
2240 SDValue Pred
= getAL(CurDAG
, dl
);
2241 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2244 if (is64BitVector
|| NumVecs
== 1) {
2245 SmallVector
<SDValue
, 6> Ops
;
2246 Ops
.push_back(MemAddr
);
2247 Ops
.push_back(Align
);
2248 unsigned Opc
= is64BitVector
? DOpcodes
[OpcodeIndex
] :
2249 QOpcodes0
[OpcodeIndex
];
2251 // fixed-stride update instructions don't have an explicit writeback
2252 // operand. It's implicit in the opcode itself.
2253 SDValue Inc
= N
->getOperand(2);
2255 isPerfectIncrement(Inc
, VT
.getVectorElementType(), NumVecs
);
2256 if (NumVecs
<= 2 && !IsImmUpdate
)
2257 Opc
= getVLDSTRegisterUpdateOpcode(Opc
);
2260 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2261 else if (NumVecs
> 2)
2262 Ops
.push_back(Reg0
);
2264 Ops
.push_back(Pred
);
2265 Ops
.push_back(Reg0
);
2266 Ops
.push_back(Chain
);
2267 VLdDup
= CurDAG
->getMachineNode(Opc
, dl
, ResTys
, Ops
);
2268 } else if (NumVecs
== 2) {
2269 const SDValue OpsA
[] = { MemAddr
, Align
, Pred
, Reg0
, Chain
};
2270 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
],
2273 Chain
= SDValue(VLdA
, 1);
2274 const SDValue OpsB
[] = { MemAddr
, Align
, Pred
, Reg0
, Chain
};
2275 VLdDup
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
, OpsB
);
2278 SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
, dl
, ResTy
), 0);
2279 const SDValue OpsA
[] = { MemAddr
, Align
, ImplDef
, Pred
, Reg0
, Chain
};
2280 SDNode
*VLdA
= CurDAG
->getMachineNode(QOpcodes0
[OpcodeIndex
],
2283 SDValue SuperReg
= SDValue(VLdA
, 0);
2284 Chain
= SDValue(VLdA
, 1);
2285 const SDValue OpsB
[] = { MemAddr
, Align
, SuperReg
, Pred
, Reg0
, Chain
};
2286 VLdDup
= CurDAG
->getMachineNode(QOpcodes1
[OpcodeIndex
], dl
, ResTys
, OpsB
);
2289 // Transfer memoperands.
2290 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2291 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(VLdDup
), {MemOp
});
2293 // Extract the subregisters.
2295 ReplaceUses(SDValue(N
, 0), SDValue(VLdDup
, 0));
2297 SDValue SuperReg
= SDValue(VLdDup
, 0);
2298 static_assert(ARM::dsub_7
== ARM::dsub_0
+ 7, "Unexpected subreg numbering");
2299 unsigned SubIdx
= is64BitVector
? ARM::dsub_0
: ARM::qsub_0
;
2300 for (unsigned Vec
= 0; Vec
!= NumVecs
; ++Vec
) {
2301 ReplaceUses(SDValue(N
, Vec
),
2302 CurDAG
->getTargetExtractSubreg(SubIdx
+Vec
, dl
, VT
, SuperReg
));
2305 ReplaceUses(SDValue(N
, NumVecs
), SDValue(VLdDup
, 1));
2307 ReplaceUses(SDValue(N
, NumVecs
+ 1), SDValue(VLdDup
, 2));
2308 CurDAG
->RemoveDeadNode(N
);
2311 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode
*N
, bool isSigned
) {
2312 if (!Subtarget
->hasV6T2Ops())
2315 unsigned Opc
= isSigned
2316 ? (Subtarget
->isThumb() ? ARM::t2SBFX
: ARM::SBFX
)
2317 : (Subtarget
->isThumb() ? ARM::t2UBFX
: ARM::UBFX
);
2320 // For unsigned extracts, check for a shift right and mask
2321 unsigned And_imm
= 0;
2322 if (N
->getOpcode() == ISD::AND
) {
2323 if (isOpcWithIntImmediate(N
, ISD::AND
, And_imm
)) {
2325 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2326 if (And_imm
& (And_imm
+ 1))
2329 unsigned Srl_imm
= 0;
2330 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRL
,
2332 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
2334 // Mask off the unnecessary bits of the AND immediate; normally
2335 // DAGCombine will do this, but that might not happen if
2336 // targetShrinkDemandedConstant chooses a different immediate.
2337 And_imm
&= -1U >> Srl_imm
;
2339 // Note: The width operand is encoded as width-1.
2340 unsigned Width
= countTrailingOnes(And_imm
) - 1;
2341 unsigned LSB
= Srl_imm
;
2343 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2345 if ((LSB
+ Width
+ 1) == N
->getValueType(0).getSizeInBits()) {
2346 // It's cheaper to use a right shift to extract the top bits.
2347 if (Subtarget
->isThumb()) {
2348 Opc
= isSigned
? ARM::t2ASRri
: ARM::t2LSRri
;
2349 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2350 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2351 getAL(CurDAG
, dl
), Reg0
, Reg0
};
2352 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2356 // ARM models shift instructions as MOVsi with shifter operand.
2357 ARM_AM::ShiftOpc ShOpcVal
= ARM_AM::getShiftOpcForNode(ISD::SRL
);
2359 CurDAG
->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal
, LSB
), dl
,
2361 SDValue Ops
[] = { N
->getOperand(0).getOperand(0), ShOpc
,
2362 getAL(CurDAG
, dl
), Reg0
, Reg0
};
2363 CurDAG
->SelectNodeTo(N
, ARM::MOVsi
, MVT::i32
, Ops
);
2367 assert(LSB
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
2368 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2369 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2370 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
2371 getAL(CurDAG
, dl
), Reg0
};
2372 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2379 // Otherwise, we're looking for a shift of a shift
2380 unsigned Shl_imm
= 0;
2381 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SHL
, Shl_imm
)) {
2382 assert(Shl_imm
> 0 && Shl_imm
< 32 && "bad amount in shift node!");
2383 unsigned Srl_imm
= 0;
2384 if (isInt32Immediate(N
->getOperand(1), Srl_imm
)) {
2385 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
2386 // Note: The width operand is encoded as width-1.
2387 unsigned Width
= 32 - Srl_imm
- 1;
2388 int LSB
= Srl_imm
- Shl_imm
;
2391 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2392 assert(LSB
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
2393 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2394 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2395 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
2396 getAL(CurDAG
, dl
), Reg0
};
2397 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2402 // Or we are looking for a shift of an and, with a mask operand
2403 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, And_imm
) &&
2404 isShiftedMask_32(And_imm
)) {
2405 unsigned Srl_imm
= 0;
2406 unsigned LSB
= countTrailingZeros(And_imm
);
2407 // Shift must be the same as the ands lsb
2408 if (isInt32Immediate(N
->getOperand(1), Srl_imm
) && Srl_imm
== LSB
) {
2409 assert(Srl_imm
> 0 && Srl_imm
< 32 && "bad amount in shift node!");
2410 unsigned MSB
= 31 - countLeadingZeros(And_imm
);
2411 // Note: The width operand is encoded as width-1.
2412 unsigned Width
= MSB
- LSB
;
2413 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2414 assert(Srl_imm
+ Width
+ 1 <= 32 && "Shouldn't create an invalid ubfx");
2415 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2416 CurDAG
->getTargetConstant(Srl_imm
, dl
, MVT::i32
),
2417 CurDAG
->getTargetConstant(Width
, dl
, MVT::i32
),
2418 getAL(CurDAG
, dl
), Reg0
};
2419 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2424 if (N
->getOpcode() == ISD::SIGN_EXTEND_INREG
) {
2425 unsigned Width
= cast
<VTSDNode
>(N
->getOperand(1))->getVT().getSizeInBits();
2427 if (!isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRL
, LSB
) &&
2428 !isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::SRA
, LSB
))
2431 if (LSB
+ Width
> 32)
2434 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2435 assert(LSB
+ Width
<= 32 && "Shouldn't create an invalid ubfx");
2436 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
2437 CurDAG
->getTargetConstant(LSB
, dl
, MVT::i32
),
2438 CurDAG
->getTargetConstant(Width
- 1, dl
, MVT::i32
),
2439 getAL(CurDAG
, dl
), Reg0
};
2440 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2447 /// Target-specific DAG combining for ISD::XOR.
2448 /// Target-independent combining lowers SELECT_CC nodes of the form
2449 /// select_cc setg[ge] X, 0, X, -X
2450 /// select_cc setgt X, -1, X, -X
2451 /// select_cc setl[te] X, 0, -X, X
2452 /// select_cc setlt X, 1, -X, X
2453 /// which represent Integer ABS into:
2454 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2455 /// ARM instruction selection detects the latter and matches it to
2456 /// ARM::ABS or ARM::t2ABS machine node.
2457 bool ARMDAGToDAGISel::tryABSOp(SDNode
*N
){
2458 SDValue XORSrc0
= N
->getOperand(0);
2459 SDValue XORSrc1
= N
->getOperand(1);
2460 EVT VT
= N
->getValueType(0);
2462 if (Subtarget
->isThumb1Only())
2465 if (XORSrc0
.getOpcode() != ISD::ADD
|| XORSrc1
.getOpcode() != ISD::SRA
)
2468 SDValue ADDSrc0
= XORSrc0
.getOperand(0);
2469 SDValue ADDSrc1
= XORSrc0
.getOperand(1);
2470 SDValue SRASrc0
= XORSrc1
.getOperand(0);
2471 SDValue SRASrc1
= XORSrc1
.getOperand(1);
2472 ConstantSDNode
*SRAConstant
= dyn_cast
<ConstantSDNode
>(SRASrc1
);
2473 EVT XType
= SRASrc0
.getValueType();
2474 unsigned Size
= XType
.getSizeInBits() - 1;
2476 if (ADDSrc1
== XORSrc1
&& ADDSrc0
== SRASrc0
&&
2477 XType
.isInteger() && SRAConstant
!= nullptr &&
2478 Size
== SRAConstant
->getZExtValue()) {
2479 unsigned Opcode
= Subtarget
->isThumb2() ? ARM::t2ABS
: ARM::ABS
;
2480 CurDAG
->SelectNodeTo(N
, Opcode
, VT
, ADDSrc0
);
2487 /// We've got special pseudo-instructions for these
2488 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode
*N
) {
2490 EVT MemTy
= cast
<MemSDNode
>(N
)->getMemoryVT();
2491 if (MemTy
== MVT::i8
)
2492 Opcode
= ARM::CMP_SWAP_8
;
2493 else if (MemTy
== MVT::i16
)
2494 Opcode
= ARM::CMP_SWAP_16
;
2495 else if (MemTy
== MVT::i32
)
2496 Opcode
= ARM::CMP_SWAP_32
;
2498 llvm_unreachable("Unknown AtomicCmpSwap type");
2500 SDValue Ops
[] = {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3),
2502 SDNode
*CmpSwap
= CurDAG
->getMachineNode(
2504 CurDAG
->getVTList(MVT::i32
, MVT::i32
, MVT::Other
), Ops
);
2506 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
2507 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(CmpSwap
), {MemOp
});
2509 ReplaceUses(SDValue(N
, 0), SDValue(CmpSwap
, 0));
2510 ReplaceUses(SDValue(N
, 1), SDValue(CmpSwap
, 2));
2511 CurDAG
->RemoveDeadNode(N
);
2514 static Optional
<std::pair
<unsigned, unsigned>>
2515 getContiguousRangeOfSetBits(const APInt
&A
) {
2516 unsigned FirstOne
= A
.getBitWidth() - A
.countLeadingZeros() - 1;
2517 unsigned LastOne
= A
.countTrailingZeros();
2518 if (A
.countPopulation() != (FirstOne
- LastOne
+ 1))
2519 return Optional
<std::pair
<unsigned,unsigned>>();
2520 return std::make_pair(FirstOne
, LastOne
);
2523 void ARMDAGToDAGISel::SelectCMPZ(SDNode
*N
, bool &SwitchEQNEToPLMI
) {
2524 assert(N
->getOpcode() == ARMISD::CMPZ
);
2525 SwitchEQNEToPLMI
= false;
2527 if (!Subtarget
->isThumb())
2528 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2529 // LSR don't exist as standalone instructions - they need the barrel shifter.
2532 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2533 SDValue And
= N
->getOperand(0);
2534 if (!And
->hasOneUse())
2537 SDValue Zero
= N
->getOperand(1);
2538 if (!isa
<ConstantSDNode
>(Zero
) || !cast
<ConstantSDNode
>(Zero
)->isNullValue() ||
2539 And
->getOpcode() != ISD::AND
)
2541 SDValue X
= And
.getOperand(0);
2542 auto C
= dyn_cast
<ConstantSDNode
>(And
.getOperand(1));
2546 auto Range
= getContiguousRangeOfSetBits(C
->getAPIntValue());
2550 // There are several ways to lower this:
2554 auto EmitShift
= [&](unsigned Opc
, SDValue Src
, unsigned Imm
) -> SDNode
* {
2555 if (Subtarget
->isThumb2()) {
2556 Opc
= (Opc
== ARM::tLSLri
) ? ARM::t2LSLri
: ARM::t2LSRri
;
2557 SDValue Ops
[] = { Src
, CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
),
2558 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
2559 CurDAG
->getRegister(0, MVT::i32
) };
2560 return CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
2562 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
), Src
,
2563 CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
),
2564 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
)};
2565 return CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
2569 if (Range
->second
== 0) {
2570 // 1. Mask includes the LSB -> Simply shift the top N bits off
2571 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
2572 ReplaceNode(And
.getNode(), NewN
);
2573 } else if (Range
->first
== 31) {
2574 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2575 NewN
= EmitShift(ARM::tLSRri
, X
, Range
->second
);
2576 ReplaceNode(And
.getNode(), NewN
);
2577 } else if (Range
->first
== Range
->second
) {
2578 // 3. Only one bit is set. We can shift this into the sign bit and use a
2579 // PL/MI comparison.
2580 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
2581 ReplaceNode(And
.getNode(), NewN
);
2583 SwitchEQNEToPLMI
= true;
2584 } else if (!Subtarget
->hasV6T2Ops()) {
2585 // 4. Do a double shift to clear bottom and top bits, but only in
2586 // thumb-1 mode as in thumb-2 we can use UBFX.
2587 NewN
= EmitShift(ARM::tLSLri
, X
, 31 - Range
->first
);
2588 NewN
= EmitShift(ARM::tLSRri
, SDValue(NewN
, 0),
2589 Range
->second
+ (31 - Range
->first
));
2590 ReplaceNode(And
.getNode(), NewN
);
2595 void ARMDAGToDAGISel::Select(SDNode
*N
) {
2598 if (N
->isMachineOpcode()) {
2600 return; // Already selected.
2603 switch (N
->getOpcode()) {
2606 // For Thumb1, match an sp-relative store in C++. This is a little
2607 // unfortunate, but I don't think I can make the chain check work
2608 // otherwise. (The chain of the store has to be the same as the chain
2609 // of the CopyFromReg, or else we can't replace the CopyFromReg with
2610 // a direct reference to "SP".)
2612 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2613 // a different addressing mode from other four-byte stores.
2615 // This pattern usually comes up with call arguments.
2616 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
2617 SDValue Ptr
= ST
->getBasePtr();
2618 if (Subtarget
->isThumb1Only() && ST
->isUnindexed()) {
2620 if (Ptr
.getOpcode() == ISD::ADD
&&
2621 isScaledConstantInRange(Ptr
.getOperand(1), /*Scale=*/4, 0, 256, RHSC
))
2622 Ptr
= Ptr
.getOperand(0);
2624 if (Ptr
.getOpcode() == ISD::CopyFromReg
&&
2625 cast
<RegisterSDNode
>(Ptr
.getOperand(1))->getReg() == ARM::SP
&&
2626 Ptr
.getOperand(0) == ST
->getChain()) {
2627 SDValue Ops
[] = {ST
->getValue(),
2628 CurDAG
->getRegister(ARM::SP
, MVT::i32
),
2629 CurDAG
->getTargetConstant(RHSC
, dl
, MVT::i32
),
2631 CurDAG
->getRegister(0, MVT::i32
),
2633 MachineSDNode
*ResNode
=
2634 CurDAG
->getMachineNode(ARM::tSTRspi
, dl
, MVT::Other
, Ops
);
2635 MachineMemOperand
*MemOp
= ST
->getMemOperand();
2636 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(ResNode
), {MemOp
});
2637 ReplaceNode(N
, ResNode
);
2643 case ISD::WRITE_REGISTER
:
2644 if (tryWriteRegister(N
))
2647 case ISD::READ_REGISTER
:
2648 if (tryReadRegister(N
))
2651 case ISD::INLINEASM
:
2652 case ISD::INLINEASM_BR
:
2653 if (tryInlineAsm(N
))
2657 // Select special operations if XOR node forms integer ABS pattern
2660 // Other cases are autogenerated.
2662 case ISD::Constant
: {
2663 unsigned Val
= cast
<ConstantSDNode
>(N
)->getZExtValue();
2664 // If we can't materialize the constant we need to use a literal pool
2665 if (ConstantMaterializationCost(Val
) > 2) {
2666 SDValue CPIdx
= CurDAG
->getTargetConstantPool(
2667 ConstantInt::get(Type::getInt32Ty(*CurDAG
->getContext()), Val
),
2668 TLI
->getPointerTy(CurDAG
->getDataLayout()));
2671 if (Subtarget
->isThumb()) {
2675 CurDAG
->getRegister(0, MVT::i32
),
2676 CurDAG
->getEntryNode()
2678 ResNode
= CurDAG
->getMachineNode(ARM::tLDRpci
, dl
, MVT::i32
, MVT::Other
,
2683 CurDAG
->getTargetConstant(0, dl
, MVT::i32
),
2685 CurDAG
->getRegister(0, MVT::i32
),
2686 CurDAG
->getEntryNode()
2688 ResNode
= CurDAG
->getMachineNode(ARM::LDRcp
, dl
, MVT::i32
, MVT::Other
,
2691 // Annotate the Node with memory operand information so that MachineInstr
2692 // queries work properly. This e.g. gives the register allocation the
2693 // required information for rematerialization.
2694 MachineFunction
& MF
= CurDAG
->getMachineFunction();
2695 MachineMemOperand
*MemOp
=
2696 MF
.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF
),
2697 MachineMemOperand::MOLoad
, 4, 4);
2699 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(ResNode
), {MemOp
});
2701 ReplaceNode(N
, ResNode
);
2705 // Other cases are autogenerated.
2708 case ISD::FrameIndex
: {
2709 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2710 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
2711 SDValue TFI
= CurDAG
->getTargetFrameIndex(
2712 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
2713 if (Subtarget
->isThumb1Only()) {
2714 // Set the alignment of the frame object to 4, to avoid having to generate
2715 // more than one ADD
2716 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
2717 if (MFI
.getObjectAlignment(FI
) < 4)
2718 MFI
.setObjectAlignment(FI
, 4);
2719 CurDAG
->SelectNodeTo(N
, ARM::tADDframe
, MVT::i32
, TFI
,
2720 CurDAG
->getTargetConstant(0, dl
, MVT::i32
));
2723 unsigned Opc
= ((Subtarget
->isThumb() && Subtarget
->hasThumb2()) ?
2724 ARM::t2ADDri
: ARM::ADDri
);
2725 SDValue Ops
[] = { TFI
, CurDAG
->getTargetConstant(0, dl
, MVT::i32
),
2726 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
2727 CurDAG
->getRegister(0, MVT::i32
) };
2728 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
2733 if (tryV6T2BitfieldExtractOp(N
, false))
2736 case ISD::SIGN_EXTEND_INREG
:
2738 if (tryV6T2BitfieldExtractOp(N
, true))
2742 if (Subtarget
->isThumb1Only())
2744 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1))) {
2745 unsigned RHSV
= C
->getZExtValue();
2747 if (isPowerOf2_32(RHSV
-1)) { // 2^n+1?
2748 unsigned ShImm
= Log2_32(RHSV
-1);
2751 SDValue V
= N
->getOperand(0);
2752 ShImm
= ARM_AM::getSORegOpc(ARM_AM::lsl
, ShImm
);
2753 SDValue ShImmOp
= CurDAG
->getTargetConstant(ShImm
, dl
, MVT::i32
);
2754 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2755 if (Subtarget
->isThumb()) {
2756 SDValue Ops
[] = { V
, V
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
, Reg0
};
2757 CurDAG
->SelectNodeTo(N
, ARM::t2ADDrs
, MVT::i32
, Ops
);
2760 SDValue Ops
[] = { V
, V
, Reg0
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
,
2762 CurDAG
->SelectNodeTo(N
, ARM::ADDrsi
, MVT::i32
, Ops
);
2766 if (isPowerOf2_32(RHSV
+1)) { // 2^n-1?
2767 unsigned ShImm
= Log2_32(RHSV
+1);
2770 SDValue V
= N
->getOperand(0);
2771 ShImm
= ARM_AM::getSORegOpc(ARM_AM::lsl
, ShImm
);
2772 SDValue ShImmOp
= CurDAG
->getTargetConstant(ShImm
, dl
, MVT::i32
);
2773 SDValue Reg0
= CurDAG
->getRegister(0, MVT::i32
);
2774 if (Subtarget
->isThumb()) {
2775 SDValue Ops
[] = { V
, V
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
, Reg0
};
2776 CurDAG
->SelectNodeTo(N
, ARM::t2RSBrs
, MVT::i32
, Ops
);
2779 SDValue Ops
[] = { V
, V
, Reg0
, ShImmOp
, getAL(CurDAG
, dl
), Reg0
,
2781 CurDAG
->SelectNodeTo(N
, ARM::RSBrsi
, MVT::i32
, Ops
);
2788 // Check for unsigned bitfield extract
2789 if (tryV6T2BitfieldExtractOp(N
, false))
2792 // If an immediate is used in an AND node, it is possible that the immediate
2793 // can be more optimally materialized when negated. If this is the case we
2794 // can negate the immediate and use a BIC instead.
2795 auto *N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
2796 if (N1C
&& N1C
->hasOneUse() && Subtarget
->isThumb()) {
2797 uint32_t Imm
= (uint32_t) N1C
->getZExtValue();
2799 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2800 // immediate can be negated and fit in the immediate operand of
2801 // a t2BIC, don't do any manual transform here as this can be
2802 // handled by the generic ISel machinery.
2803 bool PreferImmediateEncoding
=
2804 Subtarget
->hasThumb2() && (is_t2_so_imm(Imm
) || is_t2_so_imm_not(Imm
));
2805 if (!PreferImmediateEncoding
&&
2806 ConstantMaterializationCost(Imm
) >
2807 ConstantMaterializationCost(~Imm
)) {
2808 // The current immediate costs more to materialize than a negated
2809 // immediate, so negate the immediate and use a BIC.
2811 CurDAG
->getConstant(~N1C
->getZExtValue(), dl
, MVT::i32
);
2812 // If the new constant didn't exist before, reposition it in the topological
2813 // ordering so it is just before N. Otherwise, don't touch its location.
2814 if (NewImm
->getNodeId() == -1)
2815 CurDAG
->RepositionNode(N
->getIterator(), NewImm
.getNode());
2817 if (!Subtarget
->hasThumb2()) {
2818 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
),
2819 N
->getOperand(0), NewImm
, getAL(CurDAG
, dl
),
2820 CurDAG
->getRegister(0, MVT::i32
)};
2821 ReplaceNode(N
, CurDAG
->getMachineNode(ARM::tBIC
, dl
, MVT::i32
, Ops
));
2824 SDValue Ops
[] = {N
->getOperand(0), NewImm
, getAL(CurDAG
, dl
),
2825 CurDAG
->getRegister(0, MVT::i32
),
2826 CurDAG
->getRegister(0, MVT::i32
)};
2828 CurDAG
->getMachineNode(ARM::t2BICrr
, dl
, MVT::i32
, Ops
));
2834 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2835 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2836 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2837 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2838 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2839 EVT VT
= N
->getValueType(0);
2842 unsigned Opc
= (Subtarget
->isThumb() && Subtarget
->hasThumb2())
2844 : (Subtarget
->hasV6T2Ops() ? ARM::MOVTi16
: 0);
2847 SDValue N0
= N
->getOperand(0), N1
= N
->getOperand(1);
2848 N1C
= dyn_cast
<ConstantSDNode
>(N1
);
2851 if (N0
.getOpcode() == ISD::OR
&& N0
.getNode()->hasOneUse()) {
2852 SDValue N2
= N0
.getOperand(1);
2853 ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N2
);
2856 unsigned N1CVal
= N1C
->getZExtValue();
2857 unsigned N2CVal
= N2C
->getZExtValue();
2858 if ((N1CVal
& 0xffff0000U
) == (N2CVal
& 0xffff0000U
) &&
2859 (N1CVal
& 0xffffU
) == 0xffffU
&&
2860 (N2CVal
& 0xffffU
) == 0x0U
) {
2861 SDValue Imm16
= CurDAG
->getTargetConstant((N2CVal
& 0xFFFF0000U
) >> 16,
2863 SDValue Ops
[] = { N0
.getOperand(0), Imm16
,
2864 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
) };
2865 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, Ops
));
2872 case ARMISD::UMAAL
: {
2873 unsigned Opc
= Subtarget
->isThumb() ? ARM::t2UMAAL
: ARM::UMAAL
;
2874 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1),
2875 N
->getOperand(2), N
->getOperand(3),
2877 CurDAG
->getRegister(0, MVT::i32
) };
2878 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, MVT::i32
, Ops
));
2881 case ARMISD::UMLAL
:{
2882 if (Subtarget
->isThumb()) {
2883 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
2884 N
->getOperand(3), getAL(CurDAG
, dl
),
2885 CurDAG
->getRegister(0, MVT::i32
)};
2887 N
, CurDAG
->getMachineNode(ARM::t2UMLAL
, dl
, MVT::i32
, MVT::i32
, Ops
));
2890 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
2891 N
->getOperand(3), getAL(CurDAG
, dl
),
2892 CurDAG
->getRegister(0, MVT::i32
),
2893 CurDAG
->getRegister(0, MVT::i32
) };
2894 ReplaceNode(N
, CurDAG
->getMachineNode(
2895 Subtarget
->hasV6Ops() ? ARM::UMLAL
: ARM::UMLALv5
, dl
,
2896 MVT::i32
, MVT::i32
, Ops
));
2900 case ARMISD::SMLAL
:{
2901 if (Subtarget
->isThumb()) {
2902 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
2903 N
->getOperand(3), getAL(CurDAG
, dl
),
2904 CurDAG
->getRegister(0, MVT::i32
)};
2906 N
, CurDAG
->getMachineNode(ARM::t2SMLAL
, dl
, MVT::i32
, MVT::i32
, Ops
));
2909 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
2910 N
->getOperand(3), getAL(CurDAG
, dl
),
2911 CurDAG
->getRegister(0, MVT::i32
),
2912 CurDAG
->getRegister(0, MVT::i32
) };
2913 ReplaceNode(N
, CurDAG
->getMachineNode(
2914 Subtarget
->hasV6Ops() ? ARM::SMLAL
: ARM::SMLALv5
, dl
,
2915 MVT::i32
, MVT::i32
, Ops
));
2919 case ARMISD::SUBE
: {
2920 if (!Subtarget
->hasV6Ops() || !Subtarget
->hasDSP())
2922 // Look for a pattern to match SMMLS
2923 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2924 if (N
->getOperand(1).getOpcode() != ISD::SMUL_LOHI
||
2925 N
->getOperand(2).getOpcode() != ARMISD::SUBC
||
2926 !SDValue(N
, 1).use_empty())
2929 if (Subtarget
->isThumb())
2930 assert(Subtarget
->hasThumb2() &&
2931 "This pattern should not be generated for Thumb");
2933 SDValue SmulLoHi
= N
->getOperand(1);
2934 SDValue Subc
= N
->getOperand(2);
2935 auto *Zero
= dyn_cast
<ConstantSDNode
>(Subc
.getOperand(0));
2937 if (!Zero
|| Zero
->getZExtValue() != 0 ||
2938 Subc
.getOperand(1) != SmulLoHi
.getValue(0) ||
2939 N
->getOperand(1) != SmulLoHi
.getValue(1) ||
2940 N
->getOperand(2) != Subc
.getValue(1))
2943 unsigned Opc
= Subtarget
->isThumb2() ? ARM::t2SMMLS
: ARM::SMMLS
;
2944 SDValue Ops
[] = { SmulLoHi
.getOperand(0), SmulLoHi
.getOperand(1),
2945 N
->getOperand(0), getAL(CurDAG
, dl
),
2946 CurDAG
->getRegister(0, MVT::i32
) };
2947 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
));
2951 if (Subtarget
->isThumb() && Subtarget
->hasThumb2()) {
2952 if (tryT2IndexedLoad(N
))
2954 } else if (Subtarget
->isThumb()) {
2955 if (tryT1IndexedLoad(N
))
2957 } else if (tryARMIndexedLoad(N
))
2959 // Other cases are autogenerated.
2962 case ARMISD::BRCOND
: {
2963 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2964 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2965 // Pattern complexity = 6 cost = 1 size = 0
2967 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2968 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2969 // Pattern complexity = 6 cost = 1 size = 0
2971 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2972 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2973 // Pattern complexity = 6 cost = 1 size = 0
2975 unsigned Opc
= Subtarget
->isThumb() ?
2976 ((Subtarget
->hasThumb2()) ? ARM::t2Bcc
: ARM::tBcc
) : ARM::Bcc
;
2977 SDValue Chain
= N
->getOperand(0);
2978 SDValue N1
= N
->getOperand(1);
2979 SDValue N2
= N
->getOperand(2);
2980 SDValue N3
= N
->getOperand(3);
2981 SDValue InFlag
= N
->getOperand(4);
2982 assert(N1
.getOpcode() == ISD::BasicBlock
);
2983 assert(N2
.getOpcode() == ISD::Constant
);
2984 assert(N3
.getOpcode() == ISD::Register
);
2986 unsigned CC
= (unsigned) cast
<ConstantSDNode
>(N2
)->getZExtValue();
2988 if (InFlag
.getOpcode() == ARMISD::CMPZ
) {
2989 if (InFlag
.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN
) {
2990 SDValue Int
= InFlag
.getOperand(0);
2991 uint64_t ID
= cast
<ConstantSDNode
>(Int
->getOperand(1))->getZExtValue();
2993 // Handle low-overhead loops.
2994 if (ID
== Intrinsic::loop_decrement_reg
) {
2995 SDValue Elements
= Int
.getOperand(2);
2996 SDValue Size
= CurDAG
->getTargetConstant(
2997 cast
<ConstantSDNode
>(Int
.getOperand(3))->getZExtValue(), dl
,
3000 SDValue Args
[] = { Elements
, Size
, Int
.getOperand(0) };
3002 CurDAG
->getMachineNode(ARM::t2LoopDec
, dl
,
3003 CurDAG
->getVTList(MVT::i32
, MVT::Other
),
3005 ReplaceUses(Int
.getNode(), LoopDec
);
3007 SDValue EndArgs
[] = { SDValue(LoopDec
, 0), N1
, Chain
};
3009 CurDAG
->getMachineNode(ARM::t2LoopEnd
, dl
, MVT::Other
, EndArgs
);
3011 ReplaceUses(N
, LoopEnd
);
3012 CurDAG
->RemoveDeadNode(N
);
3013 CurDAG
->RemoveDeadNode(InFlag
.getNode());
3014 CurDAG
->RemoveDeadNode(Int
.getNode());
3019 bool SwitchEQNEToPLMI
;
3020 SelectCMPZ(InFlag
.getNode(), SwitchEQNEToPLMI
);
3021 InFlag
= N
->getOperand(4);
3023 if (SwitchEQNEToPLMI
) {
3024 switch ((ARMCC::CondCodes
)CC
) {
3025 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3027 CC
= (unsigned)ARMCC::MI
;
3030 CC
= (unsigned)ARMCC::PL
;
3036 SDValue Tmp2
= CurDAG
->getTargetConstant(CC
, dl
, MVT::i32
);
3037 SDValue Ops
[] = { N1
, Tmp2
, N3
, Chain
, InFlag
};
3038 SDNode
*ResNode
= CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
,
3040 Chain
= SDValue(ResNode
, 0);
3041 if (N
->getNumValues() == 2) {
3042 InFlag
= SDValue(ResNode
, 1);
3043 ReplaceUses(SDValue(N
, 1), InFlag
);
3045 ReplaceUses(SDValue(N
, 0),
3046 SDValue(Chain
.getNode(), Chain
.getResNo()));
3047 CurDAG
->RemoveDeadNode(N
);
3051 case ARMISD::CMPZ
: {
3052 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3053 // This allows us to avoid materializing the expensive negative constant.
3054 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3055 // for its glue output.
3056 SDValue X
= N
->getOperand(0);
3057 auto *C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1).getNode());
3058 if (C
&& C
->getSExtValue() < 0 && Subtarget
->isThumb()) {
3059 int64_t Addend
= -C
->getSExtValue();
3061 SDNode
*Add
= nullptr;
3062 // ADDS can be better than CMN if the immediate fits in a
3063 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3064 // Outside that range we can just use a CMN which is 32-bit but has a
3065 // 12-bit immediate range.
3066 if (Addend
< 1<<8) {
3067 if (Subtarget
->isThumb2()) {
3068 SDValue Ops
[] = { X
, CurDAG
->getTargetConstant(Addend
, dl
, MVT::i32
),
3069 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
3070 CurDAG
->getRegister(0, MVT::i32
) };
3071 Add
= CurDAG
->getMachineNode(ARM::t2ADDri
, dl
, MVT::i32
, Ops
);
3073 unsigned Opc
= (Addend
< 1<<3) ? ARM::tADDi3
: ARM::tADDi8
;
3074 SDValue Ops
[] = {CurDAG
->getRegister(ARM::CPSR
, MVT::i32
), X
,
3075 CurDAG
->getTargetConstant(Addend
, dl
, MVT::i32
),
3076 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
)};
3077 Add
= CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, Ops
);
3081 SDValue Ops2
[] = {SDValue(Add
, 0), CurDAG
->getConstant(0, dl
, MVT::i32
)};
3082 CurDAG
->MorphNodeTo(N
, ARMISD::CMPZ
, CurDAG
->getVTList(MVT::Glue
), Ops2
);
3085 // Other cases are autogenerated.
3089 case ARMISD::CMOV
: {
3090 SDValue InFlag
= N
->getOperand(4);
3092 if (InFlag
.getOpcode() == ARMISD::CMPZ
) {
3093 bool SwitchEQNEToPLMI
;
3094 SelectCMPZ(InFlag
.getNode(), SwitchEQNEToPLMI
);
3096 if (SwitchEQNEToPLMI
) {
3097 SDValue ARMcc
= N
->getOperand(2);
3098 ARMCC::CondCodes CC
=
3099 (ARMCC::CondCodes
)cast
<ConstantSDNode
>(ARMcc
)->getZExtValue();
3102 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3110 SDValue NewARMcc
= CurDAG
->getConstant((unsigned)CC
, dl
, MVT::i32
);
3111 SDValue Ops
[] = {N
->getOperand(0), N
->getOperand(1), NewARMcc
,
3112 N
->getOperand(3), N
->getOperand(4)};
3113 CurDAG
->MorphNodeTo(N
, ARMISD::CMOV
, N
->getVTList(), Ops
);
3117 // Other cases are autogenerated.
3121 case ARMISD::VZIP
: {
3123 EVT VT
= N
->getValueType(0);
3124 switch (VT
.getSimpleVT().SimpleTy
) {
3126 case MVT::v8i8
: Opc
= ARM::VZIPd8
; break;
3128 case MVT::v4i16
: Opc
= ARM::VZIPd16
; break;
3130 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3131 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
3132 case MVT::v16i8
: Opc
= ARM::VZIPq8
; break;
3134 case MVT::v8i16
: Opc
= ARM::VZIPq16
; break;
3136 case MVT::v4i32
: Opc
= ARM::VZIPq32
; break;
3138 SDValue Pred
= getAL(CurDAG
, dl
);
3139 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
3140 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
3141 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
3144 case ARMISD::VUZP
: {
3146 EVT VT
= N
->getValueType(0);
3147 switch (VT
.getSimpleVT().SimpleTy
) {
3149 case MVT::v8i8
: Opc
= ARM::VUZPd8
; break;
3151 case MVT::v4i16
: Opc
= ARM::VUZPd16
; break;
3153 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3154 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
3155 case MVT::v16i8
: Opc
= ARM::VUZPq8
; break;
3157 case MVT::v8i16
: Opc
= ARM::VUZPq16
; break;
3159 case MVT::v4i32
: Opc
= ARM::VUZPq32
; break;
3161 SDValue Pred
= getAL(CurDAG
, dl
);
3162 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
3163 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
3164 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
3167 case ARMISD::VTRN
: {
3169 EVT VT
= N
->getValueType(0);
3170 switch (VT
.getSimpleVT().SimpleTy
) {
3172 case MVT::v8i8
: Opc
= ARM::VTRNd8
; break;
3174 case MVT::v4i16
: Opc
= ARM::VTRNd16
; break;
3176 case MVT::v2i32
: Opc
= ARM::VTRNd32
; break;
3177 case MVT::v16i8
: Opc
= ARM::VTRNq8
; break;
3179 case MVT::v8i16
: Opc
= ARM::VTRNq16
; break;
3181 case MVT::v4i32
: Opc
= ARM::VTRNq32
; break;
3183 SDValue Pred
= getAL(CurDAG
, dl
);
3184 SDValue PredReg
= CurDAG
->getRegister(0, MVT::i32
);
3185 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), Pred
, PredReg
};
3186 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, VT
, VT
, Ops
));
3189 case ARMISD::BUILD_VECTOR
: {
3190 EVT VecVT
= N
->getValueType(0);
3191 EVT EltVT
= VecVT
.getVectorElementType();
3192 unsigned NumElts
= VecVT
.getVectorNumElements();
3193 if (EltVT
== MVT::f64
) {
3194 assert(NumElts
== 2 && "unexpected type for BUILD_VECTOR");
3196 N
, createDRegPairNode(VecVT
, N
->getOperand(0), N
->getOperand(1)));
3199 assert(EltVT
== MVT::f32
&& "unexpected type for BUILD_VECTOR");
3202 N
, createSRegPairNode(VecVT
, N
->getOperand(0), N
->getOperand(1)));
3205 assert(NumElts
== 4 && "unexpected type for BUILD_VECTOR");
3207 createQuadSRegsNode(VecVT
, N
->getOperand(0), N
->getOperand(1),
3208 N
->getOperand(2), N
->getOperand(3)));
3212 case ARMISD::VLD1DUP
: {
3213 static const uint16_t DOpcodes
[] = { ARM::VLD1DUPd8
, ARM::VLD1DUPd16
,
3215 static const uint16_t QOpcodes
[] = { ARM::VLD1DUPq8
, ARM::VLD1DUPq16
,
3217 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 1, DOpcodes
, QOpcodes
);
3221 case ARMISD::VLD2DUP
: {
3222 static const uint16_t Opcodes
[] = { ARM::VLD2DUPd8
, ARM::VLD2DUPd16
,
3224 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 2, Opcodes
);
3228 case ARMISD::VLD3DUP
: {
3229 static const uint16_t Opcodes
[] = { ARM::VLD3DUPd8Pseudo
,
3230 ARM::VLD3DUPd16Pseudo
,
3231 ARM::VLD3DUPd32Pseudo
};
3232 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 3, Opcodes
);
3236 case ARMISD::VLD4DUP
: {
3237 static const uint16_t Opcodes
[] = { ARM::VLD4DUPd8Pseudo
,
3238 ARM::VLD4DUPd16Pseudo
,
3239 ARM::VLD4DUPd32Pseudo
};
3240 SelectVLDDup(N
, /* IsIntrinsic= */ false, false, 4, Opcodes
);
3244 case ARMISD::VLD1DUP_UPD
: {
3245 static const uint16_t DOpcodes
[] = { ARM::VLD1DUPd8wb_fixed
,
3246 ARM::VLD1DUPd16wb_fixed
,
3247 ARM::VLD1DUPd32wb_fixed
};
3248 static const uint16_t QOpcodes
[] = { ARM::VLD1DUPq8wb_fixed
,
3249 ARM::VLD1DUPq16wb_fixed
,
3250 ARM::VLD1DUPq32wb_fixed
};
3251 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 1, DOpcodes
, QOpcodes
);
3255 case ARMISD::VLD2DUP_UPD
: {
3256 static const uint16_t Opcodes
[] = { ARM::VLD2DUPd8wb_fixed
,
3257 ARM::VLD2DUPd16wb_fixed
,
3258 ARM::VLD2DUPd32wb_fixed
};
3259 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 2, Opcodes
);
3263 case ARMISD::VLD3DUP_UPD
: {
3264 static const uint16_t Opcodes
[] = { ARM::VLD3DUPd8Pseudo_UPD
,
3265 ARM::VLD3DUPd16Pseudo_UPD
,
3266 ARM::VLD3DUPd32Pseudo_UPD
};
3267 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 3, Opcodes
);
3271 case ARMISD::VLD4DUP_UPD
: {
3272 static const uint16_t Opcodes
[] = { ARM::VLD4DUPd8Pseudo_UPD
,
3273 ARM::VLD4DUPd16Pseudo_UPD
,
3274 ARM::VLD4DUPd32Pseudo_UPD
};
3275 SelectVLDDup(N
, /* IsIntrinsic= */ false, true, 4, Opcodes
);
3279 case ARMISD::VLD1_UPD
: {
3280 static const uint16_t DOpcodes
[] = { ARM::VLD1d8wb_fixed
,
3281 ARM::VLD1d16wb_fixed
,
3282 ARM::VLD1d32wb_fixed
,
3283 ARM::VLD1d64wb_fixed
};
3284 static const uint16_t QOpcodes
[] = { ARM::VLD1q8wb_fixed
,
3285 ARM::VLD1q16wb_fixed
,
3286 ARM::VLD1q32wb_fixed
,
3287 ARM::VLD1q64wb_fixed
};
3288 SelectVLD(N
, true, 1, DOpcodes
, QOpcodes
, nullptr);
3292 case ARMISD::VLD2_UPD
: {
3293 static const uint16_t DOpcodes
[] = { ARM::VLD2d8wb_fixed
,
3294 ARM::VLD2d16wb_fixed
,
3295 ARM::VLD2d32wb_fixed
,
3296 ARM::VLD1q64wb_fixed
};
3297 static const uint16_t QOpcodes
[] = { ARM::VLD2q8PseudoWB_fixed
,
3298 ARM::VLD2q16PseudoWB_fixed
,
3299 ARM::VLD2q32PseudoWB_fixed
};
3300 SelectVLD(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
3304 case ARMISD::VLD3_UPD
: {
3305 static const uint16_t DOpcodes
[] = { ARM::VLD3d8Pseudo_UPD
,
3306 ARM::VLD3d16Pseudo_UPD
,
3307 ARM::VLD3d32Pseudo_UPD
,
3308 ARM::VLD1d64TPseudoWB_fixed
};
3309 static const uint16_t QOpcodes0
[] = { ARM::VLD3q8Pseudo_UPD
,
3310 ARM::VLD3q16Pseudo_UPD
,
3311 ARM::VLD3q32Pseudo_UPD
};
3312 static const uint16_t QOpcodes1
[] = { ARM::VLD3q8oddPseudo_UPD
,
3313 ARM::VLD3q16oddPseudo_UPD
,
3314 ARM::VLD3q32oddPseudo_UPD
};
3315 SelectVLD(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3319 case ARMISD::VLD4_UPD
: {
3320 static const uint16_t DOpcodes
[] = { ARM::VLD4d8Pseudo_UPD
,
3321 ARM::VLD4d16Pseudo_UPD
,
3322 ARM::VLD4d32Pseudo_UPD
,
3323 ARM::VLD1d64QPseudoWB_fixed
};
3324 static const uint16_t QOpcodes0
[] = { ARM::VLD4q8Pseudo_UPD
,
3325 ARM::VLD4q16Pseudo_UPD
,
3326 ARM::VLD4q32Pseudo_UPD
};
3327 static const uint16_t QOpcodes1
[] = { ARM::VLD4q8oddPseudo_UPD
,
3328 ARM::VLD4q16oddPseudo_UPD
,
3329 ARM::VLD4q32oddPseudo_UPD
};
3330 SelectVLD(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3334 case ARMISD::VLD2LN_UPD
: {
3335 static const uint16_t DOpcodes
[] = { ARM::VLD2LNd8Pseudo_UPD
,
3336 ARM::VLD2LNd16Pseudo_UPD
,
3337 ARM::VLD2LNd32Pseudo_UPD
};
3338 static const uint16_t QOpcodes
[] = { ARM::VLD2LNq16Pseudo_UPD
,
3339 ARM::VLD2LNq32Pseudo_UPD
};
3340 SelectVLDSTLane(N
, true, true, 2, DOpcodes
, QOpcodes
);
3344 case ARMISD::VLD3LN_UPD
: {
3345 static const uint16_t DOpcodes
[] = { ARM::VLD3LNd8Pseudo_UPD
,
3346 ARM::VLD3LNd16Pseudo_UPD
,
3347 ARM::VLD3LNd32Pseudo_UPD
};
3348 static const uint16_t QOpcodes
[] = { ARM::VLD3LNq16Pseudo_UPD
,
3349 ARM::VLD3LNq32Pseudo_UPD
};
3350 SelectVLDSTLane(N
, true, true, 3, DOpcodes
, QOpcodes
);
3354 case ARMISD::VLD4LN_UPD
: {
3355 static const uint16_t DOpcodes
[] = { ARM::VLD4LNd8Pseudo_UPD
,
3356 ARM::VLD4LNd16Pseudo_UPD
,
3357 ARM::VLD4LNd32Pseudo_UPD
};
3358 static const uint16_t QOpcodes
[] = { ARM::VLD4LNq16Pseudo_UPD
,
3359 ARM::VLD4LNq32Pseudo_UPD
};
3360 SelectVLDSTLane(N
, true, true, 4, DOpcodes
, QOpcodes
);
3364 case ARMISD::VST1_UPD
: {
3365 static const uint16_t DOpcodes
[] = { ARM::VST1d8wb_fixed
,
3366 ARM::VST1d16wb_fixed
,
3367 ARM::VST1d32wb_fixed
,
3368 ARM::VST1d64wb_fixed
};
3369 static const uint16_t QOpcodes
[] = { ARM::VST1q8wb_fixed
,
3370 ARM::VST1q16wb_fixed
,
3371 ARM::VST1q32wb_fixed
,
3372 ARM::VST1q64wb_fixed
};
3373 SelectVST(N
, true, 1, DOpcodes
, QOpcodes
, nullptr);
3377 case ARMISD::VST2_UPD
: {
3378 static const uint16_t DOpcodes
[] = { ARM::VST2d8wb_fixed
,
3379 ARM::VST2d16wb_fixed
,
3380 ARM::VST2d32wb_fixed
,
3381 ARM::VST1q64wb_fixed
};
3382 static const uint16_t QOpcodes
[] = { ARM::VST2q8PseudoWB_fixed
,
3383 ARM::VST2q16PseudoWB_fixed
,
3384 ARM::VST2q32PseudoWB_fixed
};
3385 SelectVST(N
, true, 2, DOpcodes
, QOpcodes
, nullptr);
3389 case ARMISD::VST3_UPD
: {
3390 static const uint16_t DOpcodes
[] = { ARM::VST3d8Pseudo_UPD
,
3391 ARM::VST3d16Pseudo_UPD
,
3392 ARM::VST3d32Pseudo_UPD
,
3393 ARM::VST1d64TPseudoWB_fixed
};
3394 static const uint16_t QOpcodes0
[] = { ARM::VST3q8Pseudo_UPD
,
3395 ARM::VST3q16Pseudo_UPD
,
3396 ARM::VST3q32Pseudo_UPD
};
3397 static const uint16_t QOpcodes1
[] = { ARM::VST3q8oddPseudo_UPD
,
3398 ARM::VST3q16oddPseudo_UPD
,
3399 ARM::VST3q32oddPseudo_UPD
};
3400 SelectVST(N
, true, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3404 case ARMISD::VST4_UPD
: {
3405 static const uint16_t DOpcodes
[] = { ARM::VST4d8Pseudo_UPD
,
3406 ARM::VST4d16Pseudo_UPD
,
3407 ARM::VST4d32Pseudo_UPD
,
3408 ARM::VST1d64QPseudoWB_fixed
};
3409 static const uint16_t QOpcodes0
[] = { ARM::VST4q8Pseudo_UPD
,
3410 ARM::VST4q16Pseudo_UPD
,
3411 ARM::VST4q32Pseudo_UPD
};
3412 static const uint16_t QOpcodes1
[] = { ARM::VST4q8oddPseudo_UPD
,
3413 ARM::VST4q16oddPseudo_UPD
,
3414 ARM::VST4q32oddPseudo_UPD
};
3415 SelectVST(N
, true, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3419 case ARMISD::VST2LN_UPD
: {
3420 static const uint16_t DOpcodes
[] = { ARM::VST2LNd8Pseudo_UPD
,
3421 ARM::VST2LNd16Pseudo_UPD
,
3422 ARM::VST2LNd32Pseudo_UPD
};
3423 static const uint16_t QOpcodes
[] = { ARM::VST2LNq16Pseudo_UPD
,
3424 ARM::VST2LNq32Pseudo_UPD
};
3425 SelectVLDSTLane(N
, false, true, 2, DOpcodes
, QOpcodes
);
3429 case ARMISD::VST3LN_UPD
: {
3430 static const uint16_t DOpcodes
[] = { ARM::VST3LNd8Pseudo_UPD
,
3431 ARM::VST3LNd16Pseudo_UPD
,
3432 ARM::VST3LNd32Pseudo_UPD
};
3433 static const uint16_t QOpcodes
[] = { ARM::VST3LNq16Pseudo_UPD
,
3434 ARM::VST3LNq32Pseudo_UPD
};
3435 SelectVLDSTLane(N
, false, true, 3, DOpcodes
, QOpcodes
);
3439 case ARMISD::VST4LN_UPD
: {
3440 static const uint16_t DOpcodes
[] = { ARM::VST4LNd8Pseudo_UPD
,
3441 ARM::VST4LNd16Pseudo_UPD
,
3442 ARM::VST4LNd32Pseudo_UPD
};
3443 static const uint16_t QOpcodes
[] = { ARM::VST4LNq16Pseudo_UPD
,
3444 ARM::VST4LNq32Pseudo_UPD
};
3445 SelectVLDSTLane(N
, false, true, 4, DOpcodes
, QOpcodes
);
3449 case ISD::INTRINSIC_VOID
:
3450 case ISD::INTRINSIC_W_CHAIN
: {
3451 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
3456 case Intrinsic::arm_mrrc
:
3457 case Intrinsic::arm_mrrc2
: {
3459 SDValue Chain
= N
->getOperand(0);
3462 if (Subtarget
->isThumb())
3463 Opc
= (IntNo
== Intrinsic::arm_mrrc
? ARM::t2MRRC
: ARM::t2MRRC2
);
3465 Opc
= (IntNo
== Intrinsic::arm_mrrc
? ARM::MRRC
: ARM::MRRC2
);
3467 SmallVector
<SDValue
, 5> Ops
;
3468 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(2))->getZExtValue(), dl
)); /* coproc */
3469 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(3))->getZExtValue(), dl
)); /* opc */
3470 Ops
.push_back(getI32Imm(cast
<ConstantSDNode
>(N
->getOperand(4))->getZExtValue(), dl
)); /* CRm */
3472 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3473 // instruction will always be '1111' but it is possible in assembly language to specify
3474 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3475 if (Opc
!= ARM::MRRC2
) {
3476 Ops
.push_back(getAL(CurDAG
, dl
));
3477 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
3480 Ops
.push_back(Chain
);
3482 // Writes to two registers.
3483 const EVT RetType
[] = {MVT::i32
, MVT::i32
, MVT::Other
};
3485 ReplaceNode(N
, CurDAG
->getMachineNode(Opc
, dl
, RetType
, Ops
));
3488 case Intrinsic::arm_ldaexd
:
3489 case Intrinsic::arm_ldrexd
: {
3491 SDValue Chain
= N
->getOperand(0);
3492 SDValue MemAddr
= N
->getOperand(2);
3493 bool isThumb
= Subtarget
->isThumb() && Subtarget
->hasV8MBaselineOps();
3495 bool IsAcquire
= IntNo
== Intrinsic::arm_ldaexd
;
3496 unsigned NewOpc
= isThumb
? (IsAcquire
? ARM::t2LDAEXD
: ARM::t2LDREXD
)
3497 : (IsAcquire
? ARM::LDAEXD
: ARM::LDREXD
);
3499 // arm_ldrexd returns a i64 value in {i32, i32}
3500 std::vector
<EVT
> ResTys
;
3502 ResTys
.push_back(MVT::i32
);
3503 ResTys
.push_back(MVT::i32
);
3505 ResTys
.push_back(MVT::Untyped
);
3506 ResTys
.push_back(MVT::Other
);
3508 // Place arguments in the right order.
3509 SDValue Ops
[] = {MemAddr
, getAL(CurDAG
, dl
),
3510 CurDAG
->getRegister(0, MVT::i32
), Chain
};
3511 SDNode
*Ld
= CurDAG
->getMachineNode(NewOpc
, dl
, ResTys
, Ops
);
3512 // Transfer memoperands.
3513 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
3514 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Ld
), {MemOp
});
3517 SDValue OutChain
= isThumb
? SDValue(Ld
, 2) : SDValue(Ld
, 1);
3518 if (!SDValue(N
, 0).use_empty()) {
3521 Result
= SDValue(Ld
, 0);
3524 CurDAG
->getTargetConstant(ARM::gsub_0
, dl
, MVT::i32
);
3525 SDNode
*ResNode
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
3526 dl
, MVT::i32
, SDValue(Ld
, 0), SubRegIdx
);
3527 Result
= SDValue(ResNode
,0);
3529 ReplaceUses(SDValue(N
, 0), Result
);
3531 if (!SDValue(N
, 1).use_empty()) {
3534 Result
= SDValue(Ld
, 1);
3537 CurDAG
->getTargetConstant(ARM::gsub_1
, dl
, MVT::i32
);
3538 SDNode
*ResNode
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
3539 dl
, MVT::i32
, SDValue(Ld
, 0), SubRegIdx
);
3540 Result
= SDValue(ResNode
,0);
3542 ReplaceUses(SDValue(N
, 1), Result
);
3544 ReplaceUses(SDValue(N
, 2), OutChain
);
3545 CurDAG
->RemoveDeadNode(N
);
3548 case Intrinsic::arm_stlexd
:
3549 case Intrinsic::arm_strexd
: {
3551 SDValue Chain
= N
->getOperand(0);
3552 SDValue Val0
= N
->getOperand(2);
3553 SDValue Val1
= N
->getOperand(3);
3554 SDValue MemAddr
= N
->getOperand(4);
3556 // Store exclusive double return a i32 value which is the return status
3557 // of the issued store.
3558 const EVT ResTys
[] = {MVT::i32
, MVT::Other
};
3560 bool isThumb
= Subtarget
->isThumb() && Subtarget
->hasThumb2();
3561 // Place arguments in the right order.
3562 SmallVector
<SDValue
, 7> Ops
;
3564 Ops
.push_back(Val0
);
3565 Ops
.push_back(Val1
);
3567 // arm_strexd uses GPRPair.
3568 Ops
.push_back(SDValue(createGPRPairNode(MVT::Untyped
, Val0
, Val1
), 0));
3569 Ops
.push_back(MemAddr
);
3570 Ops
.push_back(getAL(CurDAG
, dl
));
3571 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
3572 Ops
.push_back(Chain
);
3574 bool IsRelease
= IntNo
== Intrinsic::arm_stlexd
;
3575 unsigned NewOpc
= isThumb
? (IsRelease
? ARM::t2STLEXD
: ARM::t2STREXD
)
3576 : (IsRelease
? ARM::STLEXD
: ARM::STREXD
);
3578 SDNode
*St
= CurDAG
->getMachineNode(NewOpc
, dl
, ResTys
, Ops
);
3579 // Transfer memoperands.
3580 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
3581 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(St
), {MemOp
});
3587 case Intrinsic::arm_neon_vld1
: {
3588 static const uint16_t DOpcodes
[] = { ARM::VLD1d8
, ARM::VLD1d16
,
3589 ARM::VLD1d32
, ARM::VLD1d64
};
3590 static const uint16_t QOpcodes
[] = { ARM::VLD1q8
, ARM::VLD1q16
,
3591 ARM::VLD1q32
, ARM::VLD1q64
};
3592 SelectVLD(N
, false, 1, DOpcodes
, QOpcodes
, nullptr);
3596 case Intrinsic::arm_neon_vld1x2
: {
3597 static const uint16_t DOpcodes
[] = { ARM::VLD1q8
, ARM::VLD1q16
,
3598 ARM::VLD1q32
, ARM::VLD1q64
};
3599 static const uint16_t QOpcodes
[] = { ARM::VLD1d8QPseudo
,
3600 ARM::VLD1d16QPseudo
,
3601 ARM::VLD1d32QPseudo
,
3602 ARM::VLD1d64QPseudo
};
3603 SelectVLD(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3607 case Intrinsic::arm_neon_vld1x3
: {
3608 static const uint16_t DOpcodes
[] = { ARM::VLD1d8TPseudo
,
3609 ARM::VLD1d16TPseudo
,
3610 ARM::VLD1d32TPseudo
,
3611 ARM::VLD1d64TPseudo
};
3612 static const uint16_t QOpcodes0
[] = { ARM::VLD1q8LowTPseudo_UPD
,
3613 ARM::VLD1q16LowTPseudo_UPD
,
3614 ARM::VLD1q32LowTPseudo_UPD
,
3615 ARM::VLD1q64LowTPseudo_UPD
};
3616 static const uint16_t QOpcodes1
[] = { ARM::VLD1q8HighTPseudo
,
3617 ARM::VLD1q16HighTPseudo
,
3618 ARM::VLD1q32HighTPseudo
,
3619 ARM::VLD1q64HighTPseudo
};
3620 SelectVLD(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3624 case Intrinsic::arm_neon_vld1x4
: {
3625 static const uint16_t DOpcodes
[] = { ARM::VLD1d8QPseudo
,
3626 ARM::VLD1d16QPseudo
,
3627 ARM::VLD1d32QPseudo
,
3628 ARM::VLD1d64QPseudo
};
3629 static const uint16_t QOpcodes0
[] = { ARM::VLD1q8LowQPseudo_UPD
,
3630 ARM::VLD1q16LowQPseudo_UPD
,
3631 ARM::VLD1q32LowQPseudo_UPD
,
3632 ARM::VLD1q64LowQPseudo_UPD
};
3633 static const uint16_t QOpcodes1
[] = { ARM::VLD1q8HighQPseudo
,
3634 ARM::VLD1q16HighQPseudo
,
3635 ARM::VLD1q32HighQPseudo
,
3636 ARM::VLD1q64HighQPseudo
};
3637 SelectVLD(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3641 case Intrinsic::arm_neon_vld2
: {
3642 static const uint16_t DOpcodes
[] = { ARM::VLD2d8
, ARM::VLD2d16
,
3643 ARM::VLD2d32
, ARM::VLD1q64
};
3644 static const uint16_t QOpcodes
[] = { ARM::VLD2q8Pseudo
, ARM::VLD2q16Pseudo
,
3645 ARM::VLD2q32Pseudo
};
3646 SelectVLD(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3650 case Intrinsic::arm_neon_vld3
: {
3651 static const uint16_t DOpcodes
[] = { ARM::VLD3d8Pseudo
,
3654 ARM::VLD1d64TPseudo
};
3655 static const uint16_t QOpcodes0
[] = { ARM::VLD3q8Pseudo_UPD
,
3656 ARM::VLD3q16Pseudo_UPD
,
3657 ARM::VLD3q32Pseudo_UPD
};
3658 static const uint16_t QOpcodes1
[] = { ARM::VLD3q8oddPseudo
,
3659 ARM::VLD3q16oddPseudo
,
3660 ARM::VLD3q32oddPseudo
};
3661 SelectVLD(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3665 case Intrinsic::arm_neon_vld4
: {
3666 static const uint16_t DOpcodes
[] = { ARM::VLD4d8Pseudo
,
3669 ARM::VLD1d64QPseudo
};
3670 static const uint16_t QOpcodes0
[] = { ARM::VLD4q8Pseudo_UPD
,
3671 ARM::VLD4q16Pseudo_UPD
,
3672 ARM::VLD4q32Pseudo_UPD
};
3673 static const uint16_t QOpcodes1
[] = { ARM::VLD4q8oddPseudo
,
3674 ARM::VLD4q16oddPseudo
,
3675 ARM::VLD4q32oddPseudo
};
3676 SelectVLD(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3680 case Intrinsic::arm_neon_vld2dup
: {
3681 static const uint16_t DOpcodes
[] = { ARM::VLD2DUPd8
, ARM::VLD2DUPd16
,
3682 ARM::VLD2DUPd32
, ARM::VLD1q64
};
3683 static const uint16_t QOpcodes0
[] = { ARM::VLD2DUPq8EvenPseudo
,
3684 ARM::VLD2DUPq16EvenPseudo
,
3685 ARM::VLD2DUPq32EvenPseudo
};
3686 static const uint16_t QOpcodes1
[] = { ARM::VLD2DUPq8OddPseudo
,
3687 ARM::VLD2DUPq16OddPseudo
,
3688 ARM::VLD2DUPq32OddPseudo
};
3689 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 2,
3690 DOpcodes
, QOpcodes0
, QOpcodes1
);
3694 case Intrinsic::arm_neon_vld3dup
: {
3695 static const uint16_t DOpcodes
[] = { ARM::VLD3DUPd8Pseudo
,
3696 ARM::VLD3DUPd16Pseudo
,
3697 ARM::VLD3DUPd32Pseudo
,
3698 ARM::VLD1d64TPseudo
};
3699 static const uint16_t QOpcodes0
[] = { ARM::VLD3DUPq8EvenPseudo
,
3700 ARM::VLD3DUPq16EvenPseudo
,
3701 ARM::VLD3DUPq32EvenPseudo
};
3702 static const uint16_t QOpcodes1
[] = { ARM::VLD3DUPq8OddPseudo
,
3703 ARM::VLD3DUPq16OddPseudo
,
3704 ARM::VLD3DUPq32OddPseudo
};
3705 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 3,
3706 DOpcodes
, QOpcodes0
, QOpcodes1
);
3710 case Intrinsic::arm_neon_vld4dup
: {
3711 static const uint16_t DOpcodes
[] = { ARM::VLD4DUPd8Pseudo
,
3712 ARM::VLD4DUPd16Pseudo
,
3713 ARM::VLD4DUPd32Pseudo
,
3714 ARM::VLD1d64QPseudo
};
3715 static const uint16_t QOpcodes0
[] = { ARM::VLD4DUPq8EvenPseudo
,
3716 ARM::VLD4DUPq16EvenPseudo
,
3717 ARM::VLD4DUPq32EvenPseudo
};
3718 static const uint16_t QOpcodes1
[] = { ARM::VLD4DUPq8OddPseudo
,
3719 ARM::VLD4DUPq16OddPseudo
,
3720 ARM::VLD4DUPq32OddPseudo
};
3721 SelectVLDDup(N
, /* IsIntrinsic= */ true, false, 4,
3722 DOpcodes
, QOpcodes0
, QOpcodes1
);
3726 case Intrinsic::arm_neon_vld2lane
: {
3727 static const uint16_t DOpcodes
[] = { ARM::VLD2LNd8Pseudo
,
3728 ARM::VLD2LNd16Pseudo
,
3729 ARM::VLD2LNd32Pseudo
};
3730 static const uint16_t QOpcodes
[] = { ARM::VLD2LNq16Pseudo
,
3731 ARM::VLD2LNq32Pseudo
};
3732 SelectVLDSTLane(N
, true, false, 2, DOpcodes
, QOpcodes
);
3736 case Intrinsic::arm_neon_vld3lane
: {
3737 static const uint16_t DOpcodes
[] = { ARM::VLD3LNd8Pseudo
,
3738 ARM::VLD3LNd16Pseudo
,
3739 ARM::VLD3LNd32Pseudo
};
3740 static const uint16_t QOpcodes
[] = { ARM::VLD3LNq16Pseudo
,
3741 ARM::VLD3LNq32Pseudo
};
3742 SelectVLDSTLane(N
, true, false, 3, DOpcodes
, QOpcodes
);
3746 case Intrinsic::arm_neon_vld4lane
: {
3747 static const uint16_t DOpcodes
[] = { ARM::VLD4LNd8Pseudo
,
3748 ARM::VLD4LNd16Pseudo
,
3749 ARM::VLD4LNd32Pseudo
};
3750 static const uint16_t QOpcodes
[] = { ARM::VLD4LNq16Pseudo
,
3751 ARM::VLD4LNq32Pseudo
};
3752 SelectVLDSTLane(N
, true, false, 4, DOpcodes
, QOpcodes
);
3756 case Intrinsic::arm_neon_vst1
: {
3757 static const uint16_t DOpcodes
[] = { ARM::VST1d8
, ARM::VST1d16
,
3758 ARM::VST1d32
, ARM::VST1d64
};
3759 static const uint16_t QOpcodes
[] = { ARM::VST1q8
, ARM::VST1q16
,
3760 ARM::VST1q32
, ARM::VST1q64
};
3761 SelectVST(N
, false, 1, DOpcodes
, QOpcodes
, nullptr);
3765 case Intrinsic::arm_neon_vst1x2
: {
3766 static const uint16_t DOpcodes
[] = { ARM::VST1q8
, ARM::VST1q16
,
3767 ARM::VST1q32
, ARM::VST1q64
};
3768 static const uint16_t QOpcodes
[] = { ARM::VST1d8QPseudo
,
3769 ARM::VST1d16QPseudo
,
3770 ARM::VST1d32QPseudo
,
3771 ARM::VST1d64QPseudo
};
3772 SelectVST(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3776 case Intrinsic::arm_neon_vst1x3
: {
3777 static const uint16_t DOpcodes
[] = { ARM::VST1d8TPseudo
,
3778 ARM::VST1d16TPseudo
,
3779 ARM::VST1d32TPseudo
,
3780 ARM::VST1d64TPseudo
};
3781 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowTPseudo_UPD
,
3782 ARM::VST1q16LowTPseudo_UPD
,
3783 ARM::VST1q32LowTPseudo_UPD
,
3784 ARM::VST1q64LowTPseudo_UPD
};
3785 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighTPseudo
,
3786 ARM::VST1q16HighTPseudo
,
3787 ARM::VST1q32HighTPseudo
,
3788 ARM::VST1q64HighTPseudo
};
3789 SelectVST(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3793 case Intrinsic::arm_neon_vst1x4
: {
3794 static const uint16_t DOpcodes
[] = { ARM::VST1d8QPseudo
,
3795 ARM::VST1d16QPseudo
,
3796 ARM::VST1d32QPseudo
,
3797 ARM::VST1d64QPseudo
};
3798 static const uint16_t QOpcodes0
[] = { ARM::VST1q8LowQPseudo_UPD
,
3799 ARM::VST1q16LowQPseudo_UPD
,
3800 ARM::VST1q32LowQPseudo_UPD
,
3801 ARM::VST1q64LowQPseudo_UPD
};
3802 static const uint16_t QOpcodes1
[] = { ARM::VST1q8HighQPseudo
,
3803 ARM::VST1q16HighQPseudo
,
3804 ARM::VST1q32HighQPseudo
,
3805 ARM::VST1q64HighQPseudo
};
3806 SelectVST(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3810 case Intrinsic::arm_neon_vst2
: {
3811 static const uint16_t DOpcodes
[] = { ARM::VST2d8
, ARM::VST2d16
,
3812 ARM::VST2d32
, ARM::VST1q64
};
3813 static const uint16_t QOpcodes
[] = { ARM::VST2q8Pseudo
, ARM::VST2q16Pseudo
,
3814 ARM::VST2q32Pseudo
};
3815 SelectVST(N
, false, 2, DOpcodes
, QOpcodes
, nullptr);
3819 case Intrinsic::arm_neon_vst3
: {
3820 static const uint16_t DOpcodes
[] = { ARM::VST3d8Pseudo
,
3823 ARM::VST1d64TPseudo
};
3824 static const uint16_t QOpcodes0
[] = { ARM::VST3q8Pseudo_UPD
,
3825 ARM::VST3q16Pseudo_UPD
,
3826 ARM::VST3q32Pseudo_UPD
};
3827 static const uint16_t QOpcodes1
[] = { ARM::VST3q8oddPseudo
,
3828 ARM::VST3q16oddPseudo
,
3829 ARM::VST3q32oddPseudo
};
3830 SelectVST(N
, false, 3, DOpcodes
, QOpcodes0
, QOpcodes1
);
3834 case Intrinsic::arm_neon_vst4
: {
3835 static const uint16_t DOpcodes
[] = { ARM::VST4d8Pseudo
,
3838 ARM::VST1d64QPseudo
};
3839 static const uint16_t QOpcodes0
[] = { ARM::VST4q8Pseudo_UPD
,
3840 ARM::VST4q16Pseudo_UPD
,
3841 ARM::VST4q32Pseudo_UPD
};
3842 static const uint16_t QOpcodes1
[] = { ARM::VST4q8oddPseudo
,
3843 ARM::VST4q16oddPseudo
,
3844 ARM::VST4q32oddPseudo
};
3845 SelectVST(N
, false, 4, DOpcodes
, QOpcodes0
, QOpcodes1
);
3849 case Intrinsic::arm_neon_vst2lane
: {
3850 static const uint16_t DOpcodes
[] = { ARM::VST2LNd8Pseudo
,
3851 ARM::VST2LNd16Pseudo
,
3852 ARM::VST2LNd32Pseudo
};
3853 static const uint16_t QOpcodes
[] = { ARM::VST2LNq16Pseudo
,
3854 ARM::VST2LNq32Pseudo
};
3855 SelectVLDSTLane(N
, false, false, 2, DOpcodes
, QOpcodes
);
3859 case Intrinsic::arm_neon_vst3lane
: {
3860 static const uint16_t DOpcodes
[] = { ARM::VST3LNd8Pseudo
,
3861 ARM::VST3LNd16Pseudo
,
3862 ARM::VST3LNd32Pseudo
};
3863 static const uint16_t QOpcodes
[] = { ARM::VST3LNq16Pseudo
,
3864 ARM::VST3LNq32Pseudo
};
3865 SelectVLDSTLane(N
, false, false, 3, DOpcodes
, QOpcodes
);
3869 case Intrinsic::arm_neon_vst4lane
: {
3870 static const uint16_t DOpcodes
[] = { ARM::VST4LNd8Pseudo
,
3871 ARM::VST4LNd16Pseudo
,
3872 ARM::VST4LNd32Pseudo
};
3873 static const uint16_t QOpcodes
[] = { ARM::VST4LNq16Pseudo
,
3874 ARM::VST4LNq32Pseudo
};
3875 SelectVLDSTLane(N
, false, false, 4, DOpcodes
, QOpcodes
);
3882 case ISD::ATOMIC_CMP_SWAP
:
3890 // Inspect a register string of the form
3891 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3892 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3893 // and obtain the integer operands from them, adding these operands to the
3895 static void getIntOperandsFromRegisterString(StringRef RegString
,
3896 SelectionDAG
*CurDAG
,
3898 std::vector
<SDValue
> &Ops
) {
3899 SmallVector
<StringRef
, 5> Fields
;
3900 RegString
.split(Fields
, ':');
3902 if (Fields
.size() > 1) {
3903 bool AllIntFields
= true;
3905 for (StringRef Field
: Fields
) {
3906 // Need to trim out leading 'cp' characters and get the integer field.
3908 AllIntFields
&= !Field
.trim("CPcp").getAsInteger(10, IntField
);
3909 Ops
.push_back(CurDAG
->getTargetConstant(IntField
, DL
, MVT::i32
));
3912 assert(AllIntFields
&&
3913 "Unexpected non-integer value in special register string.");
3917 // Maps a Banked Register string to its mask value. The mask value returned is
3918 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3919 // mask operand, which expresses which register is to be used, e.g. r8, and in
3920 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3922 static inline int getBankedRegisterMask(StringRef RegString
) {
3923 auto TheReg
= ARMBankedReg::lookupBankedRegByName(RegString
.lower());
3926 return TheReg
->Encoding
;
3929 // The flags here are common to those allowed for apsr in the A class cores and
3930 // those allowed for the special registers in the M class cores. Returns a
3931 // value representing which flags were present, -1 if invalid.
3932 static inline int getMClassFlagsMask(StringRef Flags
) {
3933 return StringSwitch
<int>(Flags
)
3934 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3935 // correct when flags are not permitted
3938 .Case("nzcvqg", 0x3)
3942 // Maps MClass special registers string to its value for use in the
3943 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3944 // Returns -1 to signify that the string was invalid.
3945 static int getMClassRegisterMask(StringRef Reg
, const ARMSubtarget
*Subtarget
) {
3946 auto TheReg
= ARMSysReg::lookupMClassSysRegByName(Reg
);
3947 const FeatureBitset
&FeatureBits
= Subtarget
->getFeatureBits();
3948 if (!TheReg
|| !TheReg
->hasRequiredFeatures(FeatureBits
))
3950 return (int)(TheReg
->Encoding
& 0xFFF); // SYSm value
3953 static int getARClassRegisterMask(StringRef Reg
, StringRef Flags
) {
3954 // The mask operand contains the special register (R Bit) in bit 4, whether
3955 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3956 // bits 3-0 contains the fields to be accessed in the special register, set by
3957 // the flags provided with the register.
3959 if (Reg
== "apsr") {
3960 // The flags permitted for apsr are the same flags that are allowed in
3961 // M class registers. We get the flag value and then shift the flags into
3962 // the correct place to combine with the mask.
3963 Mask
= getMClassFlagsMask(Flags
);
3969 if (Reg
!= "cpsr" && Reg
!= "spsr") {
3973 // This is the same as if the flags were "fc"
3974 if (Flags
.empty() || Flags
== "all")
3977 // Inspect the supplied flags string and set the bits in the mask for
3978 // the relevant and valid flags allowed for cpsr and spsr.
3979 for (char Flag
: Flags
) {
3998 // This avoids allowing strings where the same flag bit appears twice.
3999 if (!FlagVal
|| (Mask
& FlagVal
))
4004 // If the register is spsr then we need to set the R bit.
4011 // Lower the read_register intrinsic to ARM specific DAG nodes
4012 // using the supplied metadata string to select the instruction node to use
4013 // and the registers/masks to construct as operands for the node.
4014 bool ARMDAGToDAGISel::tryReadRegister(SDNode
*N
){
4015 const MDNodeSDNode
*MD
= dyn_cast
<MDNodeSDNode
>(N
->getOperand(1));
4016 const MDString
*RegString
= dyn_cast
<MDString
>(MD
->getMD()->getOperand(0));
4017 bool IsThumb2
= Subtarget
->isThumb2();
4020 std::vector
<SDValue
> Ops
;
4021 getIntOperandsFromRegisterString(RegString
->getString(), CurDAG
, DL
, Ops
);
4024 // If the special register string was constructed of fields (as defined
4025 // in the ACLE) then need to lower to MRC node (32 bit) or
4026 // MRRC node(64 bit), we can make the distinction based on the number of
4027 // operands we have.
4029 SmallVector
<EVT
, 3> ResTypes
;
4030 if (Ops
.size() == 5){
4031 Opcode
= IsThumb2
? ARM::t2MRC
: ARM::MRC
;
4032 ResTypes
.append({ MVT::i32
, MVT::Other
});
4034 assert(Ops
.size() == 3 &&
4035 "Invalid number of fields in special register string.");
4036 Opcode
= IsThumb2
? ARM::t2MRRC
: ARM::MRRC
;
4037 ResTypes
.append({ MVT::i32
, MVT::i32
, MVT::Other
});
4040 Ops
.push_back(getAL(CurDAG
, DL
));
4041 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
4042 Ops
.push_back(N
->getOperand(0));
4043 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, ResTypes
, Ops
));
4047 std::string SpecialReg
= RegString
->getString().lower();
4049 int BankedReg
= getBankedRegisterMask(SpecialReg
);
4050 if (BankedReg
!= -1) {
4051 Ops
= { CurDAG
->getTargetConstant(BankedReg
, DL
, MVT::i32
),
4052 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4055 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRSbanked
: ARM::MRSbanked
,
4056 DL
, MVT::i32
, MVT::Other
, Ops
));
4060 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4061 // corresponding to the register that is being read from. So we switch on the
4062 // string to find which opcode we need to use.
4063 unsigned Opcode
= StringSwitch
<unsigned>(SpecialReg
)
4064 .Case("fpscr", ARM::VMRS
)
4065 .Case("fpexc", ARM::VMRS_FPEXC
)
4066 .Case("fpsid", ARM::VMRS_FPSID
)
4067 .Case("mvfr0", ARM::VMRS_MVFR0
)
4068 .Case("mvfr1", ARM::VMRS_MVFR1
)
4069 .Case("mvfr2", ARM::VMRS_MVFR2
)
4070 .Case("fpinst", ARM::VMRS_FPINST
)
4071 .Case("fpinst2", ARM::VMRS_FPINST2
)
4074 // If an opcode was found then we can lower the read to a VFP instruction.
4076 if (!Subtarget
->hasVFP2Base())
4078 if (Opcode
== ARM::VMRS_MVFR2
&& !Subtarget
->hasFPARMv8Base())
4081 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4084 CurDAG
->getMachineNode(Opcode
, DL
, MVT::i32
, MVT::Other
, Ops
));
4088 // If the target is M Class then need to validate that the register string
4089 // is an acceptable value, so check that a mask can be constructed from the
4091 if (Subtarget
->isMClass()) {
4092 int SYSmValue
= getMClassRegisterMask(SpecialReg
, Subtarget
);
4093 if (SYSmValue
== -1)
4096 SDValue Ops
[] = { CurDAG
->getTargetConstant(SYSmValue
, DL
, MVT::i32
),
4097 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4100 N
, CurDAG
->getMachineNode(ARM::t2MRS_M
, DL
, MVT::i32
, MVT::Other
, Ops
));
4104 // Here we know the target is not M Class so we need to check if it is one
4105 // of the remaining possible values which are apsr, cpsr or spsr.
4106 if (SpecialReg
== "apsr" || SpecialReg
== "cpsr") {
4107 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4109 ReplaceNode(N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRS_AR
: ARM::MRS
,
4110 DL
, MVT::i32
, MVT::Other
, Ops
));
4114 if (SpecialReg
== "spsr") {
4115 Ops
= { getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4118 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MRSsys_AR
: ARM::MRSsys
, DL
,
4119 MVT::i32
, MVT::Other
, Ops
));
4126 // Lower the write_register intrinsic to ARM specific DAG nodes
4127 // using the supplied metadata string to select the instruction node to use
4128 // and the registers/masks to use in the nodes
4129 bool ARMDAGToDAGISel::tryWriteRegister(SDNode
*N
){
4130 const MDNodeSDNode
*MD
= dyn_cast
<MDNodeSDNode
>(N
->getOperand(1));
4131 const MDString
*RegString
= dyn_cast
<MDString
>(MD
->getMD()->getOperand(0));
4132 bool IsThumb2
= Subtarget
->isThumb2();
4135 std::vector
<SDValue
> Ops
;
4136 getIntOperandsFromRegisterString(RegString
->getString(), CurDAG
, DL
, Ops
);
4139 // If the special register string was constructed of fields (as defined
4140 // in the ACLE) then need to lower to MCR node (32 bit) or
4141 // MCRR node(64 bit), we can make the distinction based on the number of
4142 // operands we have.
4144 if (Ops
.size() == 5) {
4145 Opcode
= IsThumb2
? ARM::t2MCR
: ARM::MCR
;
4146 Ops
.insert(Ops
.begin()+2, N
->getOperand(2));
4148 assert(Ops
.size() == 3 &&
4149 "Invalid number of fields in special register string.");
4150 Opcode
= IsThumb2
? ARM::t2MCRR
: ARM::MCRR
;
4151 SDValue WriteValue
[] = { N
->getOperand(2), N
->getOperand(3) };
4152 Ops
.insert(Ops
.begin()+2, WriteValue
, WriteValue
+2);
4155 Ops
.push_back(getAL(CurDAG
, DL
));
4156 Ops
.push_back(CurDAG
->getRegister(0, MVT::i32
));
4157 Ops
.push_back(N
->getOperand(0));
4159 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, MVT::Other
, Ops
));
4163 std::string SpecialReg
= RegString
->getString().lower();
4164 int BankedReg
= getBankedRegisterMask(SpecialReg
);
4165 if (BankedReg
!= -1) {
4166 Ops
= { CurDAG
->getTargetConstant(BankedReg
, DL
, MVT::i32
), N
->getOperand(2),
4167 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4170 N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MSRbanked
: ARM::MSRbanked
,
4171 DL
, MVT::Other
, Ops
));
4175 // The VFP registers are written to by creating SelectionDAG nodes with
4176 // opcodes corresponding to the register that is being written. So we switch
4177 // on the string to find which opcode we need to use.
4178 unsigned Opcode
= StringSwitch
<unsigned>(SpecialReg
)
4179 .Case("fpscr", ARM::VMSR
)
4180 .Case("fpexc", ARM::VMSR_FPEXC
)
4181 .Case("fpsid", ARM::VMSR_FPSID
)
4182 .Case("fpinst", ARM::VMSR_FPINST
)
4183 .Case("fpinst2", ARM::VMSR_FPINST2
)
4187 if (!Subtarget
->hasVFP2Base())
4189 Ops
= { N
->getOperand(2), getAL(CurDAG
, DL
),
4190 CurDAG
->getRegister(0, MVT::i32
), N
->getOperand(0) };
4191 ReplaceNode(N
, CurDAG
->getMachineNode(Opcode
, DL
, MVT::Other
, Ops
));
4195 std::pair
<StringRef
, StringRef
> Fields
;
4196 Fields
= StringRef(SpecialReg
).rsplit('_');
4197 std::string Reg
= Fields
.first
.str();
4198 StringRef Flags
= Fields
.second
;
4200 // If the target was M Class then need to validate the special register value
4201 // and retrieve the mask for use in the instruction node.
4202 if (Subtarget
->isMClass()) {
4203 int SYSmValue
= getMClassRegisterMask(SpecialReg
, Subtarget
);
4204 if (SYSmValue
== -1)
4207 SDValue Ops
[] = { CurDAG
->getTargetConstant(SYSmValue
, DL
, MVT::i32
),
4208 N
->getOperand(2), getAL(CurDAG
, DL
),
4209 CurDAG
->getRegister(0, MVT::i32
), N
->getOperand(0) };
4210 ReplaceNode(N
, CurDAG
->getMachineNode(ARM::t2MSR_M
, DL
, MVT::Other
, Ops
));
4214 // We then check to see if a valid mask can be constructed for one of the
4215 // register string values permitted for the A and R class cores. These values
4216 // are apsr, spsr and cpsr; these are also valid on older cores.
4217 int Mask
= getARClassRegisterMask(Reg
, Flags
);
4219 Ops
= { CurDAG
->getTargetConstant(Mask
, DL
, MVT::i32
), N
->getOperand(2),
4220 getAL(CurDAG
, DL
), CurDAG
->getRegister(0, MVT::i32
),
4222 ReplaceNode(N
, CurDAG
->getMachineNode(IsThumb2
? ARM::t2MSR_AR
: ARM::MSR
,
4223 DL
, MVT::Other
, Ops
));
4230 bool ARMDAGToDAGISel::tryInlineAsm(SDNode
*N
){
4231 std::vector
<SDValue
> AsmNodeOperands
;
4232 unsigned Flag
, Kind
;
4233 bool Changed
= false;
4234 unsigned NumOps
= N
->getNumOperands();
4236 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4237 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4238 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4239 // respectively. Since there is no constraint to explicitly specify a
4240 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4241 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4242 // them into a GPRPair.
4245 SDValue Glue
= N
->getGluedNode() ? N
->getOperand(NumOps
-1)
4246 : SDValue(nullptr,0);
4248 SmallVector
<bool, 8> OpChanged
;
4249 // Glue node will be appended late.
4250 for(unsigned i
= 0, e
= N
->getGluedNode() ? NumOps
- 1 : NumOps
; i
< e
; ++i
) {
4251 SDValue op
= N
->getOperand(i
);
4252 AsmNodeOperands
.push_back(op
);
4254 if (i
< InlineAsm::Op_FirstOperand
)
4257 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(i
))) {
4258 Flag
= C
->getZExtValue();
4259 Kind
= InlineAsm::getKind(Flag
);
4264 // Immediate operands to inline asm in the SelectionDAG are modeled with
4265 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4266 // the second is a constant with the value of the immediate. If we get here
4267 // and we have a Kind_Imm, skip the next operand, and continue.
4268 if (Kind
== InlineAsm::Kind_Imm
) {
4269 SDValue op
= N
->getOperand(++i
);
4270 AsmNodeOperands
.push_back(op
);
4274 unsigned NumRegs
= InlineAsm::getNumOperandRegisters(Flag
);
4276 OpChanged
.push_back(false);
4278 unsigned DefIdx
= 0;
4279 bool IsTiedToChangedOp
= false;
4280 // If it's a use that is tied with a previous def, it has no
4281 // reg class constraint.
4282 if (Changed
&& InlineAsm::isUseOperandTiedToDef(Flag
, DefIdx
))
4283 IsTiedToChangedOp
= OpChanged
[DefIdx
];
4285 // Memory operands to inline asm in the SelectionDAG are modeled with two
4286 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4287 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4288 // it doesn't get misinterpreted), and continue. We do this here because
4289 // it's important to update the OpChanged array correctly before moving on.
4290 if (Kind
== InlineAsm::Kind_Mem
) {
4291 SDValue op
= N
->getOperand(++i
);
4292 AsmNodeOperands
.push_back(op
);
4296 if (Kind
!= InlineAsm::Kind_RegUse
&& Kind
!= InlineAsm::Kind_RegDef
4297 && Kind
!= InlineAsm::Kind_RegDefEarlyClobber
)
4301 bool HasRC
= InlineAsm::hasRegClassConstraint(Flag
, RC
);
4302 if ((!IsTiedToChangedOp
&& (!HasRC
|| RC
!= ARM::GPRRegClassID
))
4306 assert((i
+2 < NumOps
) && "Invalid number of operands in inline asm");
4307 SDValue V0
= N
->getOperand(i
+1);
4308 SDValue V1
= N
->getOperand(i
+2);
4309 unsigned Reg0
= cast
<RegisterSDNode
>(V0
)->getReg();
4310 unsigned Reg1
= cast
<RegisterSDNode
>(V1
)->getReg();
4312 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
4314 if (Kind
== InlineAsm::Kind_RegDef
||
4315 Kind
== InlineAsm::Kind_RegDefEarlyClobber
) {
4316 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4317 // the original GPRs.
4319 unsigned GPVR
= MRI
.createVirtualRegister(&ARM::GPRPairRegClass
);
4320 PairedReg
= CurDAG
->getRegister(GPVR
, MVT::Untyped
);
4321 SDValue Chain
= SDValue(N
,0);
4323 SDNode
*GU
= N
->getGluedUser();
4324 SDValue RegCopy
= CurDAG
->getCopyFromReg(Chain
, dl
, GPVR
, MVT::Untyped
,
4327 // Extract values from a GPRPair reg and copy to the original GPR reg.
4328 SDValue Sub0
= CurDAG
->getTargetExtractSubreg(ARM::gsub_0
, dl
, MVT::i32
,
4330 SDValue Sub1
= CurDAG
->getTargetExtractSubreg(ARM::gsub_1
, dl
, MVT::i32
,
4332 SDValue T0
= CurDAG
->getCopyToReg(Sub0
, dl
, Reg0
, Sub0
,
4333 RegCopy
.getValue(1));
4334 SDValue T1
= CurDAG
->getCopyToReg(Sub1
, dl
, Reg1
, Sub1
, T0
.getValue(1));
4336 // Update the original glue user.
4337 std::vector
<SDValue
> Ops(GU
->op_begin(), GU
->op_end()-1);
4338 Ops
.push_back(T1
.getValue(1));
4339 CurDAG
->UpdateNodeOperands(GU
, Ops
);
4342 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4343 // GPRPair and then pass the GPRPair to the inline asm.
4344 SDValue Chain
= AsmNodeOperands
[InlineAsm::Op_InputChain
];
4346 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4347 SDValue T0
= CurDAG
->getCopyFromReg(Chain
, dl
, Reg0
, MVT::i32
,
4349 SDValue T1
= CurDAG
->getCopyFromReg(Chain
, dl
, Reg1
, MVT::i32
,
4351 SDValue Pair
= SDValue(createGPRPairNode(MVT::Untyped
, T0
, T1
), 0);
4353 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4354 // i32 VRs of inline asm with it.
4355 unsigned GPVR
= MRI
.createVirtualRegister(&ARM::GPRPairRegClass
);
4356 PairedReg
= CurDAG
->getRegister(GPVR
, MVT::Untyped
);
4357 Chain
= CurDAG
->getCopyToReg(T1
, dl
, GPVR
, Pair
, T1
.getValue(1));
4359 AsmNodeOperands
[InlineAsm::Op_InputChain
] = Chain
;
4360 Glue
= Chain
.getValue(1);
4365 if(PairedReg
.getNode()) {
4366 OpChanged
[OpChanged
.size() -1 ] = true;
4367 Flag
= InlineAsm::getFlagWord(Kind
, 1 /* RegNum*/);
4368 if (IsTiedToChangedOp
)
4369 Flag
= InlineAsm::getFlagWordForMatchingOp(Flag
, DefIdx
);
4371 Flag
= InlineAsm::getFlagWordForRegClass(Flag
, ARM::GPRPairRegClassID
);
4372 // Replace the current flag.
4373 AsmNodeOperands
[AsmNodeOperands
.size() -1] = CurDAG
->getTargetConstant(
4374 Flag
, dl
, MVT::i32
);
4375 // Add the new register node and skip the original two GPRs.
4376 AsmNodeOperands
.push_back(PairedReg
);
4377 // Skip the next two GPRs.
4383 AsmNodeOperands
.push_back(Glue
);
4387 SDValue New
= CurDAG
->getNode(N
->getOpcode(), SDLoc(N
),
4388 CurDAG
->getVTList(MVT::Other
, MVT::Glue
), AsmNodeOperands
);
4390 ReplaceNode(N
, New
.getNode());
4395 bool ARMDAGToDAGISel::
4396 SelectInlineAsmMemoryOperand(const SDValue
&Op
, unsigned ConstraintID
,
4397 std::vector
<SDValue
> &OutOps
) {
4398 switch(ConstraintID
) {
4400 llvm_unreachable("Unexpected asm memory constraint");
4401 case InlineAsm::Constraint_i
:
4402 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4403 // be an immediate and not a memory constraint.
4405 case InlineAsm::Constraint_m
:
4406 case InlineAsm::Constraint_o
:
4407 case InlineAsm::Constraint_Q
:
4408 case InlineAsm::Constraint_Um
:
4409 case InlineAsm::Constraint_Un
:
4410 case InlineAsm::Constraint_Uq
:
4411 case InlineAsm::Constraint_Us
:
4412 case InlineAsm::Constraint_Ut
:
4413 case InlineAsm::Constraint_Uv
:
4414 case InlineAsm::Constraint_Uy
:
4415 // Require the address to be in a register. That is safe for all ARM
4416 // variants and it is hard to do anything much smarter without knowing
4417 // how the operand is used.
4418 OutOps
.push_back(Op
);
4424 /// createARMISelDag - This pass converts a legalized DAG into a
4425 /// ARM-specific DAG, ready for instruction scheduling.
4427 FunctionPass
*llvm::createARMISelDag(ARMBaseTargetMachine
&TM
,
4428 CodeGenOpt::Level OptLevel
) {
4429 return new ARMDAGToDAGISel(TM
, OptLevel
);