//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template<unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
/// Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add', and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).
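    // For example, with c2 = 1020 (0b1111111100) we have tz = 2, so
    // (add X1, (and (srl X2, c1), 1020)) becomes
    // (add X1, (shl (and (srl X2, c1+2), 255), 2)).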
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operands of 1 / 2 are free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla (5 cycles)
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                   // MOV
         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
      return 1;
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
  return 3; // Literal pool load
}
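// Example for the check below: 40 == 5 << 3, so a multiply by 40 can be
// rewritten as a multiply by 5 followed by a left shift of 3, but the rewrite
// only fires when materializing 5 is strictly cheaper than materializing 40.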
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
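// Select a base plus a (possibly shifted) register offset suitable for an
// ARM addressing-mode-2 load/store, encoding the shift in Opc.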
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
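// Match a register (possibly shifted) offset operand for an indexed AM2
// load/store. Constant offsets that fit in 12 bits are rejected here and
// left for the immediate forms.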
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
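// Match an unshifted 12-bit immediate offset for a pre-indexed AM2
// load/store; the (possibly negated) offset value itself is placed in Opc.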
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
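// Match a 12-bit immediate offset for an indexed AM2 load/store, encoding
// the add/sub direction and value with getAM2Opc.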
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
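// Select addressing mode 3 (used by e.g. halfword and doubleword
// loads/stores): a base register plus either a register offset or an
// immediate offset that fits in 8 bits.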
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
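// Match the offset operand of an indexed AM3 load/store: either an
// immediate in the range [0,256) or a plain register.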
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
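// Addressing mode 5 is used by VFP load/stores: a base register plus an
// immediate scaled by 4 (or by 2 for the FP16 variants).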
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
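// Addressing mode 6 (NEON VLD/VST) takes just a base address plus an
// alignment operand, computed here from the memory operand's alignment.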
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;

  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//
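// Match a Thumb-2 base register plus an unsigned 12-bit immediate offset.
// Negative offsets are left for the imm8 form below.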
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
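// Match the 8-bit offset operand of an indexed Thumb-2 load/store; the
// offset is negated unless the access is pre/post-incrementing.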
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
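// Match a base register plus an immediate that is a multiple of 2^Shift and
// representable as a shifted signed 7-bit value (isShiftedInt<7, Shift>).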
template<unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB ||
      CurDAG->isBaseWithConstantOffset(N)) {
    if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;

      if (isShiftedInt<7, Shift>(RHSC)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
        }
        OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
        return true;
      }
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;
  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
             ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
             : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
*N
) {
1491 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
1492 EVT LoadedVT
= LD
->getMemoryVT();
1493 ISD::MemIndexedMode AM
= LD
->getAddressingMode();
1494 if (AM
!= ISD::POST_INC
|| LD
->getExtensionType() != ISD::NON_EXTLOAD
||
1495 LoadedVT
.getSimpleVT().SimpleTy
!= MVT::i32
)
1498 auto *COffs
= dyn_cast
<ConstantSDNode
>(LD
->getOffset());
1499 if (!COffs
|| COffs
->getZExtValue() != 4)
1502 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1503 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1504 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1506 SDValue Chain
= LD
->getChain();
1507 SDValue Base
= LD
->getBasePtr();
1508 SDValue Ops
[]= { Base
, getAL(CurDAG
, SDLoc(N
)),
1509 CurDAG
->getRegister(0, MVT::i32
), Chain
};
1510 SDNode
*New
= CurDAG
->getMachineNode(ARM::tLDR_postidx
, SDLoc(N
), MVT::i32
,
1511 MVT::i32
, MVT::Other
, Ops
);
1512 transferMemOperands(N
, New
);
1513 ReplaceNode(N
, New
);
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}
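// Returns true for the fixed-stride writeback forms of NEON VLD
// instructions, which update the base register by the access size rather
// than by a register operand.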
static bool isVLDfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed : return true;
  case ARM::VLD1d16wb_fixed : return true;
  case ARM::VLD1d64Qwb_fixed : return true;
  case ARM::VLD1d32wb_fixed : return true;
  case ARM::VLD1d64wb_fixed : return true;
  case ARM::VLD1d64TPseudoWB_fixed : return true;
  case ARM::VLD1d64QPseudoWB_fixed : return true;
  case ARM::VLD1q8wb_fixed : return true;
  case ARM::VLD1q16wb_fixed : return true;
  case ARM::VLD1q32wb_fixed : return true;
  case ARM::VLD1q64wb_fixed : return true;
  case ARM::VLD1DUPd8wb_fixed : return true;
  case ARM::VLD1DUPd16wb_fixed : return true;
  case ARM::VLD1DUPd32wb_fixed : return true;
  case ARM::VLD1DUPq8wb_fixed : return true;
  case ARM::VLD1DUPq16wb_fixed : return true;
  case ARM::VLD1DUPq32wb_fixed : return true;
  case ARM::VLD2d8wb_fixed : return true;
  case ARM::VLD2d16wb_fixed : return true;
  case ARM::VLD2d32wb_fixed : return true;
  case ARM::VLD2q8PseudoWB_fixed : return true;
  case ARM::VLD2q16PseudoWB_fixed : return true;
  case ARM::VLD2q32PseudoWB_fixed : return true;
  case ARM::VLD2DUPd8wb_fixed : return true;
  case ARM::VLD2DUPd16wb_fixed : return true;
  case ARM::VLD2DUPd32wb_fixed : return true;
  }
}
static bool isVSTfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed : return true;
  case ARM::VST1d16wb_fixed : return true;
  case ARM::VST1d32wb_fixed : return true;
  case ARM::VST1d64wb_fixed : return true;
  case ARM::VST1q8wb_fixed : return true;
  case ARM::VST1q16wb_fixed : return true;
  case ARM::VST1q32wb_fixed : return true;
  case ARM::VST1q64wb_fixed : return true;
  case ARM::VST1d64TPseudoWB_fixed : return true;
  case ARM::VST1d64QPseudoWB_fixed : return true;
  case ARM::VST2d8wb_fixed : return true;
  case ARM::VST2d16wb_fixed : return true;
  case ARM::VST2d32wb_fixed : return true;
  case ARM::VST2q8PseudoWB_fixed : return true;
  case ARM::VST2q16PseudoWB_fixed : return true;
  case ARM::VST2q32PseudoWB_fixed : return true;
  }
}
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
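// The "_fixed" writeback forms post-increment the base pointer by the access
// size implicitly, while the "_register" forms take the increment in a
// register operand; this mapping is used when the increment turns out not to
// match the access size and so must be supplied explicitly.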
/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}
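// E.g. a VLD2 of two v4i16 vectors accesses 2 * 8 == 16 bytes, so only a
// constant increment of 16 lets the post-increment use the "[rN]!" form;
// any other increment needs the register-update variant.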
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
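// VLD3 and VLD4 of quad registers cannot be encoded as one instruction, so
// the code above splits them in two: the first (always an updating load)
// fills the even D subregs and feeds its incremented address to the second,
// which fills the odd D subregs of the same QQQQ super-register.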
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
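// The store path mirrors SelectVLD: the source vectors are first glued into
// a D/Q/QQ/QQQQ super-register with REG_SEQUENCE so consecutive registers
// are guaranteed, and quad-register VST3/VST4 are likewise split into an
// even-subreg store followed by an odd-subreg store.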
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
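// Lane operations touch a single element of each vector, so the alignment
// operand above is capped at the number of bytes actually accessed
// (NumVecs * element size) rather than the full vector size.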
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
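// VLD1DUP..VLD4DUP load one element and replicate it across every lane of
// each destination vector. As with SelectVLD above, VLD3/VLD4 DUPs of quad
// registers are emitted as an even-subreg load followed by an odd-subreg
// load into the same super-register.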
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
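// Worked example: for (and (srl X, 3), 0x1F), And_imm == 0x1F is a low-bit
// mask, LSB == 3 and the width operand is encoded as 5 - 1 == 4, so this
// selects UBFX X, #3, #5: an unsigned extract of bits [7:3].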
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X,  0,  X, -X
/// select_cc setgt    X, -1,  X, -X
/// select_cc setl[te] X,  0, -X,  X
/// select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}
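// I.e. with Y = sra(X, 31), xor(add(X, Y), Y) computes |X|: Y is 0 for
// non-negative X (the add and xor are no-ops) and all-ones for negative X,
// in which case the add subtracts 1 and the xor flips the bits, together
// negating X.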
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}
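// Returns the (most-significant, least-significant) set-bit positions when
// the set bits of A are contiguous, e.g. 0x00ff0000 -> (23, 16); for a value
// with a hole such as 0x00ff00ff the popcount test fails and None is
// returned.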
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
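// Example: (cmpz (and X, 0xff), #0) has Range == (7, 0); case 1 applies and
// the AND is replaced by LSLS X, #24, which sets the Z flag exactly when the
// low eight bits of X are zero.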
void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise. (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Ptr = ST->getBasePtr();
    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
      int RHSC = 0;
      if (Ptr.getOpcode() == ISD::ADD &&
          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
        Ptr = Ptr.getOperand(0);

      if (Ptr.getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
          Ptr.getOperand(0) == ST->getChain()) {
        SDValue Ops[] = {ST->getValue(),
                         CurDAG->getRegister(ARM::SP, MVT::i32),
                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
                         getAL(CurDAG, dl),
                         CurDAG->getRegister(0, MVT::i32),
                         ST->getChain()};
        MachineSDNode *ResNode =
            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
        MachineMemOperand *MemOp = ST->getMemOperand();
        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
        ReplaceNode(N, ResNode);
        return;
      }
    }
    break;
  }
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::XOR:
    // Select special operations if XOR node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
    // If we can't materialize the constant we need to use a literal pool
    if (ConstantMaterializationCost(Val) > 2) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                         Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }
      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction& MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, 4);

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::FrameIndex: {
    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to generate
      // more than one ADD
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlignment(FI) < 4)
        MFI.setObjectAlignment(FI, 4);
      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                           CurDAG->getTargetConstant(0, dl, MVT::i32));
      return;
    } else {
      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                      ARM::t2ADDri : ARM::ADDri);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return;
    }
  }
  case ISD::SRL:
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::MUL:
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
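  // Multiplies by 2^n+1 and 2^n-1 are strength-reduced above to a single
  // shifted add or reverse-subtract, e.g. X*9 -> add X, X, lsl #3 and
  // X*7 -> rsb X, X, lsl #3.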
  case ISD::AND: {
    // Check for unsigned bitfield extract
    if (tryV6T2BitfieldExtractOp(N, false))
      return;

    // If an immediate is used in an AND node, it is possible that the immediate
    // can be more optimally materialized when negated. If this is the case we
    // can negate the immediate and use a BIC instead.
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();

      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm) >
              ConstantMaterializationCost(~Imm)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
          CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the topological
        // ordering so it is just before N. Otherwise, don't touch its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());

        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }

    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
    // are entirely contributed by c2 and lower 16-bits are entirely contributed
    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
    // Select it to: "movt x, ((c1 & 0xffff) >> 16)
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
      ? ARM::t2MOVTi16
      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }

    break;
  }
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL:{
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL:{
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS
    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;

    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");

    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));

    if (!Zero || Zero->getZExtValue() != 0 ||
        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;

    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
  case ISD::LOAD: {
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::WLS:
  case ARMISD::LE: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = N->getOpcode() == ARMISD::WLS ?
      ARM::t2WhileLoopStart : ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
      CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                             CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InFlag = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InFlag.getOperand(0);
        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();

        // Handle low-overhead loops.
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          SDValue Size = CurDAG->getTargetConstant(
            cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
                                 MVT::i32);

          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
            CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                   CurDAG->getVTList(MVT::i32, MVT::Other),
                                   Args);
          ReplaceUses(Int.getNode(), LoopDec);

          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
            CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);

          ReplaceUses(N, LoopEnd);
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InFlag.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }

      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      InFlag = N->getOperand(4);

      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InFlag = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InFlag);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    //   This allows us to avoid materializing the expensive negative constant.
    //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
    //   for its glue output.
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
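  // Example: (CMPZ X, #-2) becomes (CMPZ (ADDS X, #2), #0); the flags set by
  // the ADDS make the trailing compare-with-zero redundant, and the later
  // peephole removes it while its glue output keeps the users wired up.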
  case ARMISD::CMOV: {
    SDValue InFlag = N->getOperand(4);

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC =
          (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VUZPd16; break;
    case MVT::v2f32:
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VUZPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VUZPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VUZPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VTRNd16; break;
    case MVT::v2f32:
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VTRNq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VTRNq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VTRNq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                        ARM::VLD2DUPd16wb_fixed,
                                        ARM::VLD2DUPd32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                        ARM::VLD3DUPd16Pseudo_UPD,
                                        ARM::VLD3DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                        ARM::VLD4DUPd16Pseudo_UPD,
                                        ARM::VLD4DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
    return;
  }
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
                                         ARM::VLD2d16wb_fixed,
                                         ARM::VLD2d32wb_fixed,
                                         ARM::VLD1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
                                         ARM::VLD2q16PseudoWB_fixed,
                                         ARM::VLD2q32PseudoWB_fixed };
    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
                                          ARM::VLD4q16oddPseudo_UPD,
                                          ARM::VLD4q32oddPseudo_UPD };
    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
                                         ARM::VST2d16wb_fixed,
                                         ARM::VST2d32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
                                         ARM::VST2q16PseudoWB_fixed,
                                         ARM::VST2q32PseudoWB_fixed };
    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
                                         ARM::VST4d16Pseudo_UPD,
                                         ARM::VST4d32Pseudo_UPD,
                                         ARM::VST1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                          ARM::VST4q16Pseudo_UPD,
                                          ARM::VST4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
                                          ARM::VST4q16oddPseudo_UPD,
                                          ARM::VST4q32oddPseudo_UPD };
    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDValue Chain = N->getOperand(0);
      unsigned Opc = 0;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(
          cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
      Ops.push_back(getI32Imm(
          cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
      Ops.push_back(getI32Imm(
          cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */

      // The ARM mrrc2 instruction does not allow predicates; the top 4 bits
      // of the encoded instruction are always '1111'. Assembly language does
      // accept AL as a predicate on mrrc2, but it makes no difference to the
      // encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
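
    // Note (illustrative): at the IR level the intrinsic handled above has
    // the form
    //   %pair = call { i32, i32 } @llvm.arm.mrrc(i32 15, i32 0, i32 2)
    // where the operands are the coprocessor, opc1 and CRm fields.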
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
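
    // Note (illustrative): the intrinsic handled above surfaces the i64
    // result as two i32 halves, e.g.
    //   %lohi = call { i32, i32 } @llvm.arm.ldrexd(i8* %addr)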
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store exclusive double returns an i32 value which is the return
      // status of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }
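
    // Note (illustrative): the intrinsic handled above takes the value as
    // two i32 halves and returns the store status, e.g.
    //   %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr)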
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}
// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit), inspect the fields of the string,
// obtain the integer operands from them, and add those operands to the
// provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}
// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}
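
// Banked register names are strings such as "r8_usr" or "spsr_fiq"
// (illustrative examples), combining the register with the banked mode.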
// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}
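
// For example (illustrative), the suffix in "apsr_nzcvqg" maps to 0x3, while
// a bare "apsr" with no flag suffix maps to 0x2.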
// Maps an MClass special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    Flags = "fc";

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c': FlagVal = 0x1; break;
    case 'x': FlagVal = 0x2; break;
    case 's': FlagVal = 0x4; break;
    case 'f': FlagVal = 0x8; break;
    default:  FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}
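
// Worked example for getARClassRegisterMask (illustrative, assuming the flag
// values above): "spsr_fc" sets c (0x1) and f (0x8) plus the R bit (0x10),
// giving a mask of 0x19.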
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
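//
// For example (illustrative), IR such as
//   %v = call i32 @llvm.read_register.i32(metadata !0)
//   !0 = !{!"cp15:0:c13:c0:3"}
// is lowered here to an MRC node with coproc=15, opc1=0, CRn=13, CRm=0 and
// opc2=3.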
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Add the predicate operands and transfer the chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
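//
// For example (illustrative), IR such as
//   call void @llvm.write_register.i32(metadata !0, i32 %val)
//   !0 = !{!"cp15:0:c13:c0:3"}
// is lowered here to an MCR node with the written value as an extra operand.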
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();
  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by H, Q, R modifiers, so we
  // still pack them into a GPRPair.
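  //
  // For example (illustrative), source like
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val64) : "r"(Addr));
  // needs %0 and %H0 to land in an even/odd register pair, which is what
  // the GPRPair rewrite below arranges.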
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);
  SDLoc dl(N);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    //        be an immediate and not a memory constraint.
    LLVM_FALLTHROUGH;
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}
/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}