//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#define DEBUG_TYPE "arm-isel"

using namespace llvm;

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }
  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                         int Lwb, int Upb, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }
  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}
/// isInt32Immediate - This method tests to see if the node is a 32-bit
/// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
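// Illustrative usage (editor's note, not part of the upstream source): for a
// node N representing (srl X, 14), isOpcWithIntImmediate(N, ISD::SRL, Imm)
// returns true and sets Imm to 14; it returns false for (srl X, Y) when Y is
// not a constant.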
/// Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
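// Worked example (editor's note, not part of the upstream source): with
// Scale = 4 and the range [0, 32), a constant node of 124 succeeds with
// ScaledConstant = 31, while 126 fails (not a multiple of 4) and 128 fails
// (it scales to 32, which is outside the half-open range).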
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).
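    // Worked example (editor's note, not part of the upstream source): with
    // c1 == 14 and c2 == 1020 (tz == 2),
    //   (add X1, (and (srl X2, 14), 1020))
    // becomes
    //   (add X1, (shl (and (srl X2, 16), 255), 2))
    // so the shl folds into the add as a shifter operand and the and/srl
    // pair can be selected as a single UBFX.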
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, a left shift by 1 or 2 is free but other amounts are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
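// Illustrative note (editor's addition, not in the upstream source): on the
// affected cores a multi-use shifter operand is only treated as free for
// lsl #2 (or lsl #1 on Swift), e.g. the "r2, lsl #2" in
//   ldr r0, [r1, r2, lsl #2]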
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                   // MOV
         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
      return 1;
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
  return 3; // Literal pool load
}
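// Illustrative costs (editor's addition, not in the upstream source), for an
// ARM-mode target: 0xff00 costs 1 (a single MOV of a rotated 8-bit
// immediate); 0x12345678 costs 2 with MOVW/MOVT available and 3 otherwise
// (a literal pool load). For Thumb1, 1020 costs 2 (MOV #255 then LSL #2).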
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}
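// Worked example (editor's addition, not in the upstream source): for
// (mul x, 1020) with MaxShift >= 2, PowerOfTwo becomes 2 and NewMulConst
// becomes 255, i.e. x * 1020 == (x * 255) << 2. On Thumb1, materializing
// 1020 costs 2 instructions while 255 costs 1, so the extraction is a win.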
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  CurDAG->ReplaceAllUsesWith(N, M);
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Do not check the profitability for a constant shift amount; that case is
  // matched by SelectImmShifterOperand instead.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
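// Illustrative example (editor's addition, not in the upstream source): for
// (add r1, -20) this returns Base = r1 and OffImm = -20, allowing a single
//   ldr r0, [r1, #-20]
// instead of materializing the offset in a register first.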
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
*Op
, SDValue N
,
784 SDValue
&Offset
, SDValue
&Opc
) {
785 unsigned Opcode
= Op
->getOpcode();
786 ISD::MemIndexedMode AM
= (Opcode
== ISD::LOAD
)
787 ? cast
<LoadSDNode
>(Op
)->getAddressingMode()
788 : cast
<StoreSDNode
>(Op
)->getAddressingMode();
789 ARM_AM::AddrOpc AddSub
= (AM
== ISD::PRE_INC
|| AM
== ISD::POST_INC
)
790 ? ARM_AM::add
: ARM_AM::sub
;
792 if (isScaledConstantInRange(N
, /*Scale=*/1, 0, 0x1000, Val
)) { // 12 bits.
793 if (AddSub
== ARM_AM::sub
) Val
*= -1;
794 Offset
= CurDAG
->getRegister(0, MVT::i32
);
795 Opc
= CurDAG
->getTargetConstant(Val
, SDLoc(Op
), MVT::i32
);
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset,
                                        int Lwb, int Upb, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  int Lwb = -256 + 1;
  int Upb = 256;
  return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  int Lwb = -256 + 1;
  int Upb = 256;
  return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
}
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
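// Illustrative example (editor's addition, not in the upstream source): for
// (add r0, -12), -12 is not encodable as a Thumb1 load/store immediate but
// is fine for SUB, so selecting "subs r1, r0, #12" plus a zero-offset load
// beats materializing -12 in a register.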
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else
        ShAmt = 0;
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
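// Illustrative note (editor's addition, not in the upstream source): the
// offset field of these exclusive accesses counts words, so a byte offset
// of 1020 is encoded as OffImm = 255, as in "ldrex r0, [r1, #1020]".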
//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction.  The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}
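// Illustrative example (editor's addition, not in the upstream source): a
// VLD2 of two 128-bit vectors uses NumRegs == 4, so an MMO alignment of 64
// bytes is clamped to the encodable 32, while a single 64-bit VLD1
// (NumRegs == 1) never encodes an alignment above 8.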
static bool isVLDfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed : return true;
  case ARM::VLD1d16wb_fixed : return true;
  case ARM::VLD1d64Qwb_fixed : return true;
  case ARM::VLD1d32wb_fixed : return true;
  case ARM::VLD1d64wb_fixed : return true;
  case ARM::VLD1d64TPseudoWB_fixed : return true;
  case ARM::VLD1d64QPseudoWB_fixed : return true;
  case ARM::VLD1q8wb_fixed : return true;
  case ARM::VLD1q16wb_fixed : return true;
  case ARM::VLD1q32wb_fixed : return true;
  case ARM::VLD1q64wb_fixed : return true;
  case ARM::VLD1DUPd8wb_fixed : return true;
  case ARM::VLD1DUPd16wb_fixed : return true;
  case ARM::VLD1DUPd32wb_fixed : return true;
  case ARM::VLD1DUPq8wb_fixed : return true;
  case ARM::VLD1DUPq16wb_fixed : return true;
  case ARM::VLD1DUPq32wb_fixed : return true;
  case ARM::VLD2d8wb_fixed : return true;
  case ARM::VLD2d16wb_fixed : return true;
  case ARM::VLD2d32wb_fixed : return true;
  case ARM::VLD2q8PseudoWB_fixed : return true;
  case ARM::VLD2q16PseudoWB_fixed : return true;
  case ARM::VLD2q32PseudoWB_fixed : return true;
  case ARM::VLD2DUPd8wb_fixed : return true;
  case ARM::VLD2DUPd16wb_fixed : return true;
  case ARM::VLD2DUPd32wb_fixed : return true;
  }
}
static bool isVSTfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed : return true;
  case ARM::VST1d16wb_fixed : return true;
  case ARM::VST1d32wb_fixed : return true;
  case ARM::VST1d64wb_fixed : return true;
  case ARM::VST1q8wb_fixed : return true;
  case ARM::VST1q16wb_fixed : return true;
  case ARM::VST1q32wb_fixed : return true;
  case ARM::VST1q64wb_fixed : return true;
  case ARM::VST1d64TPseudoWB_fixed : return true;
  case ARM::VST1d64QPseudoWB_fixed : return true;
  case ARM::VST2d8wb_fixed : return true;
  case ARM::VST2d16wb_fixed : return true;
  case ARM::VST2d32wb_fixed : return true;
  case ARM::VST2q8PseudoWB_fixed : return true;
  case ARM::VST2q16PseudoWB_fixed : return true;
  case ARM::VST2q32PseudoWB_fixed : return true;
  }
}
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
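
// Note (added for clarity): the "_fixed" writeback forms correspond to the
// "vld1.8 {d0}, [r0]!" assembly syntax, where the base register is advanced
// implicitly by the access size, while the "_register" forms correspond to
// "vld1.8 {d0}, [r0], r1", where an explicit Rm supplies the stride. The
// mapping above is applied when an updating load/store's increment turns out
// not to equal the access size.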
/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}
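
// Worked example (added for clarity): for a VLD2 of two v4i16 vectors, the
// access size is VecTy.getSizeInBits() / 8 * NumVecs = 64 / 8 * 2 = 16 bytes,
// so only a constant post-increment of exactly 16 permits the "[rN]!" form.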
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
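
// Note (added for clarity): for quad-register VLD3/VLD4 the selector above
// issues two machine nodes: QOpcodes0 loads the even D subregisters with a
// forced post-increment (purely so the second node has an address to use),
// and QOpcodes1 loads the odd D subregisters, threading the partially built
// super-register through via the ImplDef / SDValue(VLdA, 0) operand.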
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
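
// Worked example (added for clarity): for a vld2 lane of two v2i32 vectors,
// NumBytes is 2 * 32 / 8 = 8, so an incoming alignment of 16 is clamped down
// to 8, while an alignment of 4 (less than 8 and less than NumBytes) is
// dropped to 0 and the access is left unannotated.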
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
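
// Worked example (added for clarity): (srl x, 7) & 0xff matches the first
// pattern above with Srl_imm == 7 and And_imm == 0xff, giving LSB == 7 and
// Width == 7 (the width operand is encoded as width-1), i.e. UBFX x, #7, #8.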
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X,  0,  X, -X
/// select_cc setgt    X, -1,  X, -X
/// select_cc setl[te] X,  0, -X,  X
/// select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}
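
// Illustrative check (added): for X < 0 the SRA produces Y == -1, so
// (X + Y) ^ Y == (X - 1) ^ -1 == ~(X - 1) == -X; for X >= 0, Y == 0 and the
// expression is just X. That is why the add/sra/xor triple is integer abs.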
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}
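
// Note (added, informal): the CMP_SWAP_8/16/32 pseudos exist because the
// exclusive-load/exclusive-store retry loop they are later expanded into
// cannot be expressed as a single SelectionDAG node; the pseudo keeps the
// loop opaque until after register allocation.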
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}
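
// Worked example (added): A == 0x0ff0 has FirstOne == 11 and LastOne == 4; the
// popcount is 8 == 11 - 4 + 1, so the range (11, 4) is returned. A value such
// as 0x0ff1 fails the popcount test (set bits are not contiguous) and yields
// an empty Optional.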
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
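
// Worked example (added): (X & 0x100) == 0, where only bit 8 is set, hits
// case 3 above: LSLS X, #23 moves bit 8 into the sign bit, and the caller
// rewrites EQ/NE into PL/MI via SwitchEQNEToPLMI.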
void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::XOR:
    // Select special operations if XOR node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
    // If we can't materialize the constant we need to use a literal pool
    if (ConstantMaterializationCost(Val) > 2) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                         Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }
      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction& MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, 4);

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }
2674 case ISD::FrameIndex
: {
2675 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2676 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
2677 SDValue TFI
= CurDAG
->getTargetFrameIndex(
2678 FI
, TLI
->getPointerTy(CurDAG
->getDataLayout()));
2679 if (Subtarget
->isThumb1Only()) {
2680 // Set the alignment of the frame object to 4, to avoid having to generate
2681 // more than one ADD
2682 MachineFrameInfo
&MFI
= MF
->getFrameInfo();
2683 if (MFI
.getObjectAlignment(FI
) < 4)
2684 MFI
.setObjectAlignment(FI
, 4);
2685 CurDAG
->SelectNodeTo(N
, ARM::tADDframe
, MVT::i32
, TFI
,
2686 CurDAG
->getTargetConstant(0, dl
, MVT::i32
));
2689 unsigned Opc
= ((Subtarget
->isThumb() && Subtarget
->hasThumb2()) ?
2690 ARM::t2ADDri
: ARM::ADDri
);
2691 SDValue Ops
[] = { TFI
, CurDAG
->getTargetConstant(0, dl
, MVT::i32
),
2692 getAL(CurDAG
, dl
), CurDAG
->getRegister(0, MVT::i32
),
2693 CurDAG
->getRegister(0, MVT::i32
) };
2694 CurDAG
->SelectNodeTo(N
, Opc
, MVT::i32
, Ops
);
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::MUL:
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
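  // Worked example (added): for N = X * 9, RHSV-1 == 8 is a power of two, so
  // the node becomes add X, X, lsl #3 (t2ADDrs/ADDrsi), i.e. X + (X << 3);
  // for X * 7, RHSV+1 == 8 gives rsb X, X, lsl #3, i.e. (X << 3) - X.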
  case ISD::AND: {
    // Check for unsigned bitfield extract
    if (tryV6T2BitfieldExtractOp(N, false))
      return;

    // If an immediate is used in an AND node, it is possible that the immediate
    // can be more optimally materialized when negated. If this is the case we
    // can negate the immediate and use a BIC instead.
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();

      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm) >
              ConstantMaterializationCost(~Imm)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
          CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the topological
        // ordering so it is just before N. Otherwise, don't touch its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());

        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }

    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
    // are entirely contributed by c2 and lower 16-bits are entirely contributed
    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
    // Select it to: "movt x, ((c1 & 0xffff) >> 16)
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
      ? ARM::t2MOVTi16
      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }

    break;
  }
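  // Worked example (added): with c1 == 0x1234ffff and c2 == 0x12340000, the
  // (and (or x, c2), c1) pattern above keeps the low half of x and forces the
  // high half to 0x1234, which is exactly MOVT x, #0x1234.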
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL:{
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL:{
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS
    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;

    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");

    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));

    if (!Zero || Zero->getZExtValue() != 0 ||
        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;

    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
  case ISD::LOAD: {
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InFlag = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      InFlag = N->getOperand(4);

      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InFlag = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InFlag);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    //   This allows us to avoid materializing the expensive negative constant.
    //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
    //   for its glue output.
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
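  // Illustration (added): comparing X against -200 would require
  // materializing the negative constant; "adds tmp, X, #200" (tADDi8) sets
  // the same flags against zero, so the CMPZ above is rewritten to compare
  // (X + 200) with #0.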
  case ARMISD::CMOV: {
    SDValue InFlag = N->getOperand(4);

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC =
          (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VUZPd16; break;
    case MVT::v2f32:
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VUZPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VUZPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VUZPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VTRNd16; break;
    case MVT::v2f32:
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VTRNq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VTRNq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VTRNq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                        ARM::VLD2DUPd16wb_fixed,
                                        ARM::VLD2DUPd32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                        ARM::VLD3DUPd16Pseudo_UPD,
                                        ARM::VLD3DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                        ARM::VLD4DUPd16Pseudo_UPD,
                                        ARM::VLD4DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
    return;
  }
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
                                         ARM::VLD2d16wb_fixed,
                                         ARM::VLD2d32wb_fixed,
                                         ARM::VLD1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
                                         ARM::VLD2q16PseudoWB_fixed,
                                         ARM::VLD2q32PseudoWB_fixed };
    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
                                          ARM::VLD4q16oddPseudo_UPD,
                                          ARM::VLD4q32oddPseudo_UPD };
    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
                                         ARM::VST2d16wb_fixed,
                                         ARM::VST2d32wb_fixed,
                                         ARM::VST1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
                                         ARM::VST2q16PseudoWB_fixed,
                                         ARM::VST2q32PseudoWB_fixed };
    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
                                         ARM::VST4d16Pseudo_UPD,
                                         ARM::VST4d32Pseudo_UPD,
                                         ARM::VST1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                          ARM::VST4q16Pseudo_UPD,
                                          ARM::VST4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
                                          ARM::VST4q16oddPseudo_UPD,
                                          ARM::VST4q32oddPseudo_UPD };
    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }
3385 case ISD::INTRINSIC_VOID
:
3386 case ISD::INTRINSIC_W_CHAIN
: {
3387 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(
          getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(),
                    dl)); /* coproc */
      Ops.push_back(
          getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(),
                    dl)); /* opc */
      Ops.push_back(
          getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(),
                    dl)); /* CRm */

      // The ARM mrrc2 instruction does not allow predicates: the top 4 bits
      // of the encoding are always '1111'. Assembly may still spell an AL
      // predicate on mrrc2, but it makes no difference to the encoding, so
      // only the other forms take predicate operands here.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
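
    // For example (illustrative values, not from this file): on an A-class
    // core,
    //   %p = call { i32, i32 } @llvm.arm.mrrc(i32 15, i32 0, i32 14)
    // selects through the case above to an MRRC machine node, i.e. roughly
    // "mrrc p15, #0, Rt, Rt2, c14", with the two i32 results carrying the
    // transferred register pair.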
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
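
    // In ARM mode, LDREXD/LDAEXD writes an even/odd register pair modeled as
    // a single Untyped GPRPair result, which is why the two i64 halves are
    // recovered above with EXTRACT_SUBREG (gsub_0/gsub_1). In Thumb2 the
    // instruction takes two independent registers, so the node simply carries
    // two i32 results.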
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store exclusive double returns an i32 value which is the return
      // status of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }
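
    // For example (illustrative): a 64-bit atomic store loop can emit
    //   %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr)
    // which in ARM mode becomes "strexd Rd, Rt, Rt2, [Rn]" with Rt/Rt2
    // packed into one GPRPair, and %status receiving the i32 success flag.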
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
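
    // As in the writeback cases earlier, the 64-bit slots in the vldN tables
    // degenerate to plain VLD1 opcodes: NEON has no de-interleaving loads
    // with 64-bit elements, and with one element per register the
    // interleaved and linear layouts load the same bytes.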
    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}
// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64 bit). Split the string into its fields,
// parse each field as an integer, and append the resulting operands to the
// provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}
// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked
// Register mask operand, which expresses which register is to be used, e.g.
// r8, and in which mode it is to be used, e.g. usr. Returns -1 to signify
// that the string was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}
// The flags here are common to those allowed for apsr in the A class cores
// and those allowed for the special registers in the M class cores. Returns
// a value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}
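
// So, illustratively, "apsr_nzcvq" carries flags "nzcvq" and maps to 0x2,
// while "apsr_nzcvqg" maps to 0x3 (the g flag additionally selecting the GE
// bits); any other flag spelling is rejected with -1.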
// Maps MClass special registers string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}
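
// Worked example (illustrative): for "spsr_fc" the caller splits off Reg =
// "spsr" and Flags = "fc", so the loop above sets 0x8 | 0x1 and the R bit
// adds 0x10, giving a final mask of 0x19 for the MSR node.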
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }
  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on
  // the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }
  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }
  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}
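
// For example (illustrative IR, not from this file):
//   %v = call i32 @llvm.read_register.i32(metadata !{!"fpscr"})
// takes the VFP path above and selects to a VMRS machine node, whereas an
// ACLE field string such as "cp15:0:c13:c0:3" is handled by the first branch
// and becomes an MRC node.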
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }
  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;
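
  // E.g. "spsr_fc" rsplits at the last '_' into Reg = "spsr" and Flags =
  // "fc"; a string with no '_' (such as "cpsr") leaves Flags empty.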
  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }
  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require an (even, even+1) GPR pair, and use %n and %Hn to refer to the
  // individual regs respectively. Since there is no constraint to explicitly
  // specify a reg pair, we use the GPRPair reg class for "%r" for 64-bit
  // data. For Thumb, the 64-bit data may be referred to by H, Q, R modifiers,
  // so we still pack them into a GPRPair.
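  //
  // An illustrative source pattern that relies on this pairing (not from
  // this file):
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val) : "r"(Ptr));
  // Here %0 and %H0 name the low and high halves of one 64-bit operand, so
  // the two i32 virtual registers below are rewritten into a single GPRPair.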
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  SDLoc dl(N);
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;
    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get
    // here and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];
    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();
    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;
    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    //        be an immediate and not a memory constraint.
    LLVM_FALLTHROUGH;
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}