//===- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();
  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);
  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                       Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

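// For example, the v4i32 mask <0, 4, 2, 6> satisfies both
// fitsRegularPattern(Begin, 2, End, 0, 2) (mask positions 0 and 2 hold 0 and 2)
// and fitsRegularPattern(Begin + 1, 2, End, 4, 2) (positions 1 and 3 hold 4
// and 6); this is the pair of queries the VPACKEV lowering below performs.
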
/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
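/// For example, for v4i32 both <2, 2, 2, 2> and <2, undef, 2, undef> fit the
/// form with x == 2.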
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
///   (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
///   3 + (2 << 2) + (1 << 4) + (0 << 6)
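/// i.e. each of the four sub-mask entries is packed into a 2-bit field, with
/// entry 0 ending up in the least-significant bits of the immediate.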
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  // When the size is less than 4, lower cost instructions may be used.
  if (Mask.size() < 4)
    return SDValue();

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Mask.size(); j += 4) {
      int Idx = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = Idx;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (Idx != -1 && Idx != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  APInt Imm(64, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SubMask[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, MVT::i64));
}

/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 2, 2, 4, 4, ...>
///   <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 1, 3, 3, 5, 5, ...>
///   <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVH (if possible).
///
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVH when the mask consists of two of the
/// following forms interleaved:
///   <x, x+1, x+2, ...>
///   <n+x, n+x+1, n+x+2, ...>
/// where n is the number of elements in the vector and x is half n.
/// For example:
///   <x, x, x+1, x+1, x+2, x+2, ...>
///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVL (if possible).
///
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVL when the mask consists of two of the
/// following forms interleaved:
///   <0, 1, 2, ...>
///   <n, n+1, n+2, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 1, 1, 2, 2, ...>
///   <0, n, 1, n+1, 2, n+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
///
/// VPICKEV copies the even elements of each vector into the result vector.
///
/// It is possible to lower into VPICKEV when the mask consists of two of the
/// following forms concatenated:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 2, 4, ..., 0, 2, 4, ...>
///   <0, 2, 4, ..., n, n+2, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
///
/// VPICKOD copies the odd elements of each vector into the result vector.
///
/// It is possible to lower into VPICKOD when the mask consists of two of the
/// following forms concatenated:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 3, 5, ..., 1, 3, 5, ...>
///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  SmallVector<SDValue, 16> Ops;
  for (auto M : Mask)
    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  // <0b00, 0b01> + <0b10, 0b11> ->
  // 0b0100       + 0b1110       -> 0b01001110
  //                                <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
         "Vector type is unsupported for lsx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
///
/// It is a XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x is equal to n and n is half the length of vector.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
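/// For example, for v8i32 (n == 4), <1, 1, 1, 1, 5, 5, 5, 5> fits the form
/// with x == 1.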
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
                                             ArrayRef<int> Mask, MVT VT,
                                             SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
                              0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  // When the size is less than or equal to 4, lower cost instructions may be
  // used.
  if (Mask.size() <= 4)
    return SDValue();
  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  unsigned LeftSize = HalfSize / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
                              1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
                                   1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
                                   2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
                                   2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + HalfSize;
  const auto &End = Mask.end();

  // VECTOR_SHUFFLE concatenates the vectors:
  //  <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
  //  shuffling ->
  //  <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
  //
  // XVSHUF concatenates the vectors:
  //  <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
  //  shuffling ->
  //  <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
  SmallVector<SDValue, 8> MaskAlloc;
  for (auto it = Begin; it < Mid; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
      int M = *it < HalfSize ? *it : *it - HalfSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");

  for (auto it = Mid; it < End; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= HalfSize && *it < MaskSize) ||
             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, except for the following four cases, other cases are regarded
/// as cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case is the closest to LoongArch instructions and the other
/// cases need to be converted to it for processing.
///
/// This function may modify V1, V2 and Mask
static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
                                            MutableArrayRef<int> Mask, MVT VT,
                                            SDValue &V1, SDValue &V2,
                                            SelectionDAG &DAG) {
  enum HalfMaskType { HighLaneTy, LowLaneTy, None };

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;

  HalfMaskType preMask = None, postMask = None;

  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    preMask = HighLaneTy;
  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    preMask = LowLaneTy;

  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    postMask = HighLaneTy;
  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    postMask = LowLaneTy;

  // The pre-half of mask is high lane type, and the post-half of mask
  // is low lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of mask corresponds
  // to the lower 128-bit of vector register, and the low lane of mask
  // corresponds the higher 128-bit of vector register.
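  // The 8-bit immediates passed to XVPERMI below select v4i64 elements two
  // bits at a time (low field first): 0b01001110 -> <2, 3, 0, 1> swaps the two
  // 128-bit halves, 0b11101110 -> <2, 3, 2, 3> and 0b01000100 -> <0, 1, 0, 1>
  // broadcast one half into both positions.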
  if (preMask == HighLaneTy && postMask == LowLaneTy) {
    return;
  }
  if (preMask == LowLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01001110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01001110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b11101110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b11101110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01000100, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01000100, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else { // cross-lane
    return;
  }
}

/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 256-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
         "Vector type is unsupported for lasx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
  assert(Mask.size() >= 4 && "Mask size is less than 4.");

  // canonicalize non cross-lane shuffle vector
  SmallVector<int> NewMask(Mask);
  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node to second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask);
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
  }

  // Check for illegal shuffle mask element index values.
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

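  // Walk up Depth frames. Each step loads the previous frame pointer, which
  // (assuming the standard prologue that saves ra and the old fp at the top of
  // the frame) sits at fp - 2 * GRLenInBytes.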
  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getSignedConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }

  return Op;
}

1668 SDValue
LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op
,
1669 SelectionDAG
&DAG
) const {
1673 if (Op
.getValueSizeInBits() > 32 && Subtarget
.hasBasicF() &&
1674 !Subtarget
.hasBasicD()) {
1676 DAG
.getNode(LoongArchISD::FTINT
, DL
, MVT::f32
, Op
.getOperand(0));
1677 return DAG
.getNode(LoongArchISD::MOVFR2GR_S_LA64
, DL
, MVT::i64
, Dst
);
1680 EVT FPTy
= EVT::getFloatingPointVT(Op
.getValueSizeInBits());
1681 SDValue Trunc
= DAG
.getNode(LoongArchISD::FTINT
, DL
, FPTy
, Op
.getOperand(0));
1682 return DAG
.getNode(ISD::BITCAST
, DL
, Op
.getValueType(), Trunc
);
1685 static SDValue
getTargetNode(GlobalAddressSDNode
*N
, SDLoc DL
, EVT Ty
,
1686 SelectionDAG
&DAG
, unsigned Flags
) {
1687 return DAG
.getTargetGlobalAddress(N
->getGlobal(), DL
, Ty
, 0, Flags
);
1690 static SDValue
getTargetNode(BlockAddressSDNode
*N
, SDLoc DL
, EVT Ty
,
1691 SelectionDAG
&DAG
, unsigned Flags
) {
1692 return DAG
.getTargetBlockAddress(N
->getBlockAddress(), Ty
, N
->getOffset(),
1696 static SDValue
getTargetNode(ConstantPoolSDNode
*N
, SDLoc DL
, EVT Ty
,
1697 SelectionDAG
&DAG
, unsigned Flags
) {
1698 return DAG
.getTargetConstantPool(N
->getConstVal(), Ty
, N
->getAlign(),
1699 N
->getOffset(), Flags
);
1702 static SDValue
getTargetNode(JumpTableSDNode
*N
, SDLoc DL
, EVT Ty
,
1703 SelectionDAG
&DAG
, unsigned Flags
) {
1704 return DAG
.getTargetJumpTable(N
->getIndex(), Ty
, Flags
);
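
// getAddr materializes the address of a global-like node according to the
// code model in use. With the small/medium code models it emits
// PseudoLA_PCREL for DSO-local symbols and PseudoLA_GOT otherwise; with the
// large code model the *_LARGE pseudos are used, which take an extra dummy
// operand and expand to the longer 5-instruction sequences. GOT-based loads
// additionally get an invariant memory operand so MachineLICM can hoist them.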
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
      // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  auto CM = DAG.getTarget().getCodeModel();
  const GlobalValue *GV = N->getGlobal();

  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
      CM = *GCM;
  }

  return getAddr(N, DAG, CM, GV->isDSOLocal());
}
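
// The helpers below implement the TLS access models: getStaticTLSAddr covers
// initial-exec (loading the offset through the GOT) and local-exec,
// getDynamicTLSAddr builds the general/local-dynamic call to __tls_get_addr,
// and getTLSDescAddr emits the TLSDESC sequence. lowerGlobalTLSAddress picks
// one of them based on the TLS model and whether TLSDESC is enabled.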
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
                                                SelectionDAG &DAG, unsigned Opc,
                                                bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
               : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    report_fatal_error("the emulated TLS is prohibited",
                       /*GenCrashDiag=*/false);

  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // run-time.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                     : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                     : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            /*UseGOT=*/true, Large);
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
                            /*UseGOT=*/false);
  }

  return getTLSDescAddr(N, DAG,
                        Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
                              : LoongArch::PseudoLA_TLS_DESC,
                        Large);
}
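
// checkIntrinsicImmArg verifies that the immediate operand at index ImmOp of
// an intrinsic fits in N bits (signed or unsigned). If it does not, a
// diagnostic is emitted and an UNDEF of the intrinsic's type is returned;
// otherwise lowering proceeds normally. For example, loongarch_lsx_vsat_b
// expects a 3-bit unsigned immediate, so a value such as 9 is rejected here.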
template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}

// Helper function that emits an error message for intrinsics with chain and
// returns the merge values of an UNDEF and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = Op.getConstantOperandVal(4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
  IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
  IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
  IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {

  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = Op2->getAsZExtVal();
    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<2>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT),
                             DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
                                         Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue Op3 = Op.getOperand(3);                                            \
    return Subtarget.is64Bit()                                                 \
               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
                             Op3);                                             \
  }
  IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
  IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
  IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
               : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
                             Op2,
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                         Op.getOperand(3)));
  }
#define ASRT_LE_GT_CASE(NAME)                                                  \
  case Intrinsic::loongarch_##NAME: {                                          \
    return !Subtarget.is64Bit()                                                \
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
               : Op;                                                           \
  }
  ASRT_LE_GT_CASE(asrtle_d)
  ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
               : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
                                 : Op;
  }
  case Intrinsic::loongarch_lsx_vst:
  case Intrinsic::loongarch_lasx_xvst:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<5>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<1>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)
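  //
  // For example, with GRLen=32, Hi:Lo = 0x00000000:0x00000001 and Shamt=40,
  // the else branch applies: Lo = 0 and Hi = 0x00000001 << 8 = 0x00000100,
  // which is the 64-bit value 1 shifted left by 40.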

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen =
      DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0
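  //
  // For example, with GRLen=32, Hi:Lo = 0x80000000:0x00000000 and Shamt=36,
  // the SRA form takes the else branch: Lo = Hi >>s 4 = 0xF8000000 and
  // Hi = Hi >>s 31 = 0xFFFFFFFF, i.e. the 64-bit value arithmetically
  // shifted right by 36.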

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen =
      DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SDIV:
    return LoongArchISD::DIV_W;
  case ISD::UDIV:
    return LoongArchISD::DIV_WU;
  case ISD::SREM:
    return LoongArchISD::MOD_W;
  case ISD::UREM:
    return LoongArchISD::MOD_WU;
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTL:
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W later on, because the fact that the operation was originally of
// type i8/i16/i32 is lost.
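//
// For example, an i32 division reaching ReplaceNodeResults on LA64 is rebuilt
// here as (truncate (DIV_W (ext a), (ext b))) so the 32-bit instruction can
// still be selected; ISD::ROTL is additionally rewritten as ROTR_W with a
// (32 - Shamt) amount, since LoongArch only provides right rotates.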
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    if (N->getOpcode() == ISD::ROTL) {
      SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
      NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
    }
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
    // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, to reduce the number of sign-extension instructions.
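//
// For example, an i32 add becomes
// (truncate (sext_inreg (add (anyext a), (anyext b)), i32)); the explicit
// sext_inreg reflects LA64's sign-extending 32-bit arithmetic, so selection
// does not need to insert a separate extension.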
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Helper function that emits an error message for intrinsics with or without
// chain and pushes an UNDEF (and, when WithChain is set, the chain) as the
// replacement results.
static void emitErrorAndReplaceIntrinsicResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
    StringRef ErrorMsg, bool WithChain = true) {
  DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
  Results.push_back(DAG.getUNDEF(N->getValueType(0)));
  if (!WithChain)
    return;
  Results.push_back(N->getOperand(0));
}
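
// replaceVPICKVE2GRResults handles the vpickve2gr element-extract intrinsics
// when their result type needs widening: the lane index must fit in N bits,
// and the element is produced as a VPICK_SEXT_ELT/VPICK_ZEXT_ELT node on
// GRLenVT which is then truncated back to the intrinsic's original result
// type.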
template <unsigned N>
static void
replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
                         unsigned ResOp) {
  const StringRef ErrorMsgOOR = "argument out of range";
  unsigned Imm = Node->getConstantOperandVal(2);
  if (!isUInt<N>(Imm)) {
    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
                                        /*WithChain=*/false);
    return;
  }
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(1);

  SDValue PickElt =
      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
                                PickElt.getValue(0)));
}

static void replaceVecCondBranchResults(SDNode *N,
                                        SmallVectorImpl<SDValue> &Results,
                                        SelectionDAG &DAG,
                                        const LoongArchSubtarget &Subtarget,
                                        unsigned ResOp) {
  SDLoc DL(N);
  SDValue Vec = N->getOperand(1);

  SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
  Results.push_back(
      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
}

static void
replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                 SelectionDAG &DAG,
                                 const LoongArchSubtarget &Subtarget) {
  switch (N->getConstantOperandVal(0)) {
  default:
    llvm_unreachable("Unexpected Intrinsic.");
  case Intrinsic::loongarch_lsx_vpickve2gr_b:
    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_h:
  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_w:
    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_bz_b:
  case Intrinsic::loongarch_lsx_bz_h:
  case Intrinsic::loongarch_lsx_bz_w:
  case Intrinsic::loongarch_lsx_bz_d:
  case Intrinsic::loongarch_lasx_xbz_b:
  case Intrinsic::loongarch_lasx_xbz_h:
  case Intrinsic::loongarch_lasx_xbz_w:
  case Intrinsic::loongarch_lasx_xbz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VALL_ZERO);
    break;
  case Intrinsic::loongarch_lsx_bz_v:
  case Intrinsic::loongarch_lasx_xbz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VANY_ZERO);
    break;
  case Intrinsic::loongarch_lsx_bnz_b:
  case Intrinsic::loongarch_lsx_bnz_h:
  case Intrinsic::loongarch_lsx_bnz_w:
  case Intrinsic::loongarch_lsx_bnz_d:
  case Intrinsic::loongarch_lasx_xbnz_b:
  case Intrinsic::loongarch_lasx_xbnz_h:
  case Intrinsic::loongarch_lasx_xbnz_w:
  case Intrinsic::loongarch_lasx_xbnz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VALL_NONZERO);
    break;
  case Intrinsic::loongarch_lsx_bnz_v:
  case Intrinsic::loongarch_lasx_xbnz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VANY_NONZERO);
    break;
  }
}

void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 2,
                                          Subtarget.hasDiv32() && VT == MVT::i32
                                              ? ISD::ANY_EXTEND
                                              : ISD::SIGN_EXTEND));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 2));
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Src = N->getOperand(0);
    EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
      Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
    Results.push_back(Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      SDValue Dst =
          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 16:
      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 8:
      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 1));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SDValue Chain = N->getOperand(0);
    SDValue Op2 = N->getOperand(2);
    MVT GRLenVT = Subtarget.getGRLenVT();
    const StringRef ErrorMsgOOR = "argument out of range";
    const StringRef ErrorMsgReqLA64 = "requires loongarch64";
    const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

    switch (N->getConstantOperandVal(1)) {
    default:
      llvm_unreachable("Unexpected Intrinsic.");
    case Intrinsic::loongarch_movfcsr2gr: {
      if (!Subtarget.hasBasicF()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
        return;
      }
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<2>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
          LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
          {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
      Results.push_back(MOVFCSR2GRResults.getValue(1));
      break;
    }
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

#define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain,                                                                \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
      CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
#define CSR_CASE(ID)                                                           \
  case Intrinsic::loongarch_##ID: {                                            \
    if (!Subtarget.is64Bit())                                                  \
      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
    break;                                                                     \
  }
      CSR_CASE(csrrd_d);
      CSR_CASE(csrwr_d);
      CSR_CASE(csrxchg_d);
      CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRRDResults =
          DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
      Results.push_back(CSRRDResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      unsigned Imm = N->getConstantOperandVal(3);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRWRResults =
          DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
                       DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
      Results.push_back(CSRWRResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      unsigned Imm = N->getConstantOperandVal(4);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRXCHGResults = DAG.getNode(
          LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
          {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
           DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
      Results.push_back(CSRXCHGResults.getValue(1));
      break;
    }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue IOCSRRDResults =                                                   \
        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
    Results.push_back(                                                         \
        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
    Results.push_back(IOCSRRDResults.getValue(1));                             \
    break;                                                                     \
  }
      IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
      IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
      IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      SDValue CPUCFGResults =
          DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
      Results.push_back(CPUCFGResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      if (!Subtarget.is64Bit()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
        return;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
          "On LA64, only 64-bit registers can be read.");
    else
      DAG.getContext()->emitError(
          "On LA32, only 32-bit registers can be read.");
    Results.push_back(DAG.getUNDEF(VT));
    Results.push_back(N->getOperand(0));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
    break;
  }
  case ISD::LROUND: {
    SDValue Op0 = N->getOperand(0);
    EVT OpVT = Op0.getValueType();
    RTLIB::Libcall LC =
        OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
    Results.push_back(Result);
    break;
  }
  }
}

static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue FirstOperand = N->getOperand(0);
  SDValue SecondOperand = N->getOperand(1);
  unsigned FirstOperandOpc = FirstOperand.getOpcode();
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  uint64_t lsb = 0, msb;
  unsigned SMIdx, SMLen;
  ConstantSDNode *CN;
  SDValue NewOperand;
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Op's second operand must be a shifted mask.
  if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
      !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
    return SDValue();

  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
    // Pattern match BSTRPICK.
    //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where msb = lsb + len - 1
3169 // The second operand of the shift must be an immediate.
3170 if (!(CN
= dyn_cast
<ConstantSDNode
>(FirstOperand
.getOperand(1))))
3173 lsb
= CN
->getZExtValue();
3175 // Return if the shifted mask does not start at bit 0 or the sum of its
3176 // length and lsb exceeds the word's size.
3177 if (SMIdx
!= 0 || lsb
+ SMLen
> ValTy
.getSizeInBits())
3180 NewOperand
= FirstOperand
.getOperand(0);
3182 // Pattern match BSTRPICK.
3183 // $dst = and $src, (2**len- 1) , if len > 12
3184 // => BSTRPICK $dst, $src, msb, lsb
3185 // where lsb = 0 and msb = len - 1
3187 // If the mask is <= 0xfff, andi can be used instead.
3188 if (CN
->getZExtValue() <= 0xfff)
3191 // Return if the MSB exceeds.
3192 if (SMIdx
+ SMLen
> ValTy
.getSizeInBits())
3196 // Omit if the constant has more than 2 uses. This a conservative
3197 // decision. Whether it is a win depends on the HW microarchitecture.
3198 // However it should always be better for 1 and 2 uses.
3199 if (CN
->use_size() > 2)
3201 // Return if the constant can be composed by a single LU12I.W.
3202 if ((CN
->getZExtValue() & 0xfff) == 0)
3204 // Return if the constand can be composed by a single ADDI with
3205 // the zero register.
3206 if (CN
->getSExtValue() >= -2048 && CN
->getSExtValue() < 0)
3211 NewOperand
= FirstOperand
;
3214 msb
= lsb
+ SMLen
- 1;
3215 SDValue NR0
= DAG
.getNode(LoongArchISD::BSTRPICK
, DL
, ValTy
, NewOperand
,
3216 DAG
.getConstant(msb
, DL
, GRLenVT
),
3217 DAG
.getConstant(lsb
, DL
, GRLenVT
));
3218 if (FirstOperandOpc
== ISD::SRA
|| FirstOperandOpc
== ISD::SRL
|| lsb
== 0)
3220 // Try to optimize to
3221 // bstrpick $Rd, $Rs, msb, lsb
3222 // slli $Rd, $Rd, lsb
3223 return DAG
.getNode(ISD::SHL
, DL
, ValTy
, NR0
,
3224 DAG
.getConstant(lsb
, DL
, GRLenVT
));
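
// Try to fold (srl (and $src, mask), shamt) into a single BSTRPICK when the
// shifted mask covers the shift amount. For example (illustrative, 64-bit):
//   (srl (and $src, 0xff00), 8)  ==>  bstrpick.d $dst, $src, 15, 8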
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1

  SDValue FirstOperand = N->getOperand(0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
      !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
  if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
    return SDValue();

  Shamt = CN->getZExtValue();
  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
    return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
                       FirstOperand->getOperand(0),
                       DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));

  return SDValue();
}
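
// Try to fold OR-of-masked-values patterns into BSTRINS, which inserts a bit
// field from one register into another. For example, pattern 1 below matches
// (illustrative, 64-bit):
//   (or (and $a, ~0xff0), (and (shl $b, 4), 0xff0))
//     ==>  bstrins.d $a, $b, 11, 4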
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT ValTy = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantSDNode *CN0, *CN1;
  SDLoc DL(N);
  unsigned ValBits = ValTy.getSizeInBits();
  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  unsigned Shamt;
  bool SwapAndRetried = false;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (ValBits != 32 && ValBits != 64)
    return SDValue();

Retry:
  // 1st pattern to match BSTRINS:
  //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
  //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 2nd pattern to match BSTRINS:
  //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
  //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 3rd pattern to match BSTRINS:
  //  R = or (and X, mask0), (and Y, mask1)
  //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
  //  =>
  //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
  //  where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (MaskIdx0 + MaskLen0 <= 64) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
                                   DAG.getConstant(MaskIdx0, DL, GRLenVT)),
                       DAG.getConstant(ValBits == 32
                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                           : (MaskIdx0 + MaskLen0 - 1),
                                       DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 4th pattern to match BSTRINS:
  //  R = or (and X, mask), (shl Y, shamt)
  //  where mask = (2**shamt - 1)
  //  =>
  //  R = BSTRINS X, Y, ValBits - 1, shamt
  //  where ValBits = 32 or 64
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0),
                       DAG.getConstant((ValBits - 1), DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));
  }

  // 5th pattern to match BSTRINS:
  //  R = or (and X, mask), const
  //  where ~mask = (2**size - 1) << lsb, mask & const = 0
  //  =>
  //  R = BSTRINS X, (const >> lsb), msb, lsb
  //  where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
    return DAG.getNode(
        LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
        DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
        DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                      : (MaskIdx0 + MaskLen0 - 1),
                        DL, GRLenVT),
        DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 6th pattern.
  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
  // by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
  //
  // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
  // pattern is more common than the 1st. So we put the 1st before the 6th in
  // order to match as many nodes as possible.
  ConstantSDNode *CNMask, *CNShamt;
  unsigned MaskIdx, MaskLen;
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      CNShamt->getZExtValue() + MaskLen <= ValBits) {
    Shamt = CNShamt->getZExtValue();
    APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         N1.getOperand(0).getOperand(0),
                         DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(Shamt, DL, GRLenVT));
    }
  }

  // 7th pattern.
  // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
  // overwritten by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
  // before the 7th in order to match as many nodes as possible.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      N1.getOperand(0).getOpcode() == ISD::SHL &&
      (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      CNShamt->getZExtValue() == MaskIdx) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         N1.getOperand(0).getOperand(0),
                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(MaskIdx, DL, GRLenVT));
    }
  }

  // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
  if (!SwapAndRetried) {
    std::swap(N0, N1);
    SwapAndRetried = true;
    goto Retry;
  }

  SwapAndRetried = false;
Retry2:
  // 8th pattern.
  // a = b | (c & shifted_mask), where all positions in b to be overwritten by
  // the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 8th pattern is more common than the 4th and 5th patterns.
  // So we put it here in order to match as many nodes as possible or generate
  // fewer instructions.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
                                     N1->getOperand(0),
                                     DAG.getConstant(MaskIdx, DL, GRLenVT)),
                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(MaskIdx, DL, GRLenVT));
    }
  }
  // Swap N0/N1 and retry.
  if (!SwapAndRetried) {
    std::swap(N0, N1);
    SwapAndRetried = true;
    goto Retry2;
  }

  return SDValue();
}
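
// Return true if V is an i8/i16 extending load (or an AssertSext/AssertZext
// asserting an i8/i16 value) and report the extension kind through ExtType.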
static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
  ExtType = ISD::NON_EXTLOAD;

  switch (V.getNode()->getOpcode()) {
  case ISD::LOAD: {
    LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
    if ((LoadNode->getMemoryVT() == MVT::i8) ||
        (LoadNode->getMemoryVT() == MVT::i16)) {
      ExtType = LoadNode->getExtensionType();
      return true;
    }
    return false;
  }
  case ISD::AssertSext: {
    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
      ExtType = ISD::SEXTLOAD;
      return true;
    }
    return false;
  }
  case ISD::AssertZext: {
    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
      ExtType = ISD::ZEXTLOAD;
      return true;
    }
    return false;
  }
  default:
    return false;
  }

  return false;
}
// Eliminate redundant truncation and zero-extension nodes.
// * Case 1: both inputs of the AND are zero-extended truncations of Input1
//   and Input2, and the SETCC compares the AND result against Input2's
//   zero-extended truncation under condition CC.
// * Case 2: Input1 is additionally combined with Constant -1 through an XOR,
//   and the SETCC (eq/ne only) compares the AND result against Constant 0.
// When Input1 and Input2 are narrow (i8/i16) extending loads, the intermediate
// TRUNCATE/ZERO_EXT nodes are unnecessary and the AND can be performed
// directly on the original values.
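// For case 1, with narrow loads a and b this rewrites, e.g.,
//   (setcc (and (zext (trunc a)), (zext (trunc b))), (zext (trunc b)), eq)
// into
//   (setcc (and a, b), b, eq)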
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();

  SDNode *AndNode = N->getOperand(0).getNode();
  if (AndNode->getOpcode() != ISD::AND)
    return SDValue();

  SDValue AndInputValue2 = AndNode->getOperand(1);
  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue CmpInputValue = N->getOperand(1);
  SDValue AndInputValue1 = AndNode->getOperand(0);
  if (AndInputValue1.getOpcode() == ISD::XOR) {
    if (CC != ISD::SETEQ && CC != ISD::SETNE)
      return SDValue();
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
    if (!CN || CN->getSExtValue() != -1)
      return SDValue();
    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
    if (!CN || CN->getSExtValue() != 0)
      return SDValue();
    AndInputValue1 = AndInputValue1.getOperand(0);
    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
    if (AndInputValue2 != CmpInputValue)
      return SDValue();
  } else {
    return SDValue();
  }

  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
  ISD::LoadExtType ExtType1;
  ISD::LoadExtType ExtType2;

  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
      !checkValueWidth(TruncInputValue2, ExtType2))
    return SDValue();

  if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
      AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
    return SDValue();

  if ((ExtType2 != ISD::ZEXTLOAD) &&
      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
    return SDValue();

  // These truncation and zero-extension nodes are not necessary, remove them.
  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
                               TruncInputValue1, TruncInputValue2);
  SDValue NewSetCC =
      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
  return SDValue(N, 0);
}
// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != LoongArchISD::REVB_2W)
    return SDValue();

  return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
                     Src.getOperand(0));
}
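
// Helpers for intrinsics that take an immediate operand: the immediate is
// range-checked against its N-bit encoding and rematerialized either as a
// GRLen constant or as a splat constant of the intrinsic's result type.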
template <unsigned N>
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
                                       SelectionDAG &DAG,
                                       const LoongArchSubtarget &Subtarget,
                                       bool IsSigned = false) {
  SDLoc DL(Node);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
  }
  return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
}

template <unsigned N>
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
                                   SelectionDAG &DAG, bool IsSigned = false) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));

  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }
  return DAG.getConstant(
      APInt(ResTy.getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      DL, ResTy);
}
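
// The [x]vbitclr/[x]vbitset/[x]vbitrev intrinsics are lowered to AND/OR/XOR
// with (1 << (elt % EltBits)); truncateVecElts masks each per-element shift
// amount to the element width.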
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  SDValue Vec = Node->getOperand(2);
  SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
}

static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit =
      DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

template <unsigned N>
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
}

template <unsigned N>
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
  return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
}

template <unsigned N>
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
  return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
}
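
// Map side-effect-free LSX/LASX intrinsics onto generic ISD nodes so that the
// common DAG combines and instruction selection patterns apply. For example
// (illustrative):
//   @llvm.loongarch.lsx.vadd.w(a, b)   ==>  (add v4i32:a, v4i32:b)
//   @llvm.loongarch.lsx.vslli.w(a, 3)  ==>  (shl a, splat(i32 3))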
static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  SDLoc DL(N);
  switch (N->getConstantOperandVal(0)) {
  default:
    break;
  case Intrinsic::loongarch_lsx_vadd_b:
  case Intrinsic::loongarch_lsx_vadd_h:
  case Intrinsic::loongarch_lsx_vadd_w:
  case Intrinsic::loongarch_lsx_vadd_d:
  case Intrinsic::loongarch_lasx_xvadd_b:
  case Intrinsic::loongarch_lasx_xvadd_h:
  case Intrinsic::loongarch_lasx_xvadd_w:
  case Intrinsic::loongarch_lasx_xvadd_d:
    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vaddi_bu:
  case Intrinsic::loongarch_lsx_vaddi_hu:
  case Intrinsic::loongarch_lsx_vaddi_wu:
  case Intrinsic::loongarch_lsx_vaddi_du:
  case Intrinsic::loongarch_lasx_xvaddi_bu:
  case Intrinsic::loongarch_lasx_xvaddi_hu:
  case Intrinsic::loongarch_lasx_xvaddi_wu:
  case Intrinsic::loongarch_lasx_xvaddi_du:
    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsub_b:
  case Intrinsic::loongarch_lsx_vsub_h:
  case Intrinsic::loongarch_lsx_vsub_w:
  case Intrinsic::loongarch_lsx_vsub_d:
  case Intrinsic::loongarch_lasx_xvsub_b:
  case Intrinsic::loongarch_lasx_xvsub_h:
  case Intrinsic::loongarch_lasx_xvsub_w:
  case Intrinsic::loongarch_lasx_xvsub_d:
    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vsubi_bu:
  case Intrinsic::loongarch_lsx_vsubi_hu:
  case Intrinsic::loongarch_lsx_vsubi_wu:
  case Intrinsic::loongarch_lsx_vsubi_du:
  case Intrinsic::loongarch_lasx_xvsubi_bu:
  case Intrinsic::loongarch_lasx_xvsubi_hu:
  case Intrinsic::loongarch_lasx_xvsubi_wu:
  case Intrinsic::loongarch_lasx_xvsubi_du:
    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vneg_b:
  case Intrinsic::loongarch_lsx_vneg_h:
  case Intrinsic::loongarch_lsx_vneg_w:
  case Intrinsic::loongarch_lsx_vneg_d:
  case Intrinsic::loongarch_lasx_xvneg_b:
  case Intrinsic::loongarch_lasx_xvneg_h:
  case Intrinsic::loongarch_lasx_xvneg_w:
  case Intrinsic::loongarch_lasx_xvneg_d:
    return DAG.getNode(
        ISD::SUB, DL, N->getValueType(0),
        DAG.getConstant(
            APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
                  /*isSigned=*/true),
            SDLoc(N), N->getValueType(0)),
        N->getOperand(1));
  case Intrinsic::loongarch_lsx_vmax_b:
  case Intrinsic::loongarch_lsx_vmax_h:
  case Intrinsic::loongarch_lsx_vmax_w:
  case Intrinsic::loongarch_lsx_vmax_d:
  case Intrinsic::loongarch_lasx_xvmax_b:
  case Intrinsic::loongarch_lasx_xvmax_h:
  case Intrinsic::loongarch_lasx_xvmax_w:
  case Intrinsic::loongarch_lasx_xvmax_d:
    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmax_bu:
  case Intrinsic::loongarch_lsx_vmax_hu:
  case Intrinsic::loongarch_lsx_vmax_wu:
  case Intrinsic::loongarch_lsx_vmax_du:
  case Intrinsic::loongarch_lasx_xvmax_bu:
  case Intrinsic::loongarch_lasx_xvmax_hu:
  case Intrinsic::loongarch_lasx_xvmax_wu:
  case Intrinsic::loongarch_lasx_xvmax_du:
    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmaxi_b:
  case Intrinsic::loongarch_lsx_vmaxi_h:
  case Intrinsic::loongarch_lsx_vmaxi_w:
  case Intrinsic::loongarch_lsx_vmaxi_d:
  case Intrinsic::loongarch_lasx_xvmaxi_b:
  case Intrinsic::loongarch_lasx_xvmaxi_h:
  case Intrinsic::loongarch_lasx_xvmaxi_w:
  case Intrinsic::loongarch_lasx_xvmaxi_d:
    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
  case Intrinsic::loongarch_lsx_vmaxi_bu:
  case Intrinsic::loongarch_lsx_vmaxi_hu:
  case Intrinsic::loongarch_lsx_vmaxi_wu:
  case Intrinsic::loongarch_lsx_vmaxi_du:
  case Intrinsic::loongarch_lasx_xvmaxi_bu:
  case Intrinsic::loongarch_lasx_xvmaxi_hu:
  case Intrinsic::loongarch_lasx_xvmaxi_wu:
  case Intrinsic::loongarch_lasx_xvmaxi_du:
    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vmin_b:
  case Intrinsic::loongarch_lsx_vmin_h:
  case Intrinsic::loongarch_lsx_vmin_w:
  case Intrinsic::loongarch_lsx_vmin_d:
  case Intrinsic::loongarch_lasx_xvmin_b:
  case Intrinsic::loongarch_lasx_xvmin_h:
  case Intrinsic::loongarch_lasx_xvmin_w:
  case Intrinsic::loongarch_lasx_xvmin_d:
    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmin_bu:
  case Intrinsic::loongarch_lsx_vmin_hu:
  case Intrinsic::loongarch_lsx_vmin_wu:
  case Intrinsic::loongarch_lsx_vmin_du:
  case Intrinsic::loongarch_lasx_xvmin_bu:
  case Intrinsic::loongarch_lasx_xvmin_hu:
  case Intrinsic::loongarch_lasx_xvmin_wu:
  case Intrinsic::loongarch_lasx_xvmin_du:
    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmini_b:
  case Intrinsic::loongarch_lsx_vmini_h:
  case Intrinsic::loongarch_lsx_vmini_w:
  case Intrinsic::loongarch_lsx_vmini_d:
  case Intrinsic::loongarch_lasx_xvmini_b:
  case Intrinsic::loongarch_lasx_xvmini_h:
  case Intrinsic::loongarch_lasx_xvmini_w:
  case Intrinsic::loongarch_lasx_xvmini_d:
    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
  case Intrinsic::loongarch_lsx_vmini_bu:
  case Intrinsic::loongarch_lsx_vmini_hu:
  case Intrinsic::loongarch_lsx_vmini_wu:
  case Intrinsic::loongarch_lsx_vmini_du:
  case Intrinsic::loongarch_lasx_xvmini_bu:
  case Intrinsic::loongarch_lasx_xvmini_hu:
  case Intrinsic::loongarch_lasx_xvmini_wu:
  case Intrinsic::loongarch_lasx_xvmini_du:
    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vmul_b:
  case Intrinsic::loongarch_lsx_vmul_h:
  case Intrinsic::loongarch_lsx_vmul_w:
  case Intrinsic::loongarch_lsx_vmul_d:
  case Intrinsic::loongarch_lasx_xvmul_b:
  case Intrinsic::loongarch_lasx_xvmul_h:
  case Intrinsic::loongarch_lasx_xvmul_w:
  case Intrinsic::loongarch_lasx_xvmul_d:
    return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmadd_b:
  case Intrinsic::loongarch_lsx_vmadd_h:
  case Intrinsic::loongarch_lsx_vmadd_w:
  case Intrinsic::loongarch_lsx_vmadd_d:
  case Intrinsic::loongarch_lasx_xvmadd_b:
  case Intrinsic::loongarch_lasx_xvmadd_h:
  case Intrinsic::loongarch_lasx_xvmadd_w:
  case Intrinsic::loongarch_lasx_xvmadd_d: {
    EVT ResTy = N->getValueType(0);
    return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
                                   N->getOperand(3)));
  }
  case Intrinsic::loongarch_lsx_vmsub_b:
  case Intrinsic::loongarch_lsx_vmsub_h:
  case Intrinsic::loongarch_lsx_vmsub_w:
  case Intrinsic::loongarch_lsx_vmsub_d:
  case Intrinsic::loongarch_lasx_xvmsub_b:
  case Intrinsic::loongarch_lasx_xvmsub_h:
  case Intrinsic::loongarch_lasx_xvmsub_w:
  case Intrinsic::loongarch_lasx_xvmsub_d: {
    EVT ResTy = N->getValueType(0);
    return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
                       DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
                                   N->getOperand(3)));
  }
  case Intrinsic::loongarch_lsx_vdiv_b:
  case Intrinsic::loongarch_lsx_vdiv_h:
  case Intrinsic::loongarch_lsx_vdiv_w:
  case Intrinsic::loongarch_lsx_vdiv_d:
  case Intrinsic::loongarch_lasx_xvdiv_b:
  case Intrinsic::loongarch_lasx_xvdiv_h:
  case Intrinsic::loongarch_lasx_xvdiv_w:
  case Intrinsic::loongarch_lasx_xvdiv_d:
    return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vdiv_bu:
  case Intrinsic::loongarch_lsx_vdiv_hu:
  case Intrinsic::loongarch_lsx_vdiv_wu:
  case Intrinsic::loongarch_lsx_vdiv_du:
  case Intrinsic::loongarch_lasx_xvdiv_bu:
  case Intrinsic::loongarch_lasx_xvdiv_hu:
  case Intrinsic::loongarch_lasx_xvdiv_wu:
  case Intrinsic::loongarch_lasx_xvdiv_du:
    return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmod_b:
  case Intrinsic::loongarch_lsx_vmod_h:
  case Intrinsic::loongarch_lsx_vmod_w:
  case Intrinsic::loongarch_lsx_vmod_d:
  case Intrinsic::loongarch_lasx_xvmod_b:
  case Intrinsic::loongarch_lasx_xvmod_h:
  case Intrinsic::loongarch_lasx_xvmod_w:
  case Intrinsic::loongarch_lasx_xvmod_d:
    return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vmod_bu:
  case Intrinsic::loongarch_lsx_vmod_hu:
  case Intrinsic::loongarch_lsx_vmod_wu:
  case Intrinsic::loongarch_lsx_vmod_du:
  case Intrinsic::loongarch_lasx_xvmod_bu:
  case Intrinsic::loongarch_lasx_xvmod_hu:
  case Intrinsic::loongarch_lasx_xvmod_wu:
  case Intrinsic::loongarch_lasx_xvmod_du:
    return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vand_v:
  case Intrinsic::loongarch_lasx_xvand_v:
    return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vor_v:
  case Intrinsic::loongarch_lasx_xvor_v:
    return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vxor_v:
  case Intrinsic::loongarch_lasx_xvxor_v:
    return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vnor_v:
  case Intrinsic::loongarch_lasx_xvnor_v: {
    SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
                              N->getOperand(2));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
  case Intrinsic::loongarch_lsx_vandi_b:
  case Intrinsic::loongarch_lasx_xvandi_b:
    return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<8>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vori_b:
  case Intrinsic::loongarch_lasx_xvori_b:
    return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<8>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vxori_b:
  case Intrinsic::loongarch_lasx_xvxori_b:
    return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<8>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsll_b:
  case Intrinsic::loongarch_lsx_vsll_h:
  case Intrinsic::loongarch_lsx_vsll_w:
  case Intrinsic::loongarch_lsx_vsll_d:
  case Intrinsic::loongarch_lasx_xvsll_b:
  case Intrinsic::loongarch_lasx_xvsll_h:
  case Intrinsic::loongarch_lasx_xvsll_w:
  case Intrinsic::loongarch_lasx_xvsll_d:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       truncateVecElts(N, DAG));
  case Intrinsic::loongarch_lsx_vslli_b:
  case Intrinsic::loongarch_lasx_xvslli_b:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<3>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_h:
  case Intrinsic::loongarch_lasx_xvslli_h:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<4>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_w:
  case Intrinsic::loongarch_lasx_xvslli_w:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_d:
  case Intrinsic::loongarch_lasx_xvslli_d:
    return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<6>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrl_b:
  case Intrinsic::loongarch_lsx_vsrl_h:
  case Intrinsic::loongarch_lsx_vsrl_w:
  case Intrinsic::loongarch_lsx_vsrl_d:
  case Intrinsic::loongarch_lasx_xvsrl_b:
  case Intrinsic::loongarch_lasx_xvsrl_h:
  case Intrinsic::loongarch_lasx_xvsrl_w:
  case Intrinsic::loongarch_lasx_xvsrl_d:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       truncateVecElts(N, DAG));
  case Intrinsic::loongarch_lsx_vsrli_b:
  case Intrinsic::loongarch_lasx_xvsrli_b:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<3>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_h:
  case Intrinsic::loongarch_lasx_xvsrli_h:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<4>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_w:
  case Intrinsic::loongarch_lasx_xvsrli_w:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_d:
  case Intrinsic::loongarch_lasx_xvsrli_d:
    return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<6>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsra_b:
  case Intrinsic::loongarch_lsx_vsra_h:
  case Intrinsic::loongarch_lsx_vsra_w:
  case Intrinsic::loongarch_lsx_vsra_d:
  case Intrinsic::loongarch_lasx_xvsra_b:
  case Intrinsic::loongarch_lasx_xvsra_h:
  case Intrinsic::loongarch_lasx_xvsra_w:
  case Intrinsic::loongarch_lasx_xvsra_d:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       truncateVecElts(N, DAG));
  case Intrinsic::loongarch_lsx_vsrai_b:
  case Intrinsic::loongarch_lasx_xvsrai_b:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<3>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_h:
  case Intrinsic::loongarch_lasx_xvsrai_h:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<4>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_w:
  case Intrinsic::loongarch_lasx_xvsrai_w:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<5>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_d:
  case Intrinsic::loongarch_lasx_xvsrai_d:
    return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
                       lowerVectorSplatImm<6>(N, 2, DAG));
  case Intrinsic::loongarch_lsx_vclz_b:
  case Intrinsic::loongarch_lsx_vclz_h:
  case Intrinsic::loongarch_lsx_vclz_w:
  case Intrinsic::loongarch_lsx_vclz_d:
  case Intrinsic::loongarch_lasx_xvclz_b:
  case Intrinsic::loongarch_lasx_xvclz_h:
  case Intrinsic::loongarch_lasx_xvclz_w:
  case Intrinsic::loongarch_lasx_xvclz_d:
    return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
  case Intrinsic::loongarch_lsx_vpcnt_b:
  case Intrinsic::loongarch_lsx_vpcnt_h:
  case Intrinsic::loongarch_lsx_vpcnt_w:
  case Intrinsic::loongarch_lsx_vpcnt_d:
  case Intrinsic::loongarch_lasx_xvpcnt_b:
  case Intrinsic::loongarch_lasx_xvpcnt_h:
  case Intrinsic::loongarch_lasx_xvpcnt_w:
  case Intrinsic::loongarch_lasx_xvpcnt_d:
    return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
  case Intrinsic::loongarch_lsx_vbitclr_b:
  case Intrinsic::loongarch_lsx_vbitclr_h:
  case Intrinsic::loongarch_lsx_vbitclr_w:
  case Intrinsic::loongarch_lsx_vbitclr_d:
  case Intrinsic::loongarch_lasx_xvbitclr_b:
  case Intrinsic::loongarch_lasx_xvbitclr_h:
  case Intrinsic::loongarch_lasx_xvbitclr_w:
  case Intrinsic::loongarch_lasx_xvbitclr_d:
    return lowerVectorBitClear(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_b:
  case Intrinsic::loongarch_lasx_xvbitclri_b:
    return lowerVectorBitClearImm<3>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_h:
  case Intrinsic::loongarch_lasx_xvbitclri_h:
    return lowerVectorBitClearImm<4>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_w:
  case Intrinsic::loongarch_lasx_xvbitclri_w:
    return lowerVectorBitClearImm<5>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_d:
  case Intrinsic::loongarch_lasx_xvbitclri_d:
    return lowerVectorBitClearImm<6>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitset_b:
  case Intrinsic::loongarch_lsx_vbitset_h:
  case Intrinsic::loongarch_lsx_vbitset_w:
  case Intrinsic::loongarch_lsx_vbitset_d:
  case Intrinsic::loongarch_lasx_xvbitset_b:
  case Intrinsic::loongarch_lasx_xvbitset_h:
  case Intrinsic::loongarch_lasx_xvbitset_w:
  case Intrinsic::loongarch_lasx_xvbitset_d: {
    EVT VecTy = N->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);
    return DAG.getNode(
        ISD::OR, DL, VecTy, N->getOperand(1),
        DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
  }
  case Intrinsic::loongarch_lsx_vbitseti_b:
  case Intrinsic::loongarch_lasx_xvbitseti_b:
    return lowerVectorBitSetImm<3>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_h:
  case Intrinsic::loongarch_lasx_xvbitseti_h:
    return lowerVectorBitSetImm<4>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_w:
  case Intrinsic::loongarch_lasx_xvbitseti_w:
    return lowerVectorBitSetImm<5>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_d:
  case Intrinsic::loongarch_lasx_xvbitseti_d:
    return lowerVectorBitSetImm<6>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrev_b:
  case Intrinsic::loongarch_lsx_vbitrev_h:
  case Intrinsic::loongarch_lsx_vbitrev_w:
  case Intrinsic::loongarch_lsx_vbitrev_d:
  case Intrinsic::loongarch_lasx_xvbitrev_b:
  case Intrinsic::loongarch_lasx_xvbitrev_h:
  case Intrinsic::loongarch_lasx_xvbitrev_w:
  case Intrinsic::loongarch_lasx_xvbitrev_d: {
    EVT VecTy = N->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);
    return DAG.getNode(
        ISD::XOR, DL, VecTy, N->getOperand(1),
        DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
  }
  case Intrinsic::loongarch_lsx_vbitrevi_b:
  case Intrinsic::loongarch_lasx_xvbitrevi_b:
    return lowerVectorBitRevImm<3>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_h:
  case Intrinsic::loongarch_lasx_xvbitrevi_h:
    return lowerVectorBitRevImm<4>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_w:
  case Intrinsic::loongarch_lasx_xvbitrevi_w:
    return lowerVectorBitRevImm<5>(N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_d:
  case Intrinsic::loongarch_lasx_xvbitrevi_d:
    return lowerVectorBitRevImm<6>(N, DAG);
  case Intrinsic::loongarch_lsx_vfadd_s:
  case Intrinsic::loongarch_lsx_vfadd_d:
  case Intrinsic::loongarch_lasx_xvfadd_s:
  case Intrinsic::loongarch_lasx_xvfadd_d:
    return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfsub_s:
  case Intrinsic::loongarch_lsx_vfsub_d:
  case Intrinsic::loongarch_lasx_xvfsub_s:
  case Intrinsic::loongarch_lasx_xvfsub_d:
    return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfmul_s:
  case Intrinsic::loongarch_lsx_vfmul_d:
  case Intrinsic::loongarch_lasx_xvfmul_s:
  case Intrinsic::loongarch_lasx_xvfmul_d:
    return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfdiv_s:
  case Intrinsic::loongarch_lsx_vfdiv_d:
  case Intrinsic::loongarch_lasx_xvfdiv_s:
  case Intrinsic::loongarch_lasx_xvfdiv_d:
    return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2));
  case Intrinsic::loongarch_lsx_vfmadd_s:
  case Intrinsic::loongarch_lsx_vfmadd_d:
  case Intrinsic::loongarch_lasx_xvfmadd_s:
  case Intrinsic::loongarch_lasx_xvfmadd_d:
    return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3));
  case Intrinsic::loongarch_lsx_vinsgr2vr_b:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_h:
  case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_w:
  case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_d:
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vreplgr2vr_b:
  case Intrinsic::loongarch_lsx_vreplgr2vr_h:
  case Intrinsic::loongarch_lsx_vreplgr2vr_w:
  case Intrinsic::loongarch_lsx_vreplgr2vr_d:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
    return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
                       DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
                                   N->getOperand(1)));
  case Intrinsic::loongarch_lsx_vreplve_b:
  case Intrinsic::loongarch_lsx_vreplve_h:
  case Intrinsic::loongarch_lsx_vreplve_w:
  case Intrinsic::loongarch_lsx_vreplve_d:
  case Intrinsic::loongarch_lasx_xvreplve_b:
  case Intrinsic::loongarch_lasx_xvreplve_h:
  case Intrinsic::loongarch_lasx_xvreplve_w:
  case Intrinsic::loongarch_lasx_xvreplve_d:
    return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
                       N->getOperand(1),
                       DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
                                   N->getOperand(2)));
  }
  return SDValue();
}
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::AND:
    return performANDCombine(N, DAG, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DAG, DCI, Subtarget);
  case ISD::SETCC:
    return performSETCCCombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::BITREV_W:
    return performBITREV_WCombine(N, DAG, DCI, Subtarget);
  case ISD::INTRINSIC_WO_CHAIN:
    return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
  }
  return SDValue();
}
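
// When zero-division checking is enabled, guard an integer DIV/MOD with a
// conditional BREAK 7 (BRK_DIVZERO) that traps if the divisor is zero.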
static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
                                              MachineBasicBlock *MBB) {
  if (!ZeroDivCheck)
    return MBB;

  // Build instructions:
  // MBB:
  //   div(or mod)   $dst, $dividend, $divisor
  //   bnez          $divisor, SinkMBB
  // BreakMBB:
  //   break         7 // BRK_DIVZERO
  // SinkMBB:
  //   fallthrough
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *MF = MBB->getParent();
  auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, BreakMBB);
  MF->insert(It, SinkMBB);

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Divisor = MI.getOperand(2);
  Register DivisorReg = Divisor.getReg();

  // MBB:
  BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
      .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
      .addMBB(SinkMBB);
  MBB->addSuccessor(BreakMBB);
  MBB->addSuccessor(SinkMBB);

  // BreakMBB:
  // See linux header file arch/loongarch/include/uapi/asm/break.h for the
  // definition of BRK_DIVZERO.
  BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
  BreakMBB->addSuccessor(SinkMBB);

  // Clear Divisor's kill flag.
  Divisor.setIsKill(false);

  return SinkMBB;
}
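
// Expand the PseudoVBZ/PseudoVBNZ family: evaluate the vector condition into
// a CFR flag, branch on it, and materialize 0 or 1 in a GPR through a PHI in
// the sink block.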
static MachineBasicBlock *
emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
                        const LoongArchSubtarget &Subtarget) {
  unsigned CondOpc;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoVBZ:
    CondOpc = LoongArch::VSETEQZ_V;
    break;
  case LoongArch::PseudoVBZ_B:
    CondOpc = LoongArch::VSETANYEQZ_B;
    break;
  case LoongArch::PseudoVBZ_H:
    CondOpc = LoongArch::VSETANYEQZ_H;
    break;
  case LoongArch::PseudoVBZ_W:
    CondOpc = LoongArch::VSETANYEQZ_W;
    break;
  case LoongArch::PseudoVBZ_D:
    CondOpc = LoongArch::VSETANYEQZ_D;
    break;
  case LoongArch::PseudoVBNZ:
    CondOpc = LoongArch::VSETNEZ_V;
    break;
  case LoongArch::PseudoVBNZ_B:
    CondOpc = LoongArch::VSETALLNEZ_B;
    break;
  case LoongArch::PseudoVBNZ_H:
    CondOpc = LoongArch::VSETALLNEZ_H;
    break;
  case LoongArch::PseudoVBNZ_W:
    CondOpc = LoongArch::VSETALLNEZ_W;
    break;
  case LoongArch::PseudoVBNZ_D:
    CondOpc = LoongArch::VSETALLNEZ_D;
    break;
  case LoongArch::PseudoXVBZ:
    CondOpc = LoongArch::XVSETEQZ_V;
    break;
  case LoongArch::PseudoXVBZ_B:
    CondOpc = LoongArch::XVSETANYEQZ_B;
    break;
  case LoongArch::PseudoXVBZ_H:
    CondOpc = LoongArch::XVSETANYEQZ_H;
    break;
  case LoongArch::PseudoXVBZ_W:
    CondOpc = LoongArch::XVSETANYEQZ_W;
    break;
  case LoongArch::PseudoXVBZ_D:
    CondOpc = LoongArch::XVSETANYEQZ_D;
    break;
  case LoongArch::PseudoXVBNZ:
    CondOpc = LoongArch::XVSETNEZ_V;
    break;
  case LoongArch::PseudoXVBNZ_B:
    CondOpc = LoongArch::XVSETALLNEZ_B;
    break;
  case LoongArch::PseudoXVBNZ_H:
    CondOpc = LoongArch::XVSETALLNEZ_H;
    break;
  case LoongArch::PseudoXVBNZ_W:
    CondOpc = LoongArch::XVSETALLNEZ_W;
    break;
  case LoongArch::PseudoXVBNZ_D:
    CondOpc = LoongArch::XVSETALLNEZ_D;
    break;
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(It, FalseBB);
  F->insert(It, TrueBB);
  F->insert(It, SinkBB);

  // Transfer the remainder of MBB and its successor edges to Sink.
  SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
  SinkBB->transferSuccessorsAndUpdatePHIs(BB);

  // Insert the real instruction to BB.
  Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
  BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());

  // Insert branch.
  BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
  BB->addSuccessor(FalseBB);
  BB->addSuccessor(TrueBB);

  // FalseBB.
  Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
  BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
      .addReg(LoongArch::R0)
      .addImm(0);
  BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
  FalseBB->addSuccessor(SinkBB);

  // TrueBB.
  Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
  BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
      .addReg(LoongArch::R0)
      .addImm(1);
  TrueBB->addSuccessor(SinkBB);

  // SinkBB: merge the results.
  BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
          MI.getOperand(0).getReg())
      .addReg(RD1)
      .addMBB(FalseBB)
      .addReg(RD2)
      .addMBB(TrueBB);

  // The pseudo instruction is gone now.
  MI.eraseFromParent();
  return SinkBB;
}
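
// Expand PseudoXVINSGR2VR_B/_H, for which there is no single 256-bit insert
// instruction: update the relevant 128-bit half with VINSGR2VR and recombine
// the halves with XVPERMI.Q when the index addresses the high half.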
static MachineBasicBlock *
emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
                     const LoongArchSubtarget &Subtarget) {
  unsigned InsOp;
  unsigned HalfSize;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoXVINSGR2VR_B:
    HalfSize = 16;
    InsOp = LoongArch::VINSGR2VR_B;
    break;
  case LoongArch::PseudoXVINSGR2VR_H:
    HalfSize = 8;
    InsOp = LoongArch::VINSGR2VR_H;
    break;
  }
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
  const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  // XDst = vector_insert XSrc, Elt, Idx
  Register XDst = MI.getOperand(0).getReg();
  Register XSrc = MI.getOperand(1).getReg();
  Register Elt = MI.getOperand(2).getReg();
  unsigned Idx = MI.getOperand(3).getImm();

  Register ScratchReg1 = XSrc;
  if (Idx >= HalfSize) {
    ScratchReg1 = MRI.createVirtualRegister(RC);
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
        .addReg(XSrc)
        .addReg(XSrc)
        .addImm(1);
  }

  Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
  Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
  BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
      .addReg(ScratchReg1, 0, LoongArch::sub_128);
  BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
      .addReg(ScratchSubReg1)
      .addReg(Elt)
      .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);

  Register ScratchReg2 = XDst;
  if (Idx >= HalfSize)
    ScratchReg2 = MRI.createVirtualRegister(RC);

  BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
      .addImm(0)
      .addReg(ScratchSubReg2)
      .addImm(LoongArch::sub_128);

  if (Idx >= HalfSize)
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
        .addReg(XSrc)
        .addReg(ScratchReg2)
        .addImm(2);

  MI.eraseFromParent();
  return BB;
}
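
// Expand scalar PseudoCTPOP through LSX: move the GPR into a vector register,
// use VPCNT there, and read the count back with VPICKVE2GR.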
static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          const LoongArchSubtarget &Subtarget) {
  assert(Subtarget.hasExtLSX());
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register ScratchReg1 = MRI.createVirtualRegister(RC);
  Register ScratchReg2 = MRI.createVirtualRegister(RC);
  Register ScratchReg3 = MRI.createVirtualRegister(RC);

  BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
  BuildMI(*BB, MI, DL,
          TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
                                       : LoongArch::VINSGR2VR_W),
          ScratchReg2)
      .addReg(ScratchReg1)
      .addReg(Src)
      .addImm(0);
  BuildMI(*BB, MI, DL,
          TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
          ScratchReg3)
      .addReg(ScratchReg2);
  BuildMI(*BB, MI, DL,
          TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
                                       : LoongArch::VPICKVE2GR_W),
          Dst)
      .addReg(ScratchReg3)
      .addImm(0);

  MI.eraseFromParent();
  return BB;
}
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case LoongArch::DIV_W:
  case LoongArch::DIV_WU:
  case LoongArch::MOD_W:
  case LoongArch::MOD_WU:
  case LoongArch::DIV_D:
  case LoongArch::DIV_DU:
  case LoongArch::MOD_D:
  case LoongArch::MOD_DU:
    return insertDivByZeroTrap(MI, BB);

  case LoongArch::WRFCSR: {
    BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
            LoongArch::FCSR0 + MI.getOperand(0).getImm())
        .addReg(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return BB;
  }
  case LoongArch::RDFCSR: {
    MachineInstr *ReadFCSR =
        BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
                MI.getOperand(0).getReg())
            .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
    ReadFCSR->getOperand(1).setIsUndef();
    MI.eraseFromParent();
    return BB;
  }
  case LoongArch::PseudoVBZ:
  case LoongArch::PseudoVBZ_B:
  case LoongArch::PseudoVBZ_H:
  case LoongArch::PseudoVBZ_W:
  case LoongArch::PseudoVBZ_D:
  case LoongArch::PseudoVBNZ:
  case LoongArch::PseudoVBNZ_B:
  case LoongArch::PseudoVBNZ_H:
  case LoongArch::PseudoVBNZ_W:
  case LoongArch::PseudoVBNZ_D:
  case LoongArch::PseudoXVBZ:
  case LoongArch::PseudoXVBZ_B:
  case LoongArch::PseudoXVBZ_H:
  case LoongArch::PseudoXVBZ_W:
  case LoongArch::PseudoXVBZ_D:
  case LoongArch::PseudoXVBNZ:
  case LoongArch::PseudoXVBNZ_B:
  case LoongArch::PseudoXVBNZ_H:
  case LoongArch::PseudoXVBNZ_W:
  case LoongArch::PseudoXVBNZ_D:
    return emitVecCondBranchPseudo(MI, BB, Subtarget);
  case LoongArch::PseudoXVINSGR2VR_B:
  case LoongArch::PseudoXVINSGR2VR_H:
    return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
  case LoongArch::PseudoCTPOP:
    return emitPseudoCTPOP(MI, BB, Subtarget);
  case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
    // while bl call instruction (where statepoint will be lowered at the
    // end) has implicit def. This def is early-clobber as it will be set at
    // the moment of the call and earlier than any use is read.
    // Add this implicit dead def here as a workaround.
    MI.addOperand(*MI.getMF(),
                  MachineOperand::CreateReg(
                      LoongArch::R1, /*isDef*/ true,
                      /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
                      /*isUndef*/ false, /*isEarlyClobber*/ true));
    if (!Subtarget.is64Bit())
      report_fatal_error("STATEPOINT is only supported on 64-bit targets");
    return emitPatchPoint(MI, BB);
  }
}
bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (!Subtarget.hasUAL())
    return false;

  // TODO: set reasonable speed number.
  if (Fast)
    *Fast = 1;
  return true;
}
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((LoongArchISD::NodeType)Opcode) {
  case LoongArchISD::FIRST_NUMBER:
    break;

#define NODE_NAME_CASE(node)                                                   \
  case LoongArchISD::node:                                                     \
    return "LoongArchISD::" #node;

    // TODO: Add more target-dependent nodes later.
    NODE_NAME_CASE(CALL)
    NODE_NAME_CASE(CALL_MEDIUM)
    NODE_NAME_CASE(CALL_LARGE)
    NODE_NAME_CASE(RET)
    NODE_NAME_CASE(TAIL)
    NODE_NAME_CASE(TAIL_MEDIUM)
    NODE_NAME_CASE(TAIL_LARGE)
    NODE_NAME_CASE(SLL_W)
    NODE_NAME_CASE(SRA_W)
    NODE_NAME_CASE(SRL_W)
    NODE_NAME_CASE(BSTRINS)
    NODE_NAME_CASE(BSTRPICK)
    NODE_NAME_CASE(MOVGR2FR_W_LA64)
    NODE_NAME_CASE(MOVFR2GR_S_LA64)
    NODE_NAME_CASE(FTINT)
    NODE_NAME_CASE(REVB_2H)
    NODE_NAME_CASE(REVB_2W)
    NODE_NAME_CASE(BITREV_4B)
    NODE_NAME_CASE(BITREV_W)
    NODE_NAME_CASE(ROTR_W)
    NODE_NAME_CASE(ROTL_W)
    NODE_NAME_CASE(DIV_W)
    NODE_NAME_CASE(DIV_WU)
    NODE_NAME_CASE(MOD_W)
    NODE_NAME_CASE(MOD_WU)
    NODE_NAME_CASE(CLZ_W)
    NODE_NAME_CASE(CTZ_W)
    NODE_NAME_CASE(DBAR)
    NODE_NAME_CASE(IBAR)
    NODE_NAME_CASE(BREAK)
    NODE_NAME_CASE(SYSCALL)
    NODE_NAME_CASE(CRC_W_B_W)
    NODE_NAME_CASE(CRC_W_H_W)
    NODE_NAME_CASE(CRC_W_W_W)
    NODE_NAME_CASE(CRC_W_D_W)
    NODE_NAME_CASE(CRCC_W_B_W)
    NODE_NAME_CASE(CRCC_W_H_W)
    NODE_NAME_CASE(CRCC_W_W_W)
    NODE_NAME_CASE(CRCC_W_D_W)
    NODE_NAME_CASE(CSRRD)
    NODE_NAME_CASE(CSRWR)
    NODE_NAME_CASE(CSRXCHG)
    NODE_NAME_CASE(IOCSRRD_B)
    NODE_NAME_CASE(IOCSRRD_H)
    NODE_NAME_CASE(IOCSRRD_W)
    NODE_NAME_CASE(IOCSRRD_D)
    NODE_NAME_CASE(IOCSRWR_B)
    NODE_NAME_CASE(IOCSRWR_H)
    NODE_NAME_CASE(IOCSRWR_W)
    NODE_NAME_CASE(IOCSRWR_D)
    NODE_NAME_CASE(CPUCFG)
    NODE_NAME_CASE(MOVGR2FCSR)
    NODE_NAME_CASE(MOVFCSR2GR)
    NODE_NAME_CASE(CACOP_D)
    NODE_NAME_CASE(CACOP_W)
    NODE_NAME_CASE(VSHUF)
    NODE_NAME_CASE(VPICKEV)
    NODE_NAME_CASE(VPICKOD)
    NODE_NAME_CASE(VPACKEV)
    NODE_NAME_CASE(VPACKOD)
    NODE_NAME_CASE(VILVL)
    NODE_NAME_CASE(VILVH)
    NODE_NAME_CASE(VSHUF4I)
    NODE_NAME_CASE(VREPLVEI)
    NODE_NAME_CASE(VREPLGR2VR)
    NODE_NAME_CASE(XVPERMI)
    NODE_NAME_CASE(VPICK_SEXT_ELT)
    NODE_NAME_CASE(VPICK_ZEXT_ELT)
    NODE_NAME_CASE(VREPLVE)
    NODE_NAME_CASE(VALL_ZERO)
    NODE_NAME_CASE(VANY_ZERO)
    NODE_NAME_CASE(VALL_NONZERO)
    NODE_NAME_CASE(VANY_NONZERO)
    NODE_NAME_CASE(FRECIPE)
    NODE_NAME_CASE(FRSQRTE)
  }
#undef NODE_NAME_CASE
  return nullptr;
}
//===----------------------------------------------------------------------===//
//                     Calling Convention Implementation
//===----------------------------------------------------------------------===//

// Eight general-purpose registers a0-a7 are used for passing integer
// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
// to pass fixed-point arguments, and floating-point arguments when no FPR is
// available or with the soft float ABI.
const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
                             LoongArch::R7,  LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
// Eight floating-point registers fa0-fa7 are used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// FPR32 and FPR64 alias each other.
const MCPhysReg ArgFPR64s[] = {
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};

const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};
// Pass a 2*GRLen argument that has been split into two GRLen values through
// registers or the stack as necessary.
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
                                     ISD::ArgFlagsTy ArgFlags2) {
  unsigned GRLenInBytes = GRLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(GRLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
  }
  return false;
}
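
// Illustration (not part of the ABI text itself): on LA64 an i128 argument is
// legalised into two i64 halves. If two GPRs remain, they take both halves; if
// only one remains, the low half goes in that register and the high half goes
// to an 8-byte stack slot; if none remain, both halves are placed on the stack.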
// Implements the LoongArch calling convention. Returns true upon failure.
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
                         unsigned ValNo, MVT ValVT,
                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                         CCState &State, bool IsFixed, bool IsRet,
                         Type *OrigTy) {
  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
  MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
  MVT LocVT = ValVT;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // If passing a variadic argument, or if no FPR is available.
  bool UseGPRForFloat = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case LoongArchABI::ABI_ILP32F:
  case LoongArchABI::ABI_LP64F:
  case LoongArchABI::ABI_ILP32D:
  case LoongArchABI::ABI_LP64D:
    UseGPRForFloat = !IsFixed;
    break;
  case LoongArchABI::ABI_ILP32S:
  case LoongArchABI::ABI_LP64S:
    break;
  }

  // FPR32 and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
    UseGPRForFloat = true;

  if (UseGPRForFloat && ValVT == MVT::f32) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
    // TODO: Handle passing f64 on LA32 with D feature.
    report_fatal_error("Passing f64 with GPR on LA32 is undefined");
  }

  // If this is a variadic argument, the LoongArch calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
  // byte alignment. An aligned register should be used regardless of whether
  // the original argument was split during legalisation or not. The argument
  // will not be passed by registers if the original type is larger than
  // 2*GRLen, so the register alignment rule does not apply.
  unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
                                    ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = GRLen / 8;
  Align StackAlign = Align(GRLen / 8);

  if (ValVT == MVT::f32 && !UseGPRForFloat)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForFloat)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.is128BitVector())
    Reg = State.AllocateReg(ArgVRs);
  else if (ValVT.is256BitVector())
    Reg = State.AllocateReg(ArgXRs);
  else
    Reg = State.AllocateReg(ArgGPRs);

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }
  assert((!UseGPRForFloat || LocVT == GRLenVT) &&
         "Expected a GRLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-cast is needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }

  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
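
// Worked example (illustrative): a variadic i128 on LA64 has 16-byte
// alignment, so if the next free GPR is odd-numbered (say a1/R5) it is skipped
// and the value is passed in the aligned pair a2/a3 instead; the skipped
// register is simply burned, matching the "even/aligned register" rule above.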
void LoongArchTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    LoongArchCCAssignFn Fn) const {
  FunctionType *FType = MF.getFunction().getFunctionType();
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    MVT ArgVT = Ins[i].VT;
    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
    LoongArchABI::ABI ABI =
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
           CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
                        << '\n');
      llvm_unreachable("");
    }
  }
}
void LoongArchTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT ArgVT = Outs[i].VT;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
    LoongArchABI::ABI ABI =
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
           CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
                        << '\n');
      llvm_unreachable("");
    }
  }
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const LoongArchTargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the OptW pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        LoongArchMachineFunctionInfo *LAFI =
            MF.getInfo<LoongArchMachineFunctionInfo>();
        LAFI->addSExt32Register(VReg);
      }
    }
  }

  return convertLocVTToValVT(DAG, Val, VA, DL);
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT ValVT = VA.getValVT();
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(
      FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  return DAG.getExtLoad(
      ExtType, DL, VA.getLocVT(), Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                             CCValAssign::LocInfo LocInfo,
                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
    //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
    static const MCPhysReg GPRList[] = {
        LoongArch::R23, LoongArch::R24, LoongArch::R25,
        LoongArch::R26, LoongArch::R27, LoongArch::R28,
        LoongArch::R29, LoongArch::R30, LoongArch::R31};
    if (MCRegister Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    // Pass in STG registers: F1, F2, F3, F4
    //                        fs0, fs1, fs2, fs3
    static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
                                          LoongArch::F26, LoongArch::F27};
    if (MCRegister Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    // Pass in STG registers: D1, D2, D3, D4
    //                        fs4, fs5, fs6, fs7
    static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
                                          LoongArch::F30_64, LoongArch::F31_64};
    if (MCRegister Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true;
}
// Transform physical registers into virtual registers.
SDValue LoongArchTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
      report_fatal_error(
          "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(ArgPartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
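
// Example of the vararg save area (illustrative): for a variadic function
// whose fixed arguments consume a0-a2, the remaining a3-a7 are spilled here,
// giving VarArgsSaveSize = 5 * GRLenInBytes plus one padding slot (Idx == 3 is
// odd) so that the save area stays 2*GRLen aligned.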
bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call.
bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
                                                 SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() != ISD::CopyToReg)
    return false;

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getGluedNode())
    return false;

  // The copy must be used by a LoongArchISD::RET, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != LoongArchISD::RET)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}
// Check whether the call is eligible for tail call optimization.
bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVectorImpl<CCValAssign> &ArgLocs) const {

  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getStackSize() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // Do not tail call opt if either the callee or caller has a byval argument.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }
  return true;
}
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue
LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                   SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  bool &IsTailCall = CLI.IsTailCall;

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args.
  SmallVector<SDValue> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>> RegsToPass;
  SmallVector<SDValue> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split and passed by reference, we need to
      // store the required parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      unsigned ArgPartOffset = Outs[i].PartOffset;
      assert(ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();

        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  unsigned Op;
  switch (DAG.getTarget().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model");
  case CodeModel::Small:
    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
    break;
  case CodeModel::Medium:
    assert(Subtarget.is64Bit() && "Medium code model requires LA64");
    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
    break;
  case CodeModel::Large:
    assert(Subtarget.is64Bit() && "Large code model requires LA64");
    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
    break;
  }

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(Op, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}
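
// Note on the code-model switch above (illustrative): with the small code
// model a direct call is matched as a single PseudoCALL (a plain bl), while
// the medium and large models expand to multi-instruction address
// materialisation sequences, which is why they assert LA64 before choosing the
// *_MEDIUM and *_LARGE call nodes.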
bool LoongArchTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    LoongArchABI::ABI ABI =
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
                     Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
                     nullptr))
      return false;
  }
  return true;
}
SDValue LoongArchTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_LoongArch);
  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Handle a 'normal' return.
    SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

    // Guarantee that all emitted copies are stuck together.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}
bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
                                               EVT VT) const {
  if (!Subtarget.hasExtLSX())
    return false;

  if (VT == MVT::f32) {
    uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
    return (masked == 0x3e000000 || masked == 0x40000000);
  }

  if (VT == MVT::f64) {
    uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
    return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
  }

  return false;
}

bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                           bool ForCodeSize) const {
  // TODO: Maybe need more checks here after vector extension is supported.
  if (VT == MVT::f32 && !Subtarget.hasBasicF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasBasicD())
    return false;
  return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
}
bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  if (!Subtarget.is64Bit())
    return isa<LoadInst>(I) || isa<StoreInst>(I);

  if (isa<LoadInst>(I))
    return true;

  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
  // require fences because we can use amswap_db.[w/d].
  Type *Ty = I->getOperand(0)->getType();
  if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
    unsigned Size = Ty->getIntegerBitWidth();
    return (Size == 8 || Size == 16);
  }

  return false;
}
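
// In other words (illustrative): on LA64 an i32/i64 atomic store can be
// lowered to amswap_db.[w/d], whose built-in barrier makes extra fences
// unnecessary, whereas i8/i16 atomic stores (and all LA32 atomic loads and
// stores) still rely on the fences requested here.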
EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                LLVMContext &Context,
                                                EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}
bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
  // TODO: Support vectors.
  return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
}
bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                 const CallInst &I,
                                                 MachineFunction &MF,
                                                 unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
    // TODO: Add more Intrinsics later.
  }
}
// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
// regression, we need to implement it manually.
void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to expand");
  unsigned MinWordSize = 4;

  IRBuilder<> Builder(AI);
  LLVMContext &Ctx = Builder.getContext();
  const DataLayout &DL = AI->getDataLayout();
  Type *ValueType = AI->getType();
  Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);

  Value *Addr = AI->getPointerOperand();
  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());

  Value *AlignedAddr = Builder.CreateIntrinsic(
      Intrinsic::ptrmask, {PtrTy, IntTy},
      {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
      "AlignedAddr");

  Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
  Value *Mask = Builder.CreateShl(
      ConstantInt::get(WordType,
                       (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
      ShiftAmt, "Mask");
  Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
                        ShiftAmt, "ValOperand_Shifted");
  Value *NewOperand;
  if (Op == AtomicRMWInst::And)
    NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI =
      Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
                              AI->getOrdering(), AI->getSyncScopeID());

  Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
  Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
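
// Shape of the expansion above (illustrative): an i8 `atomicrmw or` on address
// A becomes an i32 `atomicrmw or` on A & ~3 with the operand zero-extended and
// shifted into its byte lane; for `and`, the inverted lane mask is OR'ed in so
// the untouched bytes are preserved. The old byte is then shifted back down
// and truncated to produce the original result value.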
TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // TODO: Add more AtomicRMWInst that needs to be extended.

  // Since floating-point operation requires a non-trivial set of data
  // operations, use CmpXChg to expand.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap ||
      AI->getOperation() == AtomicRMWInst::USubCond ||
      AI->getOperation() == AtomicRMWInst::USubSat)
    return AtomicExpansionKind::CmpXChg;

  if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
      (AI->getOperation() == AtomicRMWInst::Xchg ||
       AI->getOperation() == AtomicRMWInst::Add ||
       AI->getOperation() == AtomicRMWInst::Sub)) {
    return AtomicExpansionKind::None;
  }

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Subtarget.hasLAMCAS()) {
    if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
                      AI->getOperation() == AtomicRMWInst::Or ||
                      AI->getOperation() == AtomicRMWInst::Xor))
      return AtomicExpansionKind::Expand;
    if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
      return AtomicExpansionKind::CmpXChg;
  }

  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
                                    AtomicRMWInst::BinOp BinOp) {
  if (GRLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::loongarch_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::loongarch_masked_atomicrmw_min_i64;
      // TODO: support other AtomicRMWInst.
    }
  }

  if (GRLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
      // TODO: support other AtomicRMWInst.
    }
  }

  llvm_unreachable("Unexpected GRLen\n");
}
TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {

  if (Subtarget.hasLAMCAS())
    return AtomicExpansionKind::None;

  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  AtomicOrdering FailOrd = CI->getFailureOrdering();
  Value *FailureOrdering =
      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));

  // TODO: Support cmpxchg on LA32.
  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
  Type *Tys[] = {AlignedAddr->getType()};
  Value *Result = Builder.CreateIntrinsic(
      CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LL/SC loop emitted by
  // int_loongarch_masked_atomicrmw_xchg.
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
                                     Builder.CreateNot(Mask, "Inv_Mask"),
                                     AI->getAlign(), Ord);
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
                                     AI->getAlign(), Ord);
  }

  unsigned GRLen = Subtarget.getGRLen();
  Value *Ordering =
      Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);

  if (GRLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend it.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LlwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (GRLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
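
// Illustration: an i16 `atomicrmw max` on LA64 goes through
// llvm.loongarch.masked.atomicrmw.max.i64, receiving the aligned word address,
// the shifted operand and mask, and the extra GRLen-ShiftAmt-16 shift amount so
// the LL/SC loop can sign-extend the 16-bit field before the signed compare.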
bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

Register LoongArchTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R4;
}

Register LoongArchTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R5;
}
//===----------------------------------------------------------------------===//
//                           Target Optimization Hooks
//===----------------------------------------------------------------------===//

static int getEstimateRefinementSteps(EVT VT,
                                      const LoongArchSubtarget &Subtarget) {
  // The FRECIPE feature's estimate instructions have a relative accuracy of
  // 2^-14. IEEE float has 23 significand digits and double has 52 digits.
  int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
  return RefinementSteps;
}
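
// Rationale (informal): each Newton-Raphson step roughly doubles the number of
// accurate bits, so starting from ~14 bits one step is enough for f32's 24-bit
// significand while f64's 53-bit significand needs two.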
SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
                                                 SelectionDAG &DAG, int Enabled,
                                                 int &RefinementSteps,
                                                 bool &UseOneConstNR,
                                                 bool Reciprocal) const {
  if (Subtarget.hasFrecipe()) {
    SDLoc DL(Operand);
    EVT VT = Operand.getValueType();

    if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
        (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
        (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {

      if (RefinementSteps == ReciprocalEstimate::Unspecified)
        RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

      SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
      if (Reciprocal)
        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);

      UseOneConstNR = false;
      return Estimate;
    }
  }

  return SDValue();
}
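
// Note: frsqrte.* produces an estimate of 1/sqrt(x); multiplying that estimate
// by x yields an estimate of sqrt(x), which is what the ISD::FMUL above
// provides. The remaining accuracy is recovered by the RefinementSteps
// Newton-Raphson iterations requested from the generic combiner.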
SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
                                                  SelectionDAG &DAG,
                                                  int Enabled,
                                                  int &RefinementSteps) const {
  if (Subtarget.hasFrecipe()) {
    SDLoc DL(Operand);
    EVT VT = Operand.getValueType();

    if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
        (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
        (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {

      if (RefinementSteps == ReciprocalEstimate::Unspecified)
        RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

      return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
    }
  }

  return SDValue();
}
//===----------------------------------------------------------------------===//
//                           LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//

LoongArchTargetLowering::ConstraintType
LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
  // LoongArch specific constraints in GCC: config/loongarch/constraints.md
  //
  // 'f':  A floating-point register (if available).
  // 'k':  A memory operand whose address is formed by a base register and
  //       (optionally scaled) index register.
  // 'l':  A signed 16-bit constant.
  // 'm':  A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as st.w and ld.w.
  // 'I':  A signed 12-bit constant (for arithmetic instructions).
  // 'J':  Integer zero.
  // 'K':  An unsigned 12-bit constant (for logic instructions).
  // "ZB": An address that is held in a general-purpose register. The offset is
  //       zero.
  // "ZC": A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as ll.w and sc.w.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'l':
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'k':
      return C_Memory;
    }
  }

  if (Constraint == "ZC" || Constraint == "ZB")
    return C_Memory;

  // 'm' is handled here.
  return TargetLowering::getConstraintType(Constraint);
}

InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
    StringRef ConstraintCode) const {
  return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
      .Case("k", InlineAsm::ConstraintCode::k)
      .Case("ZB", InlineAsm::ConstraintCode::ZB)
      .Case("ZC", InlineAsm::ConstraintCode::ZC)
      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}
std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // LoongArch register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &LoongArch::GPRRegClass);
    case 'f':
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(0U, &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(0U, &LoongArch::FPR64RegClass);
      if (Subtarget.hasExtLSX() &&
          TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
        return std::make_pair(0U, &LoongArch::LSX128RegClass);
      if (Subtarget.hasExtLASX() &&
          TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
        return std::make_pair(0U, &LoongArch::LASX256RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints while the official register name is prefixed with a '$'. So we
  // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
  // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
  // case insensitive, so no need to convert the constraint to upper case here.
  //
  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
  // decode the usage of register name aliases into their official names. And
  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
  // official register names.
  if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
      Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, join_items("", Temp.first, Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
                                                    Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
                                                    Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
#define GET_REGISTER_MATCHER
#include "LoongArchGenAsmMatcher.inc"

Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
    // in which the immediate has two set bits. Or break (MUL x, imm)
    // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
    // equals (1 << s0) - (1 << s1).
    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
      if (Shifts >= 12)
        return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than the other 3 cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}

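// Worked examples for decomposeMulByConstant (illustrative arithmetic only,
// not from the upstream comments):
//   x * 17    -> (x << 4) + x, a single ALSL, accepted by the first check;
//   x * 4128  -> (x << 12) + (x << 5), two set bits, accepted by the final
//                check since 4128 - 32 is a power of two;
//   x * 10240 -> rejected, because ImmPop == 5 after stripping 11 trailing
//                zeros and (SLLI (ALSL x, x, 2), 11) is already optimal.
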
bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks after supporting vector extension.

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
  // with `UAL` feature.
  if (!isInt<12>(AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    return false;
  }

  return true;
}

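// Illustrative mapping of the accepted forms above (informal, not from the
// source): mode 2 corresponds to e.g. `ld.d $a0, $a1, 2040` (si12 offset),
// mode 3 to `ldptr.d $a0, $a1, 8188` (si14 offset scaled by 4, gated on UAL
// here), and mode 4 to the indexed form `ldx.d $a0, $a1, $a2`.
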
bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

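// Illustrative rationale (assumption): the isInt<12> checks above mirror the
// si12 immediate fields of slti/sltui and addi.w/addi.d, so only immediates
// that fit a single compare or add instruction are treated as legal.
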
bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

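// Illustrative note (assumption): ld.bu and ld.hu already zero-extend the
// loaded byte/halfword into the full general-purpose register, so a zext of
// such a load costs no extra instruction, which is what the hook above
// reports.
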
bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  return !isa<ConstantSDNode>(Y);
}

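// Illustrative note (assumption): LoongArch provides register-register
// andn/orn instructions, so (X & ~Y) is a single operation when Y is not a
// constant; for a constant Y the inverted value can usually be folded or
// materialized directly, hence the isa<ConstantSDNode> check above.
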
ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
  // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
  return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}

bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
    EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getGRLen()))
    return false;

  return true;
}

// memcpy, and other memory intrinsics, typically try to use wider load/store
// instructions if the source/dest is aligned and the copy size is large
// enough. We therefore want to align such objects passed to memory intrinsics.
bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
                                                     unsigned &MinSize,
                                                     Align &PrefAlign) const {
  if (!isa<MemIntrinsic>(CI))
    return false;

  if (Subtarget.is64Bit()) {
    MinSize = 8;
    PrefAlign = Align(8);
  } else {
    MinSize = 4;
    PrefAlign = Align(4);