1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "MCTargetDesc/LoongArchBaseInfo.h"
20 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/CodeGen/ISDOpcodes.h"
24 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
25 #include "llvm/CodeGen/SelectionDAGNodes.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/IntrinsicsLoongArch.h"
29 #include "llvm/Support/CodeGen.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
35 using namespace llvm;
37 #define DEBUG_TYPE "loongarch-isel-lowering"
39 STATISTIC(NumTailCalls, "Number of tail calls");
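// This defines the -loongarch-check-zero-division command-line flag (usable
// with llc, or via -mllvm from a frontend). It defaults to off; when enabled,
// extra checks are emitted so that integer division by zero traps at run time.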
41 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
49 MVT GRLenVT = Subtarget.getGRLenVT();
51 // Set up the register classes.
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
72 // Set operations for LA32 and LA64.
74 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75 MVT::i1, Promote);
77 setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
78 setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
79 setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
80 setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
81 setOperationAction(ISD::ROTL, GRLenVT, Expand);
82 setOperationAction(ISD::CTPOP, GRLenVT, Expand);
84 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85 ISD::JumpTable, ISD::GlobalTLSAddress},
86 GRLenVT, Custom);
88 setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
90 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
91 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
92 setOperationAction(ISD::VASTART, MVT::Other, Custom);
93 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
95 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
96 setOperationAction(ISD::TRAP, MVT::Other, Legal);
98 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
99 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
103 // we know which of sll and revb.2h is faster.
104 setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
105 setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
110 setOperationAction(ISD::BSWAP, MVT::i16, Custom);
112 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
113 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
114 setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
116 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
118 setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
119 setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
121 // Set operations for LA64 only.
123 if (Subtarget.is64Bit()) {
124 setOperationAction(ISD::ADD, MVT::i32, Custom);
125 setOperationAction(ISD::SUB, MVT::i32, Custom);
126 setOperationAction(ISD::SHL, MVT::i32, Custom);
127 setOperationAction(ISD::SRA, MVT::i32, Custom);
128 setOperationAction(ISD::SRL, MVT::i32, Custom);
129 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
130 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
131 setOperationAction(ISD::ROTR, MVT::i32, Custom);
132 setOperationAction(ISD::ROTL, MVT::i32, Custom);
133 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
134 setOperationAction(ISD::CTLZ, MVT::i32, Custom);
135 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
136 setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
137 setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
138 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
139 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
140 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
142 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
143 setOperationAction(ISD::BSWAP, MVT::i32, Custom);
144 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
145 Custom);
146 setOperationAction(ISD::LROUND, MVT::i32, Custom);
149 // Set operations for LA32 only.
151 if (!Subtarget.is64Bit()) {
152 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
153 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
154 setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
155 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
156 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
159 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
161 static const ISD::CondCode FPCCToExpand[] = {
162 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
163 ISD::SETGE, ISD::SETNE, ISD::SETGT};
165 // Set operations for 'F' feature.
167 if (Subtarget.hasBasicF()) {
168 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
169 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
170 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
172 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
173 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
174 setOperationAction(ISD::FMA, MVT::f32, Legal);
175 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
176 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
177 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
178 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
179 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
180 setOperationAction(ISD::FSIN, MVT::f32, Expand);
181 setOperationAction(ISD::FCOS, MVT::f32, Expand);
182 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
183 setOperationAction(ISD::FPOW, MVT::f32, Expand);
184 setOperationAction(ISD::FREM, MVT::f32, Expand);
185 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
188 if (Subtarget.is64Bit())
189 setOperationAction(ISD::FRINT, MVT::f32, Legal);
191 if (!Subtarget.hasBasicD()) {
192 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
193 if (Subtarget.is64Bit()) {
194 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
195 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
200 // Set operations for 'D' feature.
202 if (Subtarget.hasBasicD()) {
203 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
204 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
205 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
206 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
207 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
209 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
210 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
211 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
212 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
213 setOperationAction(ISD::FMA, MVT::f64, Legal);
214 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
215 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
216 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
217 setOperationAction(ISD::FSIN, MVT::f64, Expand);
218 setOperationAction(ISD::FCOS, MVT::f64, Expand);
219 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
220 setOperationAction(ISD::FPOW, MVT::f64, Expand);
221 setOperationAction(ISD::FREM, MVT::f64, Expand);
222 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
223 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
225 if (Subtarget.is64Bit())
226 setOperationAction(ISD::FRINT, MVT::f64, Legal);
229 // Set operations for 'LSX' feature.
231 if (Subtarget.hasExtLSX()) {
232 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
233 // Expand all truncating stores and extending loads.
234 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
235 setTruncStoreAction(VT, InnerVT, Expand);
236 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
237 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
238 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
240 // By default everything must be expanded. Then we will selectively turn
241 // on ones that can be effectively codegen'd.
242 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
243 setOperationAction(Op, VT, Expand);
246 for (MVT VT : LSXVTs) {
247 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
248 setOperationAction(ISD::BITCAST, VT, Legal);
249 setOperationAction(ISD::UNDEF, VT, Legal);
251 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
252 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
253 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
255 setOperationAction(ISD::SETCC, VT, Legal);
256 setOperationAction(ISD::VSELECT, VT, Legal);
257 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
259 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
260 setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
261 setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
262 Legal);
263 setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
264 VT, Legal);
265 setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
266 setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
267 setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
268 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
269 setCondCodeAction(
270 {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
271 Expand);
273 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
274 setOperationAction(ISD::BSWAP, VT, Legal);
275 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
276 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
277 setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
279 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
280 setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
281 setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
282 setOperationAction(ISD::FMA, VT, Legal);
283 setOperationAction(ISD::FSQRT, VT, Legal);
284 setOperationAction(ISD::FNEG, VT, Legal);
285 setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
286 ISD::SETUGE, ISD::SETUGT},
287 VT, Expand);
289 setOperationAction(ISD::CTPOP, GRLenVT, Legal);
290 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
291 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
292 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
293 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
296 // Set operations for 'LASX' feature.
298 if (Subtarget.hasExtLASX()) {
299 for (MVT VT : LASXVTs) {
300 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
301 setOperationAction(ISD::BITCAST, VT, Legal);
302 setOperationAction(ISD::UNDEF, VT, Legal);
304 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
305 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
306 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
307 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
309 setOperationAction(ISD::SETCC, VT, Legal);
310 setOperationAction(ISD::VSELECT, VT, Legal);
311 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
313 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
314 setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
315 setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
316 Legal);
317 setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
318 VT, Legal);
319 setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
320 setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
321 setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
322 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
323 setCondCodeAction(
324 {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
325 Expand);
327 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
328 setOperationAction(ISD::BSWAP, VT, Legal);
329 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
330 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
331 setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
333 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
334 setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
335 setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
336 setOperationAction(ISD::FMA, VT, Legal);
337 setOperationAction(ISD::FSQRT, VT, Legal);
338 setOperationAction(ISD::FNEG, VT, Legal);
339 setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
340 ISD::SETUGE, ISD::SETUGT},
341 VT, Expand);
345 // Set DAG combine for LA32 and LA64.
347 setTargetDAGCombine(ISD::AND);
348 setTargetDAGCombine(ISD::OR);
349 setTargetDAGCombine(ISD::SRL);
350 setTargetDAGCombine(ISD::SETCC);
352 // Set DAG combine for 'LSX' feature.
354 if (Subtarget.hasExtLSX())
355 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
357 // Compute derived properties from the register classes.
358 computeRegisterProperties(Subtarget.getRegisterInfo());
360 setStackPointerRegisterToSaveRestore(LoongArch::R3);
362 setBooleanContents(ZeroOrOneBooleanContent);
363 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
365 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
367 setMinCmpXchgSizeInBits(32);
369 // Function alignments.
370 setMinFunctionAlignment(Align(4));
371 // Set preferred alignments.
372 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
373 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
374 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
376 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
377 if (Subtarget.hasLAMCAS())
378 setMinCmpXchgSizeInBits(8);
381 bool LoongArchTargetLowering::isOffsetFoldingLegal(
382 const GlobalAddressSDNode *GA) const {
383 // In order to maximise the opportunity for common subexpression elimination,
384 // keep a separate ADD node for the global address offset instead of folding
385 // it in the global address node. Later peephole optimisations may choose to
386 // fold it back in when profitable.
387 return false;
390 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
391 SelectionDAG &DAG) const {
392 switch (Op.getOpcode()) {
393 case ISD::ATOMIC_FENCE:
394 return lowerATOMIC_FENCE(Op, DAG);
395 case ISD::EH_DWARF_CFA:
396 return lowerEH_DWARF_CFA(Op, DAG);
397 case ISD::GlobalAddress:
398 return lowerGlobalAddress(Op, DAG);
399 case ISD::GlobalTLSAddress:
400 return lowerGlobalTLSAddress(Op, DAG);
401 case ISD::INTRINSIC_WO_CHAIN:
402 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
403 case ISD::INTRINSIC_W_CHAIN:
404 return lowerINTRINSIC_W_CHAIN(Op, DAG);
405 case ISD::INTRINSIC_VOID:
406 return lowerINTRINSIC_VOID(Op, DAG);
407 case ISD::BlockAddress:
408 return lowerBlockAddress(Op, DAG);
409 case ISD::JumpTable:
410 return lowerJumpTable(Op, DAG);
411 case ISD::SHL_PARTS:
412 return lowerShiftLeftParts(Op, DAG);
413 case ISD::SRA_PARTS:
414 return lowerShiftRightParts(Op, DAG, true);
415 case ISD::SRL_PARTS:
416 return lowerShiftRightParts(Op, DAG, false);
417 case ISD::ConstantPool:
418 return lowerConstantPool(Op, DAG);
419 case ISD::FP_TO_SINT:
420 return lowerFP_TO_SINT(Op, DAG);
421 case ISD::BITCAST:
422 return lowerBITCAST(Op, DAG);
423 case ISD::UINT_TO_FP:
424 return lowerUINT_TO_FP(Op, DAG);
425 case ISD::SINT_TO_FP:
426 return lowerSINT_TO_FP(Op, DAG);
427 case ISD::VASTART:
428 return lowerVASTART(Op, DAG);
429 case ISD::FRAMEADDR:
430 return lowerFRAMEADDR(Op, DAG);
431 case ISD::RETURNADDR:
432 return lowerRETURNADDR(Op, DAG);
433 case ISD::WRITE_REGISTER:
434 return lowerWRITE_REGISTER(Op, DAG);
435 case ISD::INSERT_VECTOR_ELT:
436 return lowerINSERT_VECTOR_ELT(Op, DAG);
437 case ISD::EXTRACT_VECTOR_ELT:
438 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
439 case ISD::BUILD_VECTOR:
440 return lowerBUILD_VECTOR(Op, DAG);
441 case ISD::VECTOR_SHUFFLE:
442 return lowerVECTOR_SHUFFLE(Op, DAG);
444 return SDValue();
447 /// Determine whether a range fits a regular pattern of values.
448 /// This function accounts for the possibility of jumping over the End iterator.
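/// For example, with CheckStride = 1 the mask <0, -1, 4, 6> fits
/// ExpectedIndex = 0 and ExpectedIndexStride = 2, because undef (-1) elements
/// are allowed to stand for any expected value.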
449 template <typename ValType>
450 static bool
451 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
452 unsigned CheckStride,
453 typename SmallVectorImpl<ValType>::const_iterator End,
454 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
455 auto &I = Begin;
457 while (I != End) {
458 if (*I != -1 && *I != ExpectedIndex)
459 return false;
460 ExpectedIndex += ExpectedIndexStride;
462 // Incrementing past End is undefined behaviour so we must increment one
463 // step at a time and check for End at each step.
464 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
465 ; // Empty loop body.
467 return true;
470 /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
472 /// VREPLVEI performs vector broadcast based on an element specified by an
473 /// integer immediate, with its mask being similar to:
474 /// <x, x, x, ...>
475 /// where x is any valid index.
477 /// When undefs appear in the mask they are treated as if they were whatever
478 /// value is necessary in order to fit the above form.
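/// For example, for v4i32 the mask <2, -1, 2, 2> broadcasts element 2 of the
/// first source vector and is lowered to a VREPLVEI node with immediate 2.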
479 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
480 MVT VT, SDValue V1, SDValue V2,
481 SelectionDAG &DAG) {
482 int SplatIndex = -1;
483 for (const auto &M : Mask) {
484 if (M != -1) {
485 SplatIndex = M;
486 break;
490 if (SplatIndex == -1)
491 return DAG.getUNDEF(VT);
493 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
494 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
495 APInt Imm(64, SplatIndex);
496 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
497 DAG.getConstant(Imm, DL, MVT::i64));
500 return SDValue();
503 /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
505 /// VSHUF4I splits the vector into blocks of four elements, then shuffles these
506 /// elements according to a <4 x i2> constant (encoded as an integer immediate).
508 /// It is therefore possible to lower into VSHUF4I when the mask takes the form:
509 /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
510 /// When undefs appear they are treated as if they were whatever value is
511 /// necessary in order to fit the above forms.
513 /// For example:
514 /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
515 /// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
516 /// i32 7, i32 6, i32 5, i32 4>
517 /// is lowered to:
518 /// (VSHUF4I_H $v0, $v1, 27)
519 /// where the 27 comes from:
520 /// 3 + (2 << 2) + (1 << 4) + (0 << 6)
521 static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
522 MVT VT, SDValue V1, SDValue V2,
523 SelectionDAG &DAG) {
525 // When the size is less than 4, lower-cost instructions may be used.
526 if (Mask.size() < 4)
527 return SDValue();
529 int SubMask[4] = {-1, -1, -1, -1};
530 for (unsigned i = 0; i < 4; ++i) {
531 for (unsigned j = i; j < Mask.size(); j += 4) {
532 int Idx = Mask[j];
534 // Convert from vector index to 4-element subvector index
535 // If an index refers to an element outside of the subvector then give up
536 if (Idx != -1) {
537 Idx -= 4 * (j / 4);
538 if (Idx < 0 || Idx >= 4)
539 return SDValue();
542 // If the mask has an undef, replace it with the current index.
543 // Note that it might still be undef if the current index is also undef
544 if (SubMask[i] == -1)
545 SubMask[i] = Idx;
546 // Check that non-undef values are the same as in the mask. If they
547 // aren't then give up
548 else if (Idx != -1 && Idx != SubMask[i])
549 return SDValue();
553 // Calculate the immediate. Replace any remaining undefs with zero
554 APInt Imm(64, 0);
555 for (int i = 3; i >= 0; --i) {
556 int Idx = SubMask[i];
558 if (Idx == -1)
559 Idx = 0;
561 Imm <<= 2;
562 Imm |= Idx & 0x3;
565 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
566 DAG.getConstant(Imm, DL, MVT::i64));
569 /// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
571 /// VPACKEV interleaves the even elements from each vector.
573 /// It is possible to lower into VPACKEV when the mask consists of two of the
574 /// following forms interleaved:
575 /// <0, 2, 4, ...>
576 /// <n, n+2, n+4, ...>
577 /// where n is the number of elements in the vector.
578 /// For example:
579 /// <0, 0, 2, 2, 4, 4, ...>
580 /// <0, n, 2, n+2, 4, n+4, ...>
582 /// When undefs appear in the mask they are treated as if they were whatever
583 /// value is necessary in order to fit the above forms.
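/// A concrete case:
///   %r = shufflevector <4 x i32> %a, <4 x i32> %b,
///                      <4 x i32> <i32 0, i32 4, i32 2, i32 6>
/// matches the interleaved form <0, n, 2, n+2> with n = 4 and is lowered to a
/// VPACKEV node.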
584 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
585 MVT VT, SDValue V1, SDValue V2,
586 SelectionDAG &DAG) {
588 const auto &Begin = Mask.begin();
589 const auto &End = Mask.end();
590 SDValue OriV1 = V1, OriV2 = V2;
592 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
593 V1 = OriV1;
594 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
595 V1 = OriV2;
596 else
597 return SDValue();
599 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
600 V2 = OriV1;
601 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
602 V2 = OriV2;
603 else
604 return SDValue();
606 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
609 /// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
611 /// VPACKOD interleaves the odd elements from each vector.
613 /// It is possible to lower into VPACKOD when the mask consists of two of the
614 /// following forms interleaved:
615 /// <1, 3, 5, ...>
616 /// <n+1, n+3, n+5, ...>
617 /// where n is the number of elements in the vector.
618 /// For example:
619 /// <1, 1, 3, 3, 5, 5, ...>
620 /// <1, n+1, 3, n+3, 5, n+5, ...>
622 /// When undefs appear in the mask they are treated as if they were whatever
623 /// value is necessary in order to fit the above forms.
624 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
625 MVT VT, SDValue V1, SDValue V2,
626 SelectionDAG &DAG) {
628 const auto &Begin = Mask.begin();
629 const auto &End = Mask.end();
630 SDValue OriV1 = V1, OriV2 = V2;
632 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
633 V1 = OriV1;
634 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
635 V1 = OriV2;
636 else
637 return SDValue();
639 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
640 V2 = OriV1;
641 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
642 V2 = OriV2;
643 else
644 return SDValue();
646 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
649 /// Lower VECTOR_SHUFFLE into VILVH (if possible).
651 /// VILVH interleaves consecutive elements from the left (highest-indexed) half
652 /// of each vector.
654 /// It is possible to lower into VILVH when the mask consists of two of the
655 /// following forms interleaved:
656 /// <x, x+1, x+2, ...>
657 /// <n+x, n+x+1, n+x+2, ...>
658 /// where n is the number of elements in the vector and x is half n.
659 /// For example:
660 /// <x, x, x+1, x+1, x+2, x+2, ...>
661 /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
663 /// When undefs appear in the mask they are treated as if they were whatever
664 /// value is necessary in order to fit the above forms.
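/// A concrete case: for v4i32 (n = 4, x = 2) the mask <2, 6, 3, 7> matches the
/// interleaved form <x, n+x, x+1, n+x+1> and is lowered to a VILVH node.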
665 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
666 MVT VT, SDValue V1, SDValue V2,
667 SelectionDAG &DAG) {
669 const auto &Begin = Mask.begin();
670 const auto &End = Mask.end();
671 unsigned HalfSize = Mask.size() / 2;
672 SDValue OriV1 = V1, OriV2 = V2;
674 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
675 V1 = OriV1;
676 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
677 V1 = OriV2;
678 else
679 return SDValue();
681 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
682 V2 = OriV1;
683 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
684 1))
685 V2 = OriV2;
686 else
687 return SDValue();
689 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
692 /// Lower VECTOR_SHUFFLE into VILVL (if possible).
694 /// VILVL interleaves consecutive elements from the right (lowest-indexed) half
695 /// of each vector.
697 /// It is possible to lower into VILVL when the mask consists of two of the
698 /// following forms interleaved:
699 /// <0, 1, 2, ...>
700 /// <n, n+1, n+2, ...>
701 /// where n is the number of elements in the vector.
702 /// For example:
703 /// <0, 0, 1, 1, 2, 2, ...>
704 /// <0, n, 1, n+1, 2, n+2, ...>
706 /// When undefs appear in the mask they are treated as if they were whatever
707 /// value is necessary in order to fit the above forms.
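/// A concrete case: for v4i32 (n = 4) the mask <0, 4, 1, 5> matches the
/// interleaved form <0, n, 1, n+1> and is lowered to a VILVL node.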
708 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
709 MVT VT, SDValue V1, SDValue V2,
710 SelectionDAG &DAG) {
712 const auto &Begin = Mask.begin();
713 const auto &End = Mask.end();
714 SDValue OriV1 = V1, OriV2 = V2;
716 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
717 V1 = OriV1;
718 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
719 V1 = OriV2;
720 else
721 return SDValue();
723 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
724 V2 = OriV1;
725 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
726 V2 = OriV2;
727 else
728 return SDValue();
730 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
733 /// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
735 /// VPICKEV copies the even elements of each vector into the result vector.
737 /// It is possible to lower into VPICKEV when the mask consists of two of the
738 /// following forms concatenated:
739 /// <0, 2, 4, ...>
740 /// <n, n+2, n+4, ...>
741 /// where n is the number of elements in the vector.
742 /// For example:
743 /// <0, 2, 4, ..., 0, 2, 4, ...>
744 /// <0, 2, 4, ..., n, n+2, n+4, ...>
746 /// When undefs appear in the mask they are treated as if they were whatever
747 /// value is necessary in order to fit the above forms.
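/// A concrete case: for v4i32 (n = 4) the mask <0, 2, 4, 6> matches the
/// concatenated form <0, 2, n, n+2> and is lowered to a VPICKEV node.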
748 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
749 MVT VT, SDValue V1, SDValue V2,
750 SelectionDAG &DAG) {
752 const auto &Begin = Mask.begin();
753 const auto &Mid = Mask.begin() + Mask.size() / 2;
754 const auto &End = Mask.end();
755 SDValue OriV1 = V1, OriV2 = V2;
757 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
758 V1 = OriV1;
759 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
760 V1 = OriV2;
761 else
762 return SDValue();
764 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
765 V2 = OriV1;
766 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
767 V2 = OriV2;
769 else
770 return SDValue();
772 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
775 /// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
777 /// VPICKOD copies the odd elements of each vector into the result vector.
779 /// It is possible to lower into VPICKOD when the mask consists of two of the
780 /// following forms concatenated:
781 /// <1, 3, 5, ...>
782 /// <n+1, n+3, n+5, ...>
783 /// where n is the number of elements in the vector.
784 /// For example:
785 /// <1, 3, 5, ..., 1, 3, 5, ...>
786 /// <1, 3, 5, ..., n+1, n+3, n+5, ...>
788 /// When undefs appear in the mask they are treated as if they were whatever
789 /// value is necessary in order to fit the above forms.
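/// A concrete case: for v4i32 (n = 4) the mask <1, 3, 5, 7> matches the
/// concatenated form <1, 3, n+1, n+3> and is lowered to a VPICKOD node.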
790 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
791 MVT VT, SDValue V1, SDValue V2,
792 SelectionDAG &DAG) {
794 const auto &Begin = Mask.begin();
795 const auto &Mid = Mask.begin() + Mask.size() / 2;
796 const auto &End = Mask.end();
797 SDValue OriV1 = V1, OriV2 = V2;
799 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
800 V1 = OriV1;
801 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
802 V1 = OriV2;
803 else
804 return SDValue();
806 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
807 V2 = OriV1;
808 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
809 V2 = OriV2;
810 else
811 return SDValue();
813 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
816 /// Lower VECTOR_SHUFFLE into VSHUF.
818 /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
819 /// adding it as an operand to the resulting VSHUF.
820 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
821 MVT VT, SDValue V1, SDValue V2,
822 SelectionDAG &DAG) {
824 SmallVector<SDValue, 16> Ops;
825 for (auto M : Mask)
826 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
828 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
829 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
831 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
832 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
833 // VSHUF concatenates the vectors in a bitwise fashion:
834 // <0b00, 0b01> + <0b10, 0b11> ->
835 // 0b0100 + 0b1110 -> 0b01001110
836 // <0b10, 0b11, 0b00, 0b01>
837 // We must therefore swap the operands to get the correct result.
838 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
841 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
843 /// This routine breaks down the specific type of 128-bit shuffle and
844 /// dispatches to the lowering routines accordingly.
845 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
846 SDValue V1, SDValue V2, SelectionDAG &DAG) {
847 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
848 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
849 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
850 "Vector type is unsupported for lsx!");
851 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
852 "Two operands have different types!");
853 assert(VT.getVectorNumElements() == Mask.size() &&
854 "Unexpected mask size for shuffle!");
855 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
857 SDValue Result;
858 // TODO: Add more comparison patterns.
859 if (V2.isUndef()) {
860 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
861 return Result;
862 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
863 return Result;
865 // TODO: The commented-out assignment below may be enabled in the future to
866 // better match the pattern for instruction selection.
867 /* V2 = V1; */
870 // For best performance, it is recommended not to change the order of the
871 // pattern comparisons below.
872 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
873 return Result;
874 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
875 return Result;
876 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
877 return Result;
878 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
879 return Result;
880 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
881 return Result;
882 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
883 return Result;
884 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
885 return Result;
887 return SDValue();
890 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
892 /// It is an XVREPLVEI when the mask is:
893 /// <x, x, x, ..., x+n, x+n, x+n, ...>
894 /// where x appears n times and n is half the length of the vector.
896 /// When undefs appear in the mask they are treated as if they were whatever
897 /// value is necessary in order to fit the above form.
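/// For example, for v8i32 (n = 4) the mask <1, 1, 1, 1, 5, 5, 5, 5> broadcasts
/// element 1 within each 128-bit half and is lowered to a VREPLVEI node with
/// immediate 1.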
898 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
899 ArrayRef<int> Mask, MVT VT,
900 SDValue V1, SDValue V2,
901 SelectionDAG &DAG) {
902 int SplatIndex = -1;
903 for (const auto &M : Mask) {
904 if (M != -1) {
905 SplatIndex = M;
906 break;
910 if (SplatIndex == -1)
911 return DAG.getUNDEF(VT);
913 const auto &Begin = Mask.begin();
914 const auto &End = Mask.end();
915 unsigned HalfSize = Mask.size() / 2;
917 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
918 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
919 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
920 0)) {
921 APInt Imm(64, SplatIndex);
922 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
923 DAG.getConstant(Imm, DL, MVT::i64));
926 return SDValue();
929 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
930 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
931 MVT VT, SDValue V1, SDValue V2,
932 SelectionDAG &DAG) {
933 // When the size is less than or equal to 4, lower-cost instructions may be
934 // used.
935 if (Mask.size() <= 4)
936 return SDValue();
937 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
940 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
941 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
942 MVT VT, SDValue V1, SDValue V2,
943 SelectionDAG &DAG) {
944 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
947 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
948 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
949 MVT VT, SDValue V1, SDValue V2,
950 SelectionDAG &DAG) {
951 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
954 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
955 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
956 MVT VT, SDValue V1, SDValue V2,
957 SelectionDAG &DAG) {
959 const auto &Begin = Mask.begin();
960 const auto &End = Mask.end();
961 unsigned HalfSize = Mask.size() / 2;
962 unsigned LeftSize = HalfSize / 2;
963 SDValue OriV1 = V1, OriV2 = V2;
965 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
966 1) &&
967 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
968 V1 = OriV1;
969 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
970 Mask.size() + HalfSize - LeftSize, 1) &&
971 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
972 Mask.size() + HalfSize + LeftSize, 1))
973 V1 = OriV2;
974 else
975 return SDValue();
977 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
978 1) &&
979 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
980 1))
981 V2 = OriV1;
982 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
983 Mask.size() + HalfSize - LeftSize, 1) &&
984 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
985 Mask.size() + HalfSize + LeftSize, 1))
986 V2 = OriV2;
987 else
988 return SDValue();
990 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
993 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
994 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
995 MVT VT, SDValue V1, SDValue V2,
996 SelectionDAG &DAG) {
998 const auto &Begin = Mask.begin();
999 const auto &End = Mask.end();
1000 unsigned HalfSize = Mask.size() / 2;
1001 SDValue OriV1 = V1, OriV2 = V2;
1003 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1004 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1005 V1 = OriV1;
1006 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1007 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1008 Mask.size() + HalfSize, 1))
1009 V1 = OriV2;
1010 else
1011 return SDValue();
1013 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1014 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1015 V2 = OriV1;
1016 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1017 1) &&
1018 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1019 Mask.size() + HalfSize, 1))
1020 V2 = OriV2;
1021 else
1022 return SDValue();
1024 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1027 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1028 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1029 MVT VT, SDValue V1, SDValue V2,
1030 SelectionDAG &DAG) {
1032 const auto &Begin = Mask.begin();
1033 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1034 const auto &Mid = Mask.begin() + Mask.size() / 2;
1035 const auto &RightMid = Mask.end() - Mask.size() / 4;
1036 const auto &End = Mask.end();
1037 unsigned HalfSize = Mask.size() / 2;
1038 SDValue OriV1 = V1, OriV2 = V2;
1040 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1041 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1042 V1 = OriV1;
1043 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1044 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1045 V1 = OriV2;
1046 else
1047 return SDValue();
1049 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1050 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1051 V2 = OriV1;
1052 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1053 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1054 V2 = OriV2;
1056 else
1057 return SDValue();
1059 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1062 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1063 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1064 MVT VT, SDValue V1, SDValue V2,
1065 SelectionDAG &DAG) {
1067 const auto &Begin = Mask.begin();
1068 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1069 const auto &Mid = Mask.begin() + Mask.size() / 2;
1070 const auto &RightMid = Mask.end() - Mask.size() / 4;
1071 const auto &End = Mask.end();
1072 unsigned HalfSize = Mask.size() / 2;
1073 SDValue OriV1 = V1, OriV2 = V2;
1075 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1076 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1077 V1 = OriV1;
1078 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1079 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1080 2))
1081 V1 = OriV2;
1082 else
1083 return SDValue();
1085 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1086 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1087 V2 = OriV1;
1088 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1089 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1090 2))
1091 V2 = OriV2;
1092 else
1093 return SDValue();
1095 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1098 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1099 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1100 MVT VT, SDValue V1, SDValue V2,
1101 SelectionDAG &DAG) {
1103 int MaskSize = Mask.size();
1104 int HalfSize = Mask.size() / 2;
1105 const auto &Begin = Mask.begin();
1106 const auto &Mid = Mask.begin() + HalfSize;
1107 const auto &End = Mask.end();
1109 // VECTOR_SHUFFLE concatenates the vectors:
1110 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1111 // shuffling ->
1112 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1114 // XVSHUF concatenates the vectors:
1115 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1116 // shuffling ->
1117 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
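// Consequently, the first half of the mask may only refer to the low half of
// either source vector and the second half only to the high half; any other
// element forces a bail-out below.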
1118 SmallVector<SDValue, 8> MaskAlloc;
1119 for (auto it = Begin; it < Mid; it++) {
1120 if (*it < 0) // UNDEF
1121 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1122 else if ((*it >= 0 && *it < HalfSize) ||
1123 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1124 int M = *it < HalfSize ? *it : *it - HalfSize;
1125 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1126 } else
1127 return SDValue();
1129 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1131 for (auto it = Mid; it < End; it++) {
1132 if (*it < 0) // UNDEF
1133 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1134 else if ((*it >= HalfSize && *it < MaskSize) ||
1135 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1136 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1137 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1138 } else
1139 return SDValue();
1141 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1143 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1144 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1145 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1148 /// Shuffle vectors by lane to generate more optimized instructions.
1149 /// 256-bit shuffles are always treated as 2-lane 128-bit shuffles.
1151 /// Therefore, all cases other than the following four are regarded as
1152 /// cross-lane shuffles, where optimization is relatively limited.
1154 /// - Shuffle high, low lanes of two input vectors
1155 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1156 /// - Shuffle low, high lanes of two input vectors
1157 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1158 /// - Shuffle low, low lanes of two input vectors
1159 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1160 /// - Shuffle high, high lanes of two input vectors
1161 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1163 /// The first case is the closest to the LoongArch instructions and the other
1164 /// cases need to be converted to it before processing.
1166 /// This function may modify V1, V2 and Mask.
1167 static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1168 MutableArrayRef<int> Mask, MVT VT,
1169 SDValue &V1, SDValue &V2,
1170 SelectionDAG &DAG) {
1172 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1174 int MaskSize = Mask.size();
1175 int HalfSize = Mask.size() / 2;
1177 HalfMaskType preMask = None, postMask = None;
1179 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1180 return M < 0 || (M >= 0 && M < HalfSize) ||
1181 (M >= MaskSize && M < MaskSize + HalfSize);
1182 }))
1183 preMask = HighLaneTy;
1184 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1185 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1186 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1187 }))
1188 preMask = LowLaneTy;
1190 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1191 return M < 0 || (M >= 0 && M < HalfSize) ||
1192 (M >= MaskSize && M < MaskSize + HalfSize);
1193 }))
1194 postMask = HighLaneTy;
1195 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1196 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1197 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1198 }))
1199 postMask = LowLaneTy;
1201 // The first half of the mask is high-lane type and the second half is
1202 // low-lane type, which is the form closest to the LoongArch instructions.
1204 // Note: In the LoongArch architecture, the high lane of the mask corresponds
1205 // to the lower 128 bits of the vector register, and the low lane of the mask
1206 // corresponds to the higher 128 bits of the vector register.
1207 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1208 return;
1210 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1211 V1 = DAG.getBitcast(MVT::v4i64, V1);
1212 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1213 DAG.getConstant(0b01001110, DL, MVT::i64));
1214 V1 = DAG.getBitcast(VT, V1);
1216 if (!V2.isUndef()) {
1217 V2 = DAG.getBitcast(MVT::v4i64, V2);
1218 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1219 DAG.getConstant(0b01001110, DL, MVT::i64));
1220 V2 = DAG.getBitcast(VT, V2);
1223 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1224 *it = *it < 0 ? *it : *it - HalfSize;
1226 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1227 *it = *it < 0 ? *it : *it + HalfSize;
1229 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1230 V1 = DAG.getBitcast(MVT::v4i64, V1);
1231 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1232 DAG.getConstant(0b11101110, DL, MVT::i64));
1233 V1 = DAG.getBitcast(VT, V1);
1235 if (!V2.isUndef()) {
1236 V2 = DAG.getBitcast(MVT::v4i64, V2);
1237 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1238 DAG.getConstant(0b11101110, DL, MVT::i64));
1239 V2 = DAG.getBitcast(VT, V2);
1242 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1243 *it = *it < 0 ? *it : *it - HalfSize;
1245 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1246 V1 = DAG.getBitcast(MVT::v4i64, V1);
1247 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1248 DAG.getConstant(0b01000100, DL, MVT::i64));
1249 V1 = DAG.getBitcast(VT, V1);
1251 if (!V2.isUndef()) {
1252 V2 = DAG.getBitcast(MVT::v4i64, V2);
1253 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1254 DAG.getConstant(0b01000100, DL, MVT::i64));
1255 V2 = DAG.getBitcast(VT, V2);
1258 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1259 *it = *it < 0 ? *it : *it + HalfSize;
1261 } else { // cross-lane
1262 return;
1266 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1268 /// This routine breaks down the specific type of 256-bit shuffle and
1269 /// dispatches to the lowering routines accordingly.
1270 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1271 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1272 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1273 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1274 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1275 "Vector type is unsupported for lasx!");
1276 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1277 "Two operands have different types!");
1278 assert(VT.getVectorNumElements() == Mask.size() &&
1279 "Unexpected mask size for shuffle!");
1280 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1281 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1283 // Canonicalize non-cross-lane shuffle vectors.
1284 SmallVector<int> NewMask(Mask);
1285 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1287 SDValue Result;
1288 // TODO: Add more comparison patterns.
1289 if (V2.isUndef()) {
1290 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1291 return Result;
1292 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1293 return Result;
1295 // TODO: The commented-out assignment below may be enabled in the future to
1296 // better match the pattern for instruction selection.
1297 /* V2 = V1; */
1300 // For best performance, it is recommended not to change the order of the
1301 // pattern comparisons below.
1302 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1303 return Result;
1304 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1305 return Result;
1306 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1307 return Result;
1308 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1309 return Result;
1310 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1311 return Result;
1312 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1313 return Result;
1314 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1315 return Result;
1317 return SDValue();
1320 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1321 SelectionDAG &DAG) const {
1322 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1323 ArrayRef<int> OrigMask = SVOp->getMask();
1324 SDValue V1 = Op.getOperand(0);
1325 SDValue V2 = Op.getOperand(1);
1326 MVT VT = Op.getSimpleValueType();
1327 int NumElements = VT.getVectorNumElements();
1328 SDLoc DL(Op);
1330 bool V1IsUndef = V1.isUndef();
1331 bool V2IsUndef = V2.isUndef();
1332 if (V1IsUndef && V2IsUndef)
1333 return DAG.getUNDEF(VT);
1335 // When we create a shuffle node we put the UNDEF node as the second operand,
1336 // but in some cases the first operand may be transformed to UNDEF.
1337 // In that case we should just commute the node.
1338 if (V1IsUndef)
1339 return DAG.getCommutedVectorShuffle(*SVOp);
1341 // Check for non-undef masks pointing at an undef vector and make the masks
1342 // undef as well. This makes it easier to match the shuffle based solely on
1343 // the mask.
1344 if (V2IsUndef &&
1345 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1346 SmallVector<int, 8> NewMask(OrigMask);
1347 for (int &M : NewMask)
1348 if (M >= NumElements)
1349 M = -1;
1350 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1353 // Check for illegal shuffle mask element index values.
1354 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1355 (void)MaskUpperLimit;
1356 assert(llvm::all_of(OrigMask,
1357 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1358 "Out of bounds shuffle index");
1360 // For each vector width, delegate to a specialized lowering routine.
1361 if (VT.is128BitVector())
1362 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1364 if (VT.is256BitVector())
1365 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1367 return SDValue();
1370 static bool isConstantOrUndef(const SDValue Op) {
1371 if (Op->isUndef())
1372 return true;
1373 if (isa<ConstantSDNode>(Op))
1374 return true;
1375 if (isa<ConstantFPSDNode>(Op))
1376 return true;
1377 return false;
1380 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1381 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1382 if (isConstantOrUndef(Op->getOperand(i)))
1383 return true;
1384 return false;
1387 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1388 SelectionDAG &DAG) const {
1389 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1390 EVT ResTy = Op->getValueType(0);
1391 SDLoc DL(Op);
1392 APInt SplatValue, SplatUndef;
1393 unsigned SplatBitSize;
1394 bool HasAnyUndefs;
1395 bool Is128Vec = ResTy.is128BitVector();
1396 bool Is256Vec = ResTy.is256BitVector();
1398 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1399 (!Subtarget.hasExtLASX() || !Is256Vec))
1400 return SDValue();
1402 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1403 /*MinSplatBits=*/8) &&
1404 SplatBitSize <= 64) {
1405 // We can only cope with 8, 16, 32, or 64-bit elements.
1406 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1407 SplatBitSize != 64)
1408 return SDValue();
1410 EVT ViaVecTy;
1412 switch (SplatBitSize) {
1413 default:
1414 return SDValue();
1415 case 8:
1416 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1417 break;
1418 case 16:
1419 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1420 break;
1421 case 32:
1422 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1423 break;
1424 case 64:
1425 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1426 break;
1429 // SelectionDAG::getConstant will promote SplatValue appropriately.
1430 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1432 // Bitcast to the type we originally wanted.
1433 if (ViaVecTy != ResTy)
1434 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1436 return Result;
1439 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1440 return Op;
1442 if (!isConstantOrUndefBUILD_VECTOR(Node)) {
1443 // Use INSERT_VECTOR_ELT operations rather than expanding to stores.
1444 // The resulting code is the same length as the expansion, but it doesn't
1445 // use memory operations.
1446 EVT ResTy = Node->getValueType(0);
1448 assert(ResTy.isVector());
1450 unsigned NumElts = ResTy.getVectorNumElements();
1451 SDValue Vector = DAG.getUNDEF(ResTy);
1452 for (unsigned i = 0; i < NumElts; ++i) {
1453 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1454 Node->getOperand(i),
1455 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1457 return Vector;
1460 return SDValue();
1463 SDValue
1464 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1465 SelectionDAG &DAG) const {
1466 EVT VecTy = Op->getOperand(0)->getValueType(0);
1467 SDValue Idx = Op->getOperand(1);
1468 EVT EltTy = VecTy.getVectorElementType();
1469 unsigned NumElts = VecTy.getVectorNumElements();
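// Keep the extraction as-is when the index is a constant and either the
// element is 32 or 64 bits wide or the index lies in the lower half of the
// vector; anything else is left to the generic legalizer.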
1471 if (isa<ConstantSDNode>(Idx) &&
1472 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1473 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1474 return Op;
1476 return SDValue();
1479 SDValue
1480 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1481 SelectionDAG &DAG) const {
1482 if (isa<ConstantSDNode>(Op->getOperand(2)))
1483 return Op;
1484 return SDValue();
1487 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1488 SelectionDAG &DAG) const {
1489 SDLoc DL(Op);
1490 SyncScope::ID FenceSSID =
1491 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1493 // singlethread fences only synchronize with signal handlers on the same
1494 // thread and thus only need to preserve instruction order, not actually
1495 // enforce memory ordering.
1496 if (FenceSSID == SyncScope::SingleThread)
1497 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1498 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1500 return Op;
1503 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1504 SelectionDAG &DAG) const {
1506 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1507 DAG.getContext()->emitError(
1508 "On LA64, only 64-bit registers can be written.");
1509 return Op.getOperand(0);
1512 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1513 DAG.getContext()->emitError(
1514 "On LA32, only 32-bit registers can be written.");
1515 return Op.getOperand(0);
1518 return Op;
1521 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1522 SelectionDAG &DAG) const {
1523 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1524 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1525 "be a constant integer");
1526 return SDValue();
1529 MachineFunction &MF = DAG.getMachineFunction();
1530 MF.getFrameInfo().setFrameAddressIsTaken(true);
1531 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1532 EVT VT = Op.getValueType();
1533 SDLoc DL(Op);
1534 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1535 unsigned Depth = Op.getConstantOperandVal(0);
1536 int GRLenInBytes = Subtarget.getGRLen() / 8;
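// For non-zero depths, walk the chain of saved frame pointers: each parent
// frame address is loaded from the slot two GRLen-sized words below the
// current frame address.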
1538 while (Depth--) {
1539 int Offset = -(GRLenInBytes * 2);
1540 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1541 DAG.getSignedConstant(Offset, DL, VT));
1542 FrameAddr =
1543 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1545 return FrameAddr;
1548 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1549 SelectionDAG &DAG) const {
1550 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1551 return SDValue();
1553 // Currently only support lowering return address for current frame.
1554 if (Op.getConstantOperandVal(0) != 0) {
1555 DAG.getContext()->emitError(
1556 "return address can only be determined for the current frame");
1557 return SDValue();
1560 MachineFunction &MF = DAG.getMachineFunction();
1561 MF.getFrameInfo().setReturnAddressIsTaken(true);
1562 MVT GRLenVT = Subtarget.getGRLenVT();
1564 // Return the value of the return address register, marking it an implicit
1565 // live-in.
1566 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1567 getRegClassFor(GRLenVT));
1568 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1571 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1572 SelectionDAG &DAG) const {
1573 MachineFunction &MF = DAG.getMachineFunction();
1574 auto Size = Subtarget.getGRLen() / 8;
1575 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1576 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1579 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1580 SelectionDAG &DAG) const {
1581 MachineFunction &MF = DAG.getMachineFunction();
1582 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1584 SDLoc DL(Op);
1585 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1586 getPointerTy(MF.getDataLayout()));
1588 // vastart just stores the address of the VarArgsFrameIndex slot into the
1589 // memory location argument.
1590 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1591 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
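// Custom lowering for UINT_TO_FP, only reached on LA64 with the F feature but
// without D: if the source is already known to be zero-extended from a narrow
// value (masked by an AND, produced by BSTRPICK, or carrying an AssertZext),
// the node is kept; otherwise the conversion is softened into a libcall.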
1595 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1596 SelectionDAG &DAG) const {
1597 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1598 !Subtarget.hasBasicD() && "unexpected target features");
1600 SDLoc DL(Op);
1601 SDValue Op0 = Op.getOperand(0);
1602 if (Op0->getOpcode() == ISD::AND) {
1603 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1604 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1605 return Op;
1608 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1609 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1610 Op0.getConstantOperandVal(2) == UINT64_C(0))
1611 return Op;
1613 if (Op0.getOpcode() == ISD::AssertZext &&
1614 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1615 return Op;
1617 EVT OpVT = Op0.getValueType();
1618 EVT RetVT = Op.getValueType();
1619 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1620 MakeLibCallOptions CallOptions;
1621 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1622 SDValue Chain = SDValue();
1623 SDValue Result;
1624 std::tie(Result, Chain) =
1625 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1626 return Result;
1629 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1630 SelectionDAG &DAG) const {
1631 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1632 !Subtarget.hasBasicD() && "unexpected target features");
1634 SDLoc DL(Op);
1635 SDValue Op0 = Op.getOperand(0);
1637 if ((Op0.getOpcode() == ISD::AssertSext ||
1638 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
1639 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1640 return Op;
1642 EVT OpVT = Op0.getValueType();
1643 EVT RetVT = Op.getValueType();
1644 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1645 MakeLibCallOptions CallOptions;
1646 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1647 SDValue Chain = SDValue();
1648 SDValue Result;
1649 std::tie(Result, Chain) =
1650 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1651 return Result;
1654 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1655 SelectionDAG &DAG) const {
1657 SDLoc DL(Op);
1658 SDValue Op0 = Op.getOperand(0);
1660 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1661 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1662 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1663 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}
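// Custom lowering for FP_TO_SINT: if the result is wider than 32 bits and
// only the F feature (no D) is available, convert through an f32 FTINT and
// move the result to a GPR with MOVFR2GR_S_LA64; otherwise convert in a
// floating-point type of the result's width and bitcast back.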
1668 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1669 SelectionDAG &DAG) const {
1671 SDLoc DL(Op);
1673 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1674 !Subtarget.hasBasicD()) {
1675 SDValue Dst =
1676 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
1677 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1680 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1681 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}
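// getTargetNode wrappers: turn the different address SDNode kinds into their
// target-specific equivalents so that getAddr() below can be shared between
// global addresses, block addresses, constant pools and jump tables.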
1685 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1686 SelectionDAG &DAG, unsigned Flags) {
1687 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1690 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1691 SelectionDAG &DAG, unsigned Flags) {
1692 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1693 Flags);
1696 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1697 SelectionDAG &DAG, unsigned Flags) {
1698 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1699 N->getOffset(), Flags);
1702 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1703 SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
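// Materialize the address of a symbol for the current code model:
//  * Small/Medium: a single PC-relative pseudo (PseudoLA_PCREL for local
//    symbols, PseudoLA_GOT for preemptible ones).
//  * Large: the PseudoLA_*_LARGE forms, which expand to the 5-instruction
//    sequences and are only supported on LA64.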
1707 template <class NodeTy>
1708 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1709 CodeModel::Model M,
1710 bool IsLocal) const {
1711 SDLoc DL(N);
1712 EVT Ty = getPointerTy(DAG.getDataLayout());
1713 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1714 SDValue Load;
1716 switch (M) {
1717 default:
1718 report_fatal_error("Unsupported code model");
1720 case CodeModel::Large: {
1721 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1723 // This is not actually used, but is necessary for successfully matching
1724 // the PseudoLA_*_LARGE nodes.
1725 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1726 if (IsLocal) {
1727 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1728 // eventually becomes the desired 5-insn code sequence.
1729 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1730 Tmp, Addr),
1732 } else {
1733 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1734 // eventually becomes the desired 5-insn code sequence.
1735 Load = SDValue(
1736 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1739 break;
1742 case CodeModel::Small:
1743 case CodeModel::Medium:
1744 if (IsLocal) {
1745 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1746 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1747 Load = SDValue(
1748 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1749 } else {
1750 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1751 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1752 Load =
1753 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1757 if (!IsLocal) {
1758 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1759 MachineFunction &MF = DAG.getMachineFunction();
1760 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1761 MachinePointerInfo::getGOT(MF),
1762 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1763 MachineMemOperand::MOInvariant,
1764 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1765 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1768 return Load;
1771 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1772 SelectionDAG &DAG) const {
1773 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1774 DAG.getTarget().getCodeModel());
1777 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1778 SelectionDAG &DAG) const {
1779 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1780 DAG.getTarget().getCodeModel());
1783 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1784 SelectionDAG &DAG) const {
1785 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1786 DAG.getTarget().getCodeModel());
1789 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1790 SelectionDAG &DAG) const {
1791 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1792 assert(N->getOffset() == 0 && "unexpected offset in global node");
1793 auto CM = DAG.getTarget().getCodeModel();
1794 const GlobalValue *GV = N->getGlobal();
1796 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1797 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1798 CM = *GCM;
1801 return getAddr(N, DAG, CM, GV->isDSOLocal());
1804 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1805 SelectionDAG &DAG,
1806 unsigned Opc, bool UseGOT,
1807 bool Large) const {
1808 SDLoc DL(N);
1809 EVT Ty = getPointerTy(DAG.getDataLayout());
1810 MVT GRLenVT = Subtarget.getGRLenVT();
1812 // This is not actually used, but is necessary for successfully matching the
1813 // PseudoLA_*_LARGE nodes.
1814 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1815 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1816 SDValue Offset = Large
1817 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1818 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1819 if (UseGOT) {
1820 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1821 MachineFunction &MF = DAG.getMachineFunction();
1822 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1823 MachinePointerInfo::getGOT(MF),
1824 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1825 MachineMemOperand::MOInvariant,
1826 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1827 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1830 // Add the thread pointer.
1831 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
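// Lower a TLS symbol in the general-/local-dynamic models: materialize the
// GOT entry for the symbol with the given pseudo and pass it to a call to
// __tls_get_addr.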
1835 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1836 SelectionDAG &DAG,
1837 unsigned Opc,
1838 bool Large) const {
1839 SDLoc DL(N);
1840 EVT Ty = getPointerTy(DAG.getDataLayout());
1841 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1843 // This is not actually used, but is necessary for successfully matching the
1844 // PseudoLA_*_LARGE nodes.
1845 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1847 // Use a PC-relative addressing mode to access the dynamic GOT address.
1848 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1849 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1850 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1852 // Prepare argument list to generate call.
1853 ArgListTy Args;
1854 ArgListEntry Entry;
1855 Entry.Node = Load;
1856 Entry.Ty = CallTy;
1857 Args.push_back(Entry);
  // Set up the call to __tls_get_addr.
1860 TargetLowering::CallLoweringInfo CLI(DAG);
1861 CLI.setDebugLoc(DL)
1862 .setChain(DAG.getEntryNode())
1863 .setLibCallee(CallingConv::C, CallTy,
1864 DAG.getExternalSymbol("__tls_get_addr", Ty),
1865 std::move(Args));
  return LowerCallTo(CLI).first;
}
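// Lower a TLS symbol access in the TLSDESC model by emitting the
// PseudoLA_TLS_DESC{,_LARGE} node for the symbol.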
1870 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1871 SelectionDAG &DAG, unsigned Opc,
1872 bool Large) const {
1873 SDLoc DL(N);
1874 EVT Ty = getPointerTy(DAG.getDataLayout());
1875 const GlobalValue *GV = N->getGlobal();
1877 // This is not actually used, but is necessary for successfully matching the
1878 // PseudoLA_*_LARGE nodes.
1879 SDValue Tmp = DAG.getConstant(0, DL, Ty);
  // Use a PC-relative addressing mode to access the global dynamic GOT
  // address. This generates the pattern (PseudoLA_TLS_DESC{,_LARGE} sym).
1883 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1884 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1885 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1888 SDValue
1889 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1890 SelectionDAG &DAG) const {
1891 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1892 CallingConv::GHC)
1893 report_fatal_error("In GHC calling convention TLS is not supported");
1895 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1896 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1898 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1899 assert(N->getOffset() == 0 && "unexpected offset in global node");
1901 if (DAG.getTarget().useEmulatedTLS())
1902 report_fatal_error("the emulated TLS is prohibited",
1903 /*GenCrashDiag=*/false);
1905 bool IsDesc = DAG.getTarget().useTLSDESC();
1907 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1908 case TLSModel::GeneralDynamic:
1909 // In this model, application code calls the dynamic linker function
1910 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1911 // runtime.
1912 if (!IsDesc)
1913 return getDynamicTLSAddr(N, DAG,
1914 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1915 : LoongArch::PseudoLA_TLS_GD,
1916 Large);
1917 break;
1918 case TLSModel::LocalDynamic:
1919 // Same as GeneralDynamic, except for assembly modifiers and relocation
1920 // records.
1921 if (!IsDesc)
1922 return getDynamicTLSAddr(N, DAG,
1923 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1924 : LoongArch::PseudoLA_TLS_LD,
1925 Large);
1926 break;
1927 case TLSModel::InitialExec:
1928 // This model uses the GOT to resolve TLS offsets.
1929 return getStaticTLSAddr(N, DAG,
1930 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1931 : LoongArch::PseudoLA_TLS_IE,
1932 /*UseGOT=*/true, Large);
1933 case TLSModel::LocalExec:
    // This model is used for statically linked programs, as the TLS offsets
    // are resolved during program linking.

    // This node doesn't need an extra argument for the large code model.
1938 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1939 /*UseGOT=*/false);
1942 return getTLSDescAddr(N, DAG,
1943 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
1944 : LoongArch::PseudoLA_TLS_DESC,
                        Large);
}
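// Check that operand ImmOp of the intrinsic is an N-bit (optionally signed)
// immediate. On failure, emit an error and return UNDEF of the result type;
// on success, return an empty SDValue so the caller keeps the original node.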
1948 template <unsigned N>
1949 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
1950 SelectionDAG &DAG, bool IsSigned = false) {
1951 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1952 // Check the ImmArg.
1953 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
1954 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
1955 DAG.getContext()->emitError(Op->getOperationName(0) +
1956 ": argument out of range.");
1957 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}
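// Custom lowering for chain-less intrinsics: apart from thread_pointer, the
// cases below only range-check the constant immediate operands of the
// LSX/LASX intrinsics; the nodes themselves are left for instruction
// selection.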
1962 SDValue
1963 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1964 SelectionDAG &DAG) const {
1965 SDLoc DL(Op);
1966 switch (Op.getConstantOperandVal(0)) {
1967 default:
1968 return SDValue(); // Don't custom lower most intrinsics.
1969 case Intrinsic::thread_pointer: {
1970 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1971 return DAG.getRegister(LoongArch::R2, PtrVT);
1973 case Intrinsic::loongarch_lsx_vpickve2gr_d:
1974 case Intrinsic::loongarch_lsx_vpickve2gr_du:
1975 case Intrinsic::loongarch_lsx_vreplvei_d:
1976 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1977 return checkIntrinsicImmArg<1>(Op, 2, DAG);
1978 case Intrinsic::loongarch_lsx_vreplvei_w:
1979 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1980 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1981 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1982 case Intrinsic::loongarch_lasx_xvpickve_d:
1983 case Intrinsic::loongarch_lasx_xvpickve_d_f:
1984 return checkIntrinsicImmArg<2>(Op, 2, DAG);
1985 case Intrinsic::loongarch_lasx_xvinsve0_d:
1986 return checkIntrinsicImmArg<2>(Op, 3, DAG);
1987 case Intrinsic::loongarch_lsx_vsat_b:
1988 case Intrinsic::loongarch_lsx_vsat_bu:
1989 case Intrinsic::loongarch_lsx_vrotri_b:
1990 case Intrinsic::loongarch_lsx_vsllwil_h_b:
1991 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1992 case Intrinsic::loongarch_lsx_vsrlri_b:
1993 case Intrinsic::loongarch_lsx_vsrari_b:
1994 case Intrinsic::loongarch_lsx_vreplvei_h:
1995 case Intrinsic::loongarch_lasx_xvsat_b:
1996 case Intrinsic::loongarch_lasx_xvsat_bu:
1997 case Intrinsic::loongarch_lasx_xvrotri_b:
1998 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1999 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2000 case Intrinsic::loongarch_lasx_xvsrlri_b:
2001 case Intrinsic::loongarch_lasx_xvsrari_b:
2002 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2003 case Intrinsic::loongarch_lasx_xvpickve_w:
2004 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2005 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2006 case Intrinsic::loongarch_lasx_xvinsve0_w:
2007 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2008 case Intrinsic::loongarch_lsx_vsat_h:
2009 case Intrinsic::loongarch_lsx_vsat_hu:
2010 case Intrinsic::loongarch_lsx_vrotri_h:
2011 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2012 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2013 case Intrinsic::loongarch_lsx_vsrlri_h:
2014 case Intrinsic::loongarch_lsx_vsrari_h:
2015 case Intrinsic::loongarch_lsx_vreplvei_b:
2016 case Intrinsic::loongarch_lasx_xvsat_h:
2017 case Intrinsic::loongarch_lasx_xvsat_hu:
2018 case Intrinsic::loongarch_lasx_xvrotri_h:
2019 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2020 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2021 case Intrinsic::loongarch_lasx_xvsrlri_h:
2022 case Intrinsic::loongarch_lasx_xvsrari_h:
2023 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2024 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2025 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2026 case Intrinsic::loongarch_lsx_vsrani_b_h:
2027 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2028 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2029 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2030 case Intrinsic::loongarch_lsx_vssrani_b_h:
2031 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2032 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2033 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2034 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2035 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2036 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2037 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2038 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2039 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2040 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2041 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2042 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2043 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2044 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2045 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2046 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2047 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2048 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2049 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2050 case Intrinsic::loongarch_lsx_vsat_w:
2051 case Intrinsic::loongarch_lsx_vsat_wu:
2052 case Intrinsic::loongarch_lsx_vrotri_w:
2053 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2054 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2055 case Intrinsic::loongarch_lsx_vsrlri_w:
2056 case Intrinsic::loongarch_lsx_vsrari_w:
2057 case Intrinsic::loongarch_lsx_vslei_bu:
2058 case Intrinsic::loongarch_lsx_vslei_hu:
2059 case Intrinsic::loongarch_lsx_vslei_wu:
2060 case Intrinsic::loongarch_lsx_vslei_du:
2061 case Intrinsic::loongarch_lsx_vslti_bu:
2062 case Intrinsic::loongarch_lsx_vslti_hu:
2063 case Intrinsic::loongarch_lsx_vslti_wu:
2064 case Intrinsic::loongarch_lsx_vslti_du:
2065 case Intrinsic::loongarch_lsx_vbsll_v:
2066 case Intrinsic::loongarch_lsx_vbsrl_v:
2067 case Intrinsic::loongarch_lasx_xvsat_w:
2068 case Intrinsic::loongarch_lasx_xvsat_wu:
2069 case Intrinsic::loongarch_lasx_xvrotri_w:
2070 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2071 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2072 case Intrinsic::loongarch_lasx_xvsrlri_w:
2073 case Intrinsic::loongarch_lasx_xvsrari_w:
2074 case Intrinsic::loongarch_lasx_xvslei_bu:
2075 case Intrinsic::loongarch_lasx_xvslei_hu:
2076 case Intrinsic::loongarch_lasx_xvslei_wu:
2077 case Intrinsic::loongarch_lasx_xvslei_du:
2078 case Intrinsic::loongarch_lasx_xvslti_bu:
2079 case Intrinsic::loongarch_lasx_xvslti_hu:
2080 case Intrinsic::loongarch_lasx_xvslti_wu:
2081 case Intrinsic::loongarch_lasx_xvslti_du:
2082 case Intrinsic::loongarch_lasx_xvbsll_v:
2083 case Intrinsic::loongarch_lasx_xvbsrl_v:
2084 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2085 case Intrinsic::loongarch_lsx_vseqi_b:
2086 case Intrinsic::loongarch_lsx_vseqi_h:
2087 case Intrinsic::loongarch_lsx_vseqi_w:
2088 case Intrinsic::loongarch_lsx_vseqi_d:
2089 case Intrinsic::loongarch_lsx_vslei_b:
2090 case Intrinsic::loongarch_lsx_vslei_h:
2091 case Intrinsic::loongarch_lsx_vslei_w:
2092 case Intrinsic::loongarch_lsx_vslei_d:
2093 case Intrinsic::loongarch_lsx_vslti_b:
2094 case Intrinsic::loongarch_lsx_vslti_h:
2095 case Intrinsic::loongarch_lsx_vslti_w:
2096 case Intrinsic::loongarch_lsx_vslti_d:
2097 case Intrinsic::loongarch_lasx_xvseqi_b:
2098 case Intrinsic::loongarch_lasx_xvseqi_h:
2099 case Intrinsic::loongarch_lasx_xvseqi_w:
2100 case Intrinsic::loongarch_lasx_xvseqi_d:
2101 case Intrinsic::loongarch_lasx_xvslei_b:
2102 case Intrinsic::loongarch_lasx_xvslei_h:
2103 case Intrinsic::loongarch_lasx_xvslei_w:
2104 case Intrinsic::loongarch_lasx_xvslei_d:
2105 case Intrinsic::loongarch_lasx_xvslti_b:
2106 case Intrinsic::loongarch_lasx_xvslti_h:
2107 case Intrinsic::loongarch_lasx_xvslti_w:
2108 case Intrinsic::loongarch_lasx_xvslti_d:
2109 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2110 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2111 case Intrinsic::loongarch_lsx_vsrani_h_w:
2112 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2113 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2114 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2115 case Intrinsic::loongarch_lsx_vssrani_h_w:
2116 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2117 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2118 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2119 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2120 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2121 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2122 case Intrinsic::loongarch_lsx_vfrstpi_b:
2123 case Intrinsic::loongarch_lsx_vfrstpi_h:
2124 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2125 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2126 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2127 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2128 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2129 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2130 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2131 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2132 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2133 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2134 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2135 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2136 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2137 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2138 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2139 case Intrinsic::loongarch_lsx_vsat_d:
2140 case Intrinsic::loongarch_lsx_vsat_du:
2141 case Intrinsic::loongarch_lsx_vrotri_d:
2142 case Intrinsic::loongarch_lsx_vsrlri_d:
2143 case Intrinsic::loongarch_lsx_vsrari_d:
2144 case Intrinsic::loongarch_lasx_xvsat_d:
2145 case Intrinsic::loongarch_lasx_xvsat_du:
2146 case Intrinsic::loongarch_lasx_xvrotri_d:
2147 case Intrinsic::loongarch_lasx_xvsrlri_d:
2148 case Intrinsic::loongarch_lasx_xvsrari_d:
2149 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2150 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2151 case Intrinsic::loongarch_lsx_vsrani_w_d:
2152 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2153 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2154 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2155 case Intrinsic::loongarch_lsx_vssrani_w_d:
2156 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2157 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2158 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2159 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2160 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2161 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2162 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2163 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2164 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2165 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2166 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2167 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2168 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2169 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2170 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2171 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2172 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2173 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2174 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2175 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2176 case Intrinsic::loongarch_lsx_vsrani_d_q:
2177 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2178 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2179 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2180 case Intrinsic::loongarch_lsx_vssrani_d_q:
2181 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2182 case Intrinsic::loongarch_lsx_vssrani_du_q:
2183 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2184 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2185 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2186 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2187 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2188 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2189 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2190 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2191 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2192 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2193 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2194 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2195 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2196 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2197 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2198 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2199 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2200 case Intrinsic::loongarch_lsx_vnori_b:
2201 case Intrinsic::loongarch_lsx_vshuf4i_b:
2202 case Intrinsic::loongarch_lsx_vshuf4i_h:
2203 case Intrinsic::loongarch_lsx_vshuf4i_w:
2204 case Intrinsic::loongarch_lasx_xvnori_b:
2205 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2206 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2207 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2208 case Intrinsic::loongarch_lasx_xvpermi_d:
2209 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2210 case Intrinsic::loongarch_lsx_vshuf4i_d:
2211 case Intrinsic::loongarch_lsx_vpermi_w:
2212 case Intrinsic::loongarch_lsx_vbitseli_b:
2213 case Intrinsic::loongarch_lsx_vextrins_b:
2214 case Intrinsic::loongarch_lsx_vextrins_h:
2215 case Intrinsic::loongarch_lsx_vextrins_w:
2216 case Intrinsic::loongarch_lsx_vextrins_d:
2217 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2218 case Intrinsic::loongarch_lasx_xvpermi_w:
2219 case Intrinsic::loongarch_lasx_xvpermi_q:
2220 case Intrinsic::loongarch_lasx_xvbitseli_b:
2221 case Intrinsic::loongarch_lasx_xvextrins_b:
2222 case Intrinsic::loongarch_lasx_xvextrins_h:
2223 case Intrinsic::loongarch_lasx_xvextrins_w:
2224 case Intrinsic::loongarch_lasx_xvextrins_d:
2225 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2226 case Intrinsic::loongarch_lsx_vrepli_b:
2227 case Intrinsic::loongarch_lsx_vrepli_h:
2228 case Intrinsic::loongarch_lsx_vrepli_w:
2229 case Intrinsic::loongarch_lsx_vrepli_d:
2230 case Intrinsic::loongarch_lasx_xvrepli_b:
2231 case Intrinsic::loongarch_lasx_xvrepli_h:
2232 case Intrinsic::loongarch_lasx_xvrepli_w:
2233 case Intrinsic::loongarch_lasx_xvrepli_d:
2234 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2235 case Intrinsic::loongarch_lsx_vldi:
2236 case Intrinsic::loongarch_lasx_xvldi:
2237 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
// Helper function that emits an error message for intrinsics with a chain and
// returns the merged values of a UNDEF and the chain.
2243 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2244 StringRef ErrorMsg,
2245 SelectionDAG &DAG) {
2246 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2247 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                             SDLoc(Op));
}
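// Custom lowering for intrinsics with a chain: emit diagnostics for
// unsupported or out-of-range uses and map the CSR/IOCSR/CPUCFG style
// intrinsics onto their LoongArchISD nodes.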
2251 SDValue
2252 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2253 SelectionDAG &DAG) const {
2254 SDLoc DL(Op);
2255 MVT GRLenVT = Subtarget.getGRLenVT();
2256 EVT VT = Op.getValueType();
2257 SDValue Chain = Op.getOperand(0);
2258 const StringRef ErrorMsgOOR = "argument out of range";
2259 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2260 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2262 switch (Op.getConstantOperandVal(1)) {
2263 default:
2264 return Op;
2265 case Intrinsic::loongarch_crc_w_b_w:
2266 case Intrinsic::loongarch_crc_w_h_w:
2267 case Intrinsic::loongarch_crc_w_w_w:
2268 case Intrinsic::loongarch_crc_w_d_w:
2269 case Intrinsic::loongarch_crcc_w_b_w:
2270 case Intrinsic::loongarch_crcc_w_h_w:
2271 case Intrinsic::loongarch_crcc_w_w_w:
2272 case Intrinsic::loongarch_crcc_w_d_w:
2273 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2274 case Intrinsic::loongarch_csrrd_w:
2275 case Intrinsic::loongarch_csrrd_d: {
2276 unsigned Imm = Op.getConstantOperandVal(2);
2277 return !isUInt<14>(Imm)
2278 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2279 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2280 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2282 case Intrinsic::loongarch_csrwr_w:
2283 case Intrinsic::loongarch_csrwr_d: {
2284 unsigned Imm = Op.getConstantOperandVal(3);
2285 return !isUInt<14>(Imm)
2286 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2287 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2288 {Chain, Op.getOperand(2),
2289 DAG.getConstant(Imm, DL, GRLenVT)});
2291 case Intrinsic::loongarch_csrxchg_w:
2292 case Intrinsic::loongarch_csrxchg_d: {
2293 unsigned Imm = Op.getConstantOperandVal(4);
2294 return !isUInt<14>(Imm)
2295 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2296 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2297 {Chain, Op.getOperand(2), Op.getOperand(3),
2298 DAG.getConstant(Imm, DL, GRLenVT)});
2300 case Intrinsic::loongarch_iocsrrd_d: {
2301 return DAG.getNode(
2302 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2303 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2305 #define IOCSRRD_CASE(NAME, NODE) \
2306 case Intrinsic::loongarch_##NAME: { \
2307 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2308 {Chain, Op.getOperand(2)}); \
2310 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2311 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2312 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2313 #undef IOCSRRD_CASE
2314 case Intrinsic::loongarch_cpucfg: {
2315 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2316 {Chain, Op.getOperand(2)});
2318 case Intrinsic::loongarch_lddir_d: {
2319 unsigned Imm = Op.getConstantOperandVal(3);
2320 return !isUInt<8>(Imm)
2321 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2322 : Op;
2324 case Intrinsic::loongarch_movfcsr2gr: {
2325 if (!Subtarget.hasBasicF())
2326 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2327 unsigned Imm = Op.getConstantOperandVal(2);
2328 return !isUInt<2>(Imm)
2329 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2330 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2331 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2333 case Intrinsic::loongarch_lsx_vld:
2334 case Intrinsic::loongarch_lsx_vldrepl_b:
2335 case Intrinsic::loongarch_lasx_xvld:
2336 case Intrinsic::loongarch_lasx_xvldrepl_b:
2337 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2338 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2339 : SDValue();
2340 case Intrinsic::loongarch_lsx_vldrepl_h:
2341 case Intrinsic::loongarch_lasx_xvldrepl_h:
2342 return !isShiftedInt<11, 1>(
2343 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2344 ? emitIntrinsicWithChainErrorMessage(
2345 Op, "argument out of range or not a multiple of 2", DAG)
2346 : SDValue();
2347 case Intrinsic::loongarch_lsx_vldrepl_w:
2348 case Intrinsic::loongarch_lasx_xvldrepl_w:
2349 return !isShiftedInt<10, 2>(
2350 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2351 ? emitIntrinsicWithChainErrorMessage(
2352 Op, "argument out of range or not a multiple of 4", DAG)
2353 : SDValue();
2354 case Intrinsic::loongarch_lsx_vldrepl_d:
2355 case Intrinsic::loongarch_lasx_xvldrepl_d:
2356 return !isShiftedInt<9, 3>(
2357 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2358 ? emitIntrinsicWithChainErrorMessage(
2359 Op, "argument out of range or not a multiple of 8", DAG)
2360 : SDValue();
// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
2366 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2367 SelectionDAG &DAG) {
2369 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}
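// Custom lowering for void intrinsics: validate the immediate operands and
// lower the barrier, cache-op and IOCSR-write style intrinsics to their
// LoongArchISD counterparts.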
2373 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2374 SelectionDAG &DAG) const {
2375 SDLoc DL(Op);
2376 MVT GRLenVT = Subtarget.getGRLenVT();
2377 SDValue Chain = Op.getOperand(0);
2378 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2379 SDValue Op2 = Op.getOperand(2);
2380 const StringRef ErrorMsgOOR = "argument out of range";
2381 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2382 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2383 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2385 switch (IntrinsicEnum) {
2386 default:
2387 // TODO: Add more Intrinsics.
2388 return SDValue();
2389 case Intrinsic::loongarch_cacop_d:
2390 case Intrinsic::loongarch_cacop_w: {
2391 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2392 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2393 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2394 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2395 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2396 unsigned Imm1 = Op2->getAsZExtVal();
2397 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2398 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2399 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2400 return Op;
2402 case Intrinsic::loongarch_dbar: {
2403 unsigned Imm = Op2->getAsZExtVal();
2404 return !isUInt<15>(Imm)
2405 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2406 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2407 DAG.getConstant(Imm, DL, GRLenVT));
2409 case Intrinsic::loongarch_ibar: {
2410 unsigned Imm = Op2->getAsZExtVal();
2411 return !isUInt<15>(Imm)
2412 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2413 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2414 DAG.getConstant(Imm, DL, GRLenVT));
2416 case Intrinsic::loongarch_break: {
2417 unsigned Imm = Op2->getAsZExtVal();
2418 return !isUInt<15>(Imm)
2419 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2420 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2421 DAG.getConstant(Imm, DL, GRLenVT));
2423 case Intrinsic::loongarch_movgr2fcsr: {
2424 if (!Subtarget.hasBasicF())
2425 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2426 unsigned Imm = Op2->getAsZExtVal();
2427 return !isUInt<2>(Imm)
2428 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2429 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2430 DAG.getConstant(Imm, DL, GRLenVT),
2431 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2432 Op.getOperand(3)));
2434 case Intrinsic::loongarch_syscall: {
2435 unsigned Imm = Op2->getAsZExtVal();
2436 return !isUInt<15>(Imm)
2437 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2438 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2439 DAG.getConstant(Imm, DL, GRLenVT));
2441 #define IOCSRWR_CASE(NAME, NODE) \
2442 case Intrinsic::loongarch_##NAME: { \
2443 SDValue Op3 = Op.getOperand(3); \
2444 return Subtarget.is64Bit() \
2445 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2446 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2447 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2448 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2449 Op3); \
2451 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2452 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2453 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2454 #undef IOCSRWR_CASE
2455 case Intrinsic::loongarch_iocsrwr_d: {
2456 return !Subtarget.is64Bit()
2457 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2458 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2459 Op2,
2460 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2461 Op.getOperand(3)));
2463 #define ASRT_LE_GT_CASE(NAME) \
2464 case Intrinsic::loongarch_##NAME: { \
2465 return !Subtarget.is64Bit() \
2466 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2467 : Op; \
2469 ASRT_LE_GT_CASE(asrtle_d)
2470 ASRT_LE_GT_CASE(asrtgt_d)
2471 #undef ASRT_LE_GT_CASE
2472 case Intrinsic::loongarch_ldpte_d: {
2473 unsigned Imm = Op.getConstantOperandVal(3);
2474 return !Subtarget.is64Bit()
2475 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2476 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2477 : Op;
2479 case Intrinsic::loongarch_lsx_vst:
2480 case Intrinsic::loongarch_lasx_xvst:
2481 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2482 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2483 : SDValue();
2484 case Intrinsic::loongarch_lasx_xvstelm_b:
2485 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2486 !isUInt<5>(Op.getConstantOperandVal(5)))
2487 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2488 : SDValue();
2489 case Intrinsic::loongarch_lsx_vstelm_b:
2490 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2491 !isUInt<4>(Op.getConstantOperandVal(5)))
2492 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2493 : SDValue();
2494 case Intrinsic::loongarch_lasx_xvstelm_h:
2495 return (!isShiftedInt<8, 1>(
2496 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2497 !isUInt<4>(Op.getConstantOperandVal(5)))
2498 ? emitIntrinsicErrorMessage(
2499 Op, "argument out of range or not a multiple of 2", DAG)
2500 : SDValue();
2501 case Intrinsic::loongarch_lsx_vstelm_h:
2502 return (!isShiftedInt<8, 1>(
2503 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2504 !isUInt<3>(Op.getConstantOperandVal(5)))
2505 ? emitIntrinsicErrorMessage(
2506 Op, "argument out of range or not a multiple of 2", DAG)
2507 : SDValue();
2508 case Intrinsic::loongarch_lasx_xvstelm_w:
2509 return (!isShiftedInt<8, 2>(
2510 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2511 !isUInt<3>(Op.getConstantOperandVal(5)))
2512 ? emitIntrinsicErrorMessage(
2513 Op, "argument out of range or not a multiple of 4", DAG)
2514 : SDValue();
2515 case Intrinsic::loongarch_lsx_vstelm_w:
2516 return (!isShiftedInt<8, 2>(
2517 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2518 !isUInt<2>(Op.getConstantOperandVal(5)))
2519 ? emitIntrinsicErrorMessage(
2520 Op, "argument out of range or not a multiple of 4", DAG)
2521 : SDValue();
2522 case Intrinsic::loongarch_lasx_xvstelm_d:
2523 return (!isShiftedInt<8, 3>(
2524 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2525 !isUInt<2>(Op.getConstantOperandVal(5)))
2526 ? emitIntrinsicErrorMessage(
2527 Op, "argument out of range or not a multiple of 8", DAG)
2528 : SDValue();
2529 case Intrinsic::loongarch_lsx_vstelm_d:
2530 return (!isShiftedInt<8, 3>(
2531 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2532 !isUInt<1>(Op.getConstantOperandVal(5)))
2533 ? emitIntrinsicErrorMessage(
2534 Op, "argument out of range or not a multiple of 8", DAG)
2535 : SDValue();
2539 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2540 SelectionDAG &DAG) const {
2541 SDLoc DL(Op);
2542 SDValue Lo = Op.getOperand(0);
2543 SDValue Hi = Op.getOperand(1);
2544 SDValue Shamt = Op.getOperand(2);
2545 EVT VT = Lo.getValueType();
  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)
2554 SDValue Zero = DAG.getConstant(0, DL, VT);
2555 SDValue One = DAG.getConstant(1, DL, VT);
2556 SDValue MinusGRLen =
2557 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2558 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2559 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2560 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2562 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2563 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2564 SDValue ShiftRightLo =
2565 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2566 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2567 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2568 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2570 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2572 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2573 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2575 SDValue Parts[2] = {Lo, Hi};
2576 return DAG.getMergeValues(Parts, DL);
2579 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2580 SelectionDAG &DAG,
2581 bool IsSRA) const {
2582 SDLoc DL(Op);
2583 SDValue Lo = Op.getOperand(0);
2584 SDValue Hi = Op.getOperand(1);
2585 SDValue Shamt = Op.getOperand(2);
2586 EVT VT = Lo.getValueType();
  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;
2604 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2606 SDValue Zero = DAG.getConstant(0, DL, VT);
2607 SDValue One = DAG.getConstant(1, DL, VT);
2608 SDValue MinusGRLen =
2609 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2610 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2611 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2612 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2614 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2615 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2616 SDValue ShiftLeftHi =
2617 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2618 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2619 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2620 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2621 SDValue HiFalse =
2622 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2624 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2626 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2627 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2629 SDValue Parts[2] = {Lo, Hi};
2630 return DAG.getMergeValues(Parts, DL);
2633 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2634 // form of the given Opcode.
2635 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2636 switch (Opcode) {
2637 default:
2638 llvm_unreachable("Unexpected opcode");
2639 case ISD::SDIV:
2640 return LoongArchISD::DIV_W;
2641 case ISD::UDIV:
2642 return LoongArchISD::DIV_WU;
2643 case ISD::SREM:
2644 return LoongArchISD::MOD_W;
2645 case ISD::UREM:
2646 return LoongArchISD::MOD_WU;
2647 case ISD::SHL:
2648 return LoongArchISD::SLL_W;
2649 case ISD::SRA:
2650 return LoongArchISD::SRA_W;
2651 case ISD::SRL:
2652 return LoongArchISD::SRL_W;
2653 case ISD::ROTL:
2654 case ISD::ROTR:
2655 return LoongArchISD::ROTR_W;
2656 case ISD::CTTZ:
2657 return LoongArchISD::CTZ_W;
2658 case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W nodes later on, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
2668 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2669 unsigned ExtOpc = ISD::ANY_EXTEND) {
2670 SDLoc DL(N);
2671 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2672 SDValue NewOp0, NewRes;
2674 switch (NumOp) {
2675 default:
2676 llvm_unreachable("Unexpected NumOp");
2677 case 1: {
2678 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2679 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2680 break;
2682 case 2: {
2683 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2684 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2685 if (N->getOpcode() == ISD::ROTL) {
2686 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2687 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2689 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
    // TODO: Handle more NumOp cases.
  }

  // ReplaceNodeResults requires that we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, in order to reduce the number of sign-extension instructions.
2702 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2703 SDLoc DL(N);
2704 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2705 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2706 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2707 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2708 DAG.getValueType(MVT::i32));
2709 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
// Helper function that emits an error message for intrinsics with or without a
// chain, and returns a UNDEF value and (when present) the chain as the results.
2714 static void emitErrorAndReplaceIntrinsicResults(
2715 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2716 StringRef ErrorMsg, bool WithChain = true) {
2717 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2718 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2719 if (!WithChain)
2720 return;
  Results.push_back(N->getOperand(0));
}
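// Replace an LSX/LASX vpickve2gr intrinsic whose result type needs
// legalization: range-check the lane index, emit a VPICK_SEXT_ELT or
// VPICK_ZEXT_ELT on GRLenVT, and truncate back to the original result type.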
2724 template <unsigned N>
2725 static void
2726 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2727 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2728 unsigned ResOp) {
2729 const StringRef ErrorMsgOOR = "argument out of range";
2730 unsigned Imm = Node->getConstantOperandVal(2);
2731 if (!isUInt<N>(Imm)) {
2732 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2733 /*WithChain=*/false);
2734 return;
2736 SDLoc DL(Node);
2737 SDValue Vec = Node->getOperand(1);
2739 SDValue PickElt =
2740 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2741 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2742 DAG.getValueType(Vec.getValueType().getVectorElementType()));
2743 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
                                    PickElt.getValue(0)));
}
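// Replace an LSX/LASX bz/bnz style intrinsic with the corresponding
// VALL_*/VANY_* node on GRLenVT, truncated back to the original result type.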
2747 static void replaceVecCondBranchResults(SDNode *N,
2748 SmallVectorImpl<SDValue> &Results,
2749 SelectionDAG &DAG,
2750 const LoongArchSubtarget &Subtarget,
2751 unsigned ResOp) {
2752 SDLoc DL(N);
2753 SDValue Vec = N->getOperand(1);
2755 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2756 Results.push_back(
2757 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2760 static void
2761 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2762 SelectionDAG &DAG,
2763 const LoongArchSubtarget &Subtarget) {
2764 switch (N->getConstantOperandVal(0)) {
2765 default:
2766 llvm_unreachable("Unexpected Intrinsic.");
2767 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2768 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2769 LoongArchISD::VPICK_SEXT_ELT);
2770 break;
2771 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2772 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2773 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2774 LoongArchISD::VPICK_SEXT_ELT);
2775 break;
2776 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2777 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2778 LoongArchISD::VPICK_SEXT_ELT);
2779 break;
2780 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2781 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2782 LoongArchISD::VPICK_ZEXT_ELT);
2783 break;
2784 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2785 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2786 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2787 LoongArchISD::VPICK_ZEXT_ELT);
2788 break;
2789 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2790 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2791 LoongArchISD::VPICK_ZEXT_ELT);
2792 break;
2793 case Intrinsic::loongarch_lsx_bz_b:
2794 case Intrinsic::loongarch_lsx_bz_h:
2795 case Intrinsic::loongarch_lsx_bz_w:
2796 case Intrinsic::loongarch_lsx_bz_d:
2797 case Intrinsic::loongarch_lasx_xbz_b:
2798 case Intrinsic::loongarch_lasx_xbz_h:
2799 case Intrinsic::loongarch_lasx_xbz_w:
2800 case Intrinsic::loongarch_lasx_xbz_d:
2801 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2802 LoongArchISD::VALL_ZERO);
2803 break;
2804 case Intrinsic::loongarch_lsx_bz_v:
2805 case Intrinsic::loongarch_lasx_xbz_v:
2806 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2807 LoongArchISD::VANY_ZERO);
2808 break;
2809 case Intrinsic::loongarch_lsx_bnz_b:
2810 case Intrinsic::loongarch_lsx_bnz_h:
2811 case Intrinsic::loongarch_lsx_bnz_w:
2812 case Intrinsic::loongarch_lsx_bnz_d:
2813 case Intrinsic::loongarch_lasx_xbnz_b:
2814 case Intrinsic::loongarch_lasx_xbnz_h:
2815 case Intrinsic::loongarch_lasx_xbnz_w:
2816 case Intrinsic::loongarch_lasx_xbnz_d:
2817 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2818 LoongArchISD::VALL_NONZERO);
2819 break;
2820 case Intrinsic::loongarch_lsx_bnz_v:
2821 case Intrinsic::loongarch_lasx_xbnz_v:
2822 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2823 LoongArchISD::VANY_NONZERO);
    break;
  }
}
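// Custom type legalization: on LA64, 32-bit integer operations are widened to
// their 64-bit *_W forms; FP conversions, byte/bit-reversal operations and
// chain-carrying intrinsics are re-emitted with legal result types below.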
2828 void LoongArchTargetLowering::ReplaceNodeResults(
2829 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2830 SDLoc DL(N);
2831 EVT VT = N->getValueType(0);
2832 switch (N->getOpcode()) {
2833 default:
2834 llvm_unreachable("Don't know how to legalize this operation");
2835 case ISD::ADD:
2836 case ISD::SUB:
2837 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2838 "Unexpected custom legalisation");
2839 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2840 break;
2841 case ISD::SDIV:
2842 case ISD::UDIV:
2843 case ISD::SREM:
2844 case ISD::UREM:
2845 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2846 "Unexpected custom legalisation");
2847 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2848 Subtarget.hasDiv32() && VT == MVT::i32
2849 ? ISD::ANY_EXTEND
2850 : ISD::SIGN_EXTEND));
2851 break;
2852 case ISD::SHL:
2853 case ISD::SRA:
2854 case ISD::SRL:
2855 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2856 "Unexpected custom legalisation");
2857 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2858 Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
2862 case ISD::ROTL:
2863 case ISD::ROTR:
2864 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2865 "Unexpected custom legalisation");
2866 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2867 break;
2868 case ISD::FP_TO_SINT: {
2869 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2870 "Unexpected custom legalisation");
2871 SDValue Src = N->getOperand(0);
2872 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2873 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2874 TargetLowering::TypeSoftenFloat) {
2875 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2876 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2877 return;
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to the default legalization, we'd end up with 'di'.
2881 RTLIB::Libcall LC;
2882 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2883 MakeLibCallOptions CallOptions;
2884 EVT OpVT = Src.getValueType();
2885 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2886 SDValue Chain = SDValue();
2887 SDValue Result;
2888 std::tie(Result, Chain) =
2889 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2890 Results.push_back(Result);
2891 break;
2893 case ISD::BITCAST: {
2894 SDValue Src = N->getOperand(0);
2895 EVT SrcVT = Src.getValueType();
2896 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2897 Subtarget.hasBasicF()) {
2898 SDValue Dst =
2899 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2900 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2902 break;
2904 case ISD::FP_TO_UINT: {
2905 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2906 "Unexpected custom legalisation");
2907 auto &TLI = DAG.getTargetLoweringInfo();
2908 SDValue Tmp1, Tmp2;
2909 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2910 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2911 break;
2913 case ISD::BSWAP: {
2914 SDValue Src = N->getOperand(0);
2915 assert((VT == MVT::i16 || VT == MVT::i32) &&
2916 "Unexpected custom legalization");
2917 MVT GRLenVT = Subtarget.getGRLenVT();
2918 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2919 SDValue Tmp;
2920 switch (VT.getSizeInBits()) {
2921 default:
2922 llvm_unreachable("Unexpected operand width");
2923 case 16:
2924 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2925 break;
2926 case 32:
      // Only LA64 will get here, due to the size mismatch between VT and
      // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
2929 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2930 break;
2932 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2933 break;
2935 case ISD::BITREVERSE: {
2936 SDValue Src = N->getOperand(0);
2937 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2938 "Unexpected custom legalization");
2939 MVT GRLenVT = Subtarget.getGRLenVT();
2940 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2941 SDValue Tmp;
2942 switch (VT.getSizeInBits()) {
2943 default:
2944 llvm_unreachable("Unexpected operand width");
2945 case 8:
2946 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
2947 break;
2948 case 32:
2949 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
2950 break;
2952 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2953 break;
2955 case ISD::CTLZ:
2956 case ISD::CTTZ: {
2957 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2958 "Unexpected custom legalisation");
2959 Results.push_back(customLegalizeToWOp(N, DAG, 1));
2960 break;
2962 case ISD::INTRINSIC_W_CHAIN: {
2963 SDValue Chain = N->getOperand(0);
2964 SDValue Op2 = N->getOperand(2);
2965 MVT GRLenVT = Subtarget.getGRLenVT();
2966 const StringRef ErrorMsgOOR = "argument out of range";
2967 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2968 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2970 switch (N->getConstantOperandVal(1)) {
2971 default:
2972 llvm_unreachable("Unexpected Intrinsic.");
2973 case Intrinsic::loongarch_movfcsr2gr: {
2974 if (!Subtarget.hasBasicF()) {
2975 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
2976 return;
2978 unsigned Imm = Op2->getAsZExtVal();
2979 if (!isUInt<2>(Imm)) {
2980 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2981 return;
2983 SDValue MOVFCSR2GRResults = DAG.getNode(
2984 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
2985 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2986 Results.push_back(
2987 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
2988 Results.push_back(MOVFCSR2GRResults.getValue(1));
2989 break;
2991 #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
2992 case Intrinsic::loongarch_##NAME: { \
2993 SDValue NODE = DAG.getNode( \
2994 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2995 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2996 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2997 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2998 Results.push_back(NODE.getValue(1)); \
2999 break; \
3001 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3002 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3003 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3004 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3005 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3006 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3007 #undef CRC_CASE_EXT_BINARYOP
3009 #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3010 case Intrinsic::loongarch_##NAME: { \
3011 SDValue NODE = DAG.getNode( \
3012 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3013 {Chain, Op2, \
3014 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3015 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3016 Results.push_back(NODE.getValue(1)); \
3017 break; \
3019 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3020 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3021 #undef CRC_CASE_EXT_UNARYOP
3022 #define CSR_CASE(ID) \
3023 case Intrinsic::loongarch_##ID: { \
3024 if (!Subtarget.is64Bit()) \
3025 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3026 break; \
3028 CSR_CASE(csrrd_d);
3029 CSR_CASE(csrwr_d);
3030 CSR_CASE(csrxchg_d);
3031 CSR_CASE(iocsrrd_d);
3032 #undef CSR_CASE
3033 case Intrinsic::loongarch_csrrd_w: {
3034 unsigned Imm = Op2->getAsZExtVal();
3035 if (!isUInt<14>(Imm)) {
3036 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3037 return;
3039 SDValue CSRRDResults =
3040 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3041 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3042 Results.push_back(
3043 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3044 Results.push_back(CSRRDResults.getValue(1));
3045 break;
3047 case Intrinsic::loongarch_csrwr_w: {
3048 unsigned Imm = N->getConstantOperandVal(3);
3049 if (!isUInt<14>(Imm)) {
3050 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3051 return;
3053 SDValue CSRWRResults =
3054 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3055 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3056 DAG.getConstant(Imm, DL, GRLenVT)});
3057 Results.push_back(
3058 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3059 Results.push_back(CSRWRResults.getValue(1));
3060 break;
3062 case Intrinsic::loongarch_csrxchg_w: {
3063 unsigned Imm = N->getConstantOperandVal(4);
3064 if (!isUInt<14>(Imm)) {
3065 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3066 return;
3068 SDValue CSRXCHGResults = DAG.getNode(
3069 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3070 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3071 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3072 DAG.getConstant(Imm, DL, GRLenVT)});
3073 Results.push_back(
3074 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3075 Results.push_back(CSRXCHGResults.getValue(1));
3076 break;
3078 #define IOCSRRD_CASE(NAME, NODE) \
3079 case Intrinsic::loongarch_##NAME: { \
3080 SDValue IOCSRRDResults = \
3081 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3082 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3083 Results.push_back( \
3084 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3085 Results.push_back(IOCSRRDResults.getValue(1)); \
3086 break; \
3088 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3089 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3090 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3091 #undef IOCSRRD_CASE
3092 case Intrinsic::loongarch_cpucfg: {
3093 SDValue CPUCFGResults =
3094 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3095 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3096 Results.push_back(
3097 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3098 Results.push_back(CPUCFGResults.getValue(1));
3099 break;
3101 case Intrinsic::loongarch_lddir_d: {
3102 if (!Subtarget.is64Bit()) {
3103 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3104 return;
3106 break;
3109 break;
3111 case ISD::READ_REGISTER: {
3112 if (Subtarget.is64Bit())
3113 DAG.getContext()->emitError(
3114 "On LA64, only 64-bit registers can be read.");
3115 else
3116 DAG.getContext()->emitError(
3117 "On LA32, only 32-bit registers can be read.");
3118 Results.push_back(DAG.getUNDEF(VT));
3119 Results.push_back(N->getOperand(0));
3120 break;
3122 case ISD::INTRINSIC_WO_CHAIN: {
3123 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3124 break;
3126 case ISD::LROUND: {
3127 SDValue Op0 = N->getOperand(0);
3128 EVT OpVT = Op0.getValueType();
3129 RTLIB::Libcall LC =
3130 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3131 MakeLibCallOptions CallOptions;
3132 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3133 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3134 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3135 Results.push_back(Result);
3136 break;
3141 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3142 TargetLowering::DAGCombinerInfo &DCI,
3143 const LoongArchSubtarget &Subtarget) {
3144 if (DCI.isBeforeLegalizeOps())
3145 return SDValue();
3147 SDValue FirstOperand = N->getOperand(0);
3148 SDValue SecondOperand = N->getOperand(1);
3149 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3150 EVT ValTy = N->getValueType(0);
3151 SDLoc DL(N);
3152 uint64_t lsb, msb;
3153 unsigned SMIdx, SMLen;
3154 ConstantSDNode *CN;
3155 SDValue NewOperand;
3156 MVT GRLenVT = Subtarget.getGRLenVT();
3158 // Op's second operand must be a shifted mask.
3159 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3160 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3161 return SDValue();
3163 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3164 // Pattern match BSTRPICK.
3165 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3166 // => BSTRPICK $dst, $src, msb, lsb
3167 // where msb = lsb + len - 1
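// For example (values chosen purely for illustration):
//   $dst = and (srl $src, 8), 0xff
//   =>  BSTRPICK $dst, $src, 15, 8   (len = 8, lsb = 8, msb = 15)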
3169 // The second operand of the shift must be an immediate.
3170 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3171 return SDValue();
3173 lsb = CN->getZExtValue();
3175 // Return if the shifted mask does not start at bit 0 or the sum of its
3176 // length and lsb exceeds the word's size.
3177 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3178 return SDValue();
3180 NewOperand = FirstOperand.getOperand(0);
3181 } else {
3182 // Pattern match BSTRPICK.
3183 // $dst = and $src, (2**len - 1), if len > 12
3184 // => BSTRPICK $dst, $src, msb, lsb
3185 // where lsb = 0 and msb = len - 1
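// For example: $dst = and $src, 0xffff  =>  BSTRPICK $dst, $src, 15, 0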
3187 // If the mask is <= 0xfff, andi can be used instead.
3188 if (CN->getZExtValue() <= 0xfff)
3189 return SDValue();
3191 // Return if the mask's MSB exceeds the width of the value type.
3192 if (SMIdx + SMLen > ValTy.getSizeInBits())
3193 return SDValue();
3195 if (SMIdx > 0) {
3196 // Omit if the constant has more than 2 uses. This is a conservative
3197 // decision. Whether it is a win depends on the HW microarchitecture.
3198 // However, it should always be better for 1 and 2 uses.
3199 if (CN->use_size() > 2)
3200 return SDValue();
3201 // Return if the constant can be composed with a single LU12I.W.
3202 if ((CN->getZExtValue() & 0xfff) == 0)
3203 return SDValue();
3204 // Return if the constant can be composed with a single ADDI using
3205 // the zero register.
3206 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3207 return SDValue();
3210 lsb = SMIdx;
3211 NewOperand = FirstOperand;
3214 msb = lsb + SMLen - 1;
3215 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3216 DAG.getConstant(msb, DL, GRLenVT),
3217 DAG.getConstant(lsb, DL, GRLenVT));
3218 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3219 return NR0;
3220 // Try to optimize to
3221 // bstrpick $Rd, $Rs, msb, lsb
3222 // slli $Rd, $Rd, lsb
3223 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3224 DAG.getConstant(lsb, DL, GRLenVT));
3227 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3228 TargetLowering::DAGCombinerInfo &DCI,
3229 const LoongArchSubtarget &Subtarget) {
3230 if (DCI.isBeforeLegalizeOps())
3231 return SDValue();
3233 // $dst = srl (and $src, Mask), Shamt
3234 // =>
3235 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3236 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
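// For example (illustrative values): $dst = srl (and $src, 0xff00), 8
// => BSTRPICK $dst, $src, 15, 8 (MaskIdx = 8, MaskLen = 8, Shamt = 8).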
3239 SDValue FirstOperand = N->getOperand(0);
3240 ConstantSDNode *CN;
3241 EVT ValTy = N->getValueType(0);
3242 SDLoc DL(N);
3243 MVT GRLenVT = Subtarget.getGRLenVT();
3244 unsigned MaskIdx, MaskLen;
3245 uint64_t Shamt;
3247 // The first operand must be an AND and the second operand of the AND must be
3248 // a shifted mask.
3249 if (FirstOperand.getOpcode() != ISD::AND ||
3250 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3251 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3252 return SDValue();
3254 // The second operand (shift amount) must be an immediate.
3255 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3256 return SDValue();
3258 Shamt = CN->getZExtValue();
3259 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3260 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3261 FirstOperand->getOperand(0),
3262 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3263 DAG.getConstant(Shamt, DL, GRLenVT));
3265 return SDValue();
3268 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3269 TargetLowering::DAGCombinerInfo &DCI,
3270 const LoongArchSubtarget &Subtarget) {
3271 MVT GRLenVT = Subtarget.getGRLenVT();
3272 EVT ValTy = N->getValueType(0);
3273 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3274 ConstantSDNode *CN0, *CN1;
3275 SDLoc DL(N);
3276 unsigned ValBits = ValTy.getSizeInBits();
3277 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3278 unsigned Shamt;
3279 bool SwapAndRetried = false;
3281 if (DCI.isBeforeLegalizeOps())
3282 return SDValue();
3284 if (ValBits != 32 && ValBits != 64)
3285 return SDValue();
3287 Retry:
3288 // 1st pattern to match BSTRINS:
3289 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3290 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3291 // =>
3292 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
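// For example, with 32-bit values (chosen only for illustration):
//   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0x0000ff00)
//   =>  BSTRINS X, Y, 15, 8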
3293 if (N0.getOpcode() == ISD::AND &&
3294 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3295 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3296 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3297 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3298 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3299 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3300 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3301 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3302 (MaskIdx0 + MaskLen0 <= ValBits)) {
3303 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3304 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3305 N1.getOperand(0).getOperand(0),
3306 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3307 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3310 // 2nd pattern to match BSTRINS:
3311 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3312 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3313 // =>
3314 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
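// For example: R = or (and X, 0xffff00ff), (shl (and Y, 0xff), 8)
//              =>  BSTRINS X, Y, 15, 8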
3315 if (N0.getOpcode() == ISD::AND &&
3316 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3317 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3318 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3319 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3320 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3321 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3322 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3323 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3324 (MaskIdx0 + MaskLen0 <= ValBits)) {
3325 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3326 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3327 N1.getOperand(0).getOperand(0),
3328 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3329 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3332 // 3rd pattern to match BSTRINS:
3333 // R = or (and X, mask0), (and Y, mask1)
3334 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3335 // =>
3336 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3337 // where msb = lsb + size - 1
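// For example: R = or (and X, 0xffff00ff), (and Y, 0x0000ff00)
//              =>  BSTRINS X, (srl (and Y, 0x0000ff00), 8), 15, 8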
3338 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3339 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3340 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3341 (MaskIdx0 + MaskLen0 <= 64) &&
3342 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3343 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3344 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3345 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3346 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3347 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3348 DAG.getConstant(ValBits == 32
3349 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3350 : (MaskIdx0 + MaskLen0 - 1),
3351 DL, GRLenVT),
3352 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3355 // 4th pattern to match BSTRINS:
3356 // R = or (and X, mask), (shl Y, shamt)
3357 // where mask = (2**shamt - 1)
3358 // =>
3359 // R = BSTRINS X, Y, ValBits - 1, shamt
3360 // where ValBits = 32 or 64
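// For example, with ValBits = 32: R = or (and X, 0xff), (shl Y, 8)
//                                 =>  BSTRINS X, Y, 31, 8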
3361 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3362 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3363 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3364 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3365 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3366 (MaskIdx0 + MaskLen0 <= ValBits)) {
3367 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3368 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3369 N1.getOperand(0),
3370 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3371 DAG.getConstant(Shamt, DL, GRLenVT));
3374 // 5th pattern to match BSTRINS:
3375 // R = or (and X, mask), const
3376 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3377 // =>
3378 // R = BSTRINS X, (const >> lsb), msb, lsb
3379 // where msb = lsb + size - 1
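// For example, with 32-bit values: R = or (and X, 0xffff00ff), 0x1200
//                                  =>  BSTRINS X, (0x1200 >> 8), 15, 8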
3380 if (N0.getOpcode() == ISD::AND &&
3381 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3382 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3383 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3384 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3385 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3386 return DAG.getNode(
3387 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3388 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3389 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3390 : (MaskIdx0 + MaskLen0 - 1),
3391 DL, GRLenVT),
3392 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3395 // 6th pattern.
3396 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3397 // by the incoming bits are known to be zero.
3398 // =>
3399 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
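// For example: a = b | ((c & 0xff) << 8), with bits 15..8 of b known zero
//              =>  a = BSTRINS b, c, 15, 8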
3401 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3402 // pattern is more common than the 1st. So we put the 1st before the 6th in
3403 // order to match as many nodes as possible.
3404 ConstantSDNode *CNMask, *CNShamt;
3405 unsigned MaskIdx, MaskLen;
3406 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3407 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3408 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3409 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3410 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3411 Shamt = CNShamt->getZExtValue();
3412 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3413 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3414 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3415 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3416 N1.getOperand(0).getOperand(0),
3417 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3418 DAG.getConstant(Shamt, DL, GRLenVT));
3422 // 7th pattern.
3423 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3424 // overwritten by the incoming bits are known to be zero.
3425 // =>
3426 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
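// For example: a = b | ((c << 8) & 0xff00), with bits 15..8 of b known zero
//              =>  a = BSTRINS b, c, 15, 8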
3428 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3429 // before the 7th in order to match as many nodes as possible.
3430 if (N1.getOpcode() == ISD::AND &&
3431 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3432 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3433 N1.getOperand(0).getOpcode() == ISD::SHL &&
3434 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3435 CNShamt->getZExtValue() == MaskIdx) {
3436 APInt ShMask(ValBits, CNMask->getZExtValue());
3437 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3438 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3439 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3440 N1.getOperand(0).getOperand(0),
3441 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3442 DAG.getConstant(MaskIdx, DL, GRLenVT));
3446 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3447 if (!SwapAndRetried) {
3448 std::swap(N0, N1);
3449 SwapAndRetried = true;
3450 goto Retry;
3453 SwapAndRetried = false;
3454 Retry2:
3455 // 8th pattern.
3456 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3457 // the incoming bits are known to be zero.
3458 // =>
3459 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
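// For example: a = b | (c & 0xff00), with bits 15..8 of b known zero
//              =>  a = BSTRINS b, (c >> 8), 15, 8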
3461 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3462 // we put it here in order to match as many nodes as possible or generate
3463 // fewer instructions.
3464 if (N1.getOpcode() == ISD::AND &&
3465 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3466 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3467 APInt ShMask(ValBits, CNMask->getZExtValue());
3468 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3469 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3470 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3471 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3472 N1->getOperand(0),
3473 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3474 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3475 DAG.getConstant(MaskIdx, DL, GRLenVT));
3478 // Swap N0/N1 and retry.
3479 if (!SwapAndRetried) {
3480 std::swap(N0, N1);
3481 SwapAndRetried = true;
3482 goto Retry2;
3485 return SDValue();
3488 static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3489 ExtType = ISD::NON_EXTLOAD;
3491 switch (V.getNode()->getOpcode()) {
3492 case ISD::LOAD: {
3493 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3494 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3495 (LoadNode->getMemoryVT() == MVT::i16)) {
3496 ExtType = LoadNode->getExtensionType();
3497 return true;
3499 return false;
3501 case ISD::AssertSext: {
3502 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3503 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3504 ExtType = ISD::SEXTLOAD;
3505 return true;
3507 return false;
3509 case ISD::AssertZext: {
3510 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3511 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3512 ExtType = ISD::ZEXTLOAD;
3513 return true;
3515 return false;
3517 default:
3518 return false;
3521 return false;
3524 // Eliminate redundant truncation and zero-extension nodes.
3525 // * Case 1:
3526 // +------------+ +------------+ +------------+
3527 // | Input1 | | Input2 | | CC |
3528 // +------------+ +------------+ +------------+
3529 // | | |
3530 // V V +----+
3531 // +------------+ +------------+ |
3532 // | TRUNCATE | | TRUNCATE | |
3533 // +------------+ +------------+ |
3534 // | | |
3535 // V V |
3536 // +------------+ +------------+ |
3537 // | ZERO_EXT | | ZERO_EXT | |
3538 // +------------+ +------------+ |
3539 // | | |
3540 // | +-------------+ |
3541 // V V | |
3542 // +----------------+ | |
3543 // | AND | | |
3544 // +----------------+ | |
3545 // | | |
3546 // +---------------+ | |
3547 // | | |
3548 // V V V
3549 // +-------------+
3550 // | CMP |
3551 // +-------------+
3552 // * Case 2:
3553 // +------------+ +------------+ +-------------+ +------------+ +------------+
3554 // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3555 // +------------+ +------------+ +-------------+ +------------+ +------------+
3556 // | | | | |
3557 // V | | | |
3558 // +------------+ | | | |
3559 // | XOR |<---------------------+ | |
3560 // +------------+ | | |
3561 // | | | |
3562 // V V +---------------+ |
3563 // +------------+ +------------+ | |
3564 // | TRUNCATE | | TRUNCATE | | +-------------------------+
3565 // +------------+ +------------+ | |
3566 // | | | |
3567 // V V | |
3568 // +------------+ +------------+ | |
3569 // | ZERO_EXT | | ZERO_EXT | | |
3570 // +------------+ +------------+ | |
3571 // | | | |
3572 // V V | |
3573 // +----------------+ | |
3574 // | AND | | |
3575 // +----------------+ | |
3576 // | | |
3577 // +---------------+ | |
3578 // | | |
3579 // V V V
3580 // +-------------+
3581 // | CMP |
3582 // +-------------+
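// For instance, in case 1, if Input1 and Input2 are zero-extending i8 loads
// a and b, then (and (zext (trunc a)), (zext (trunc b))) seteq (zext (trunc b))
// can be rewritten as (and a, b) seteq b.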
3583 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3584 TargetLowering::DAGCombinerInfo &DCI,
3585 const LoongArchSubtarget &Subtarget) {
3586 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3588 SDNode *AndNode = N->getOperand(0).getNode();
3589 if (AndNode->getOpcode() != ISD::AND)
3590 return SDValue();
3592 SDValue AndInputValue2 = AndNode->getOperand(1);
3593 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3594 return SDValue();
3596 SDValue CmpInputValue = N->getOperand(1);
3597 SDValue AndInputValue1 = AndNode->getOperand(0);
3598 if (AndInputValue1.getOpcode() == ISD::XOR) {
3599 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3600 return SDValue();
3601 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3602 if (!CN || CN->getSExtValue() != -1)
3603 return SDValue();
3604 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3605 if (!CN || CN->getSExtValue() != 0)
3606 return SDValue();
3607 AndInputValue1 = AndInputValue1.getOperand(0);
3608 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3609 return SDValue();
3610 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3611 if (AndInputValue2 != CmpInputValue)
3612 return SDValue();
3613 } else {
3614 return SDValue();
3617 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3618 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3619 return SDValue();
3621 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3622 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3623 return SDValue();
3625 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3626 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3627 ISD::LoadExtType ExtType1;
3628 ISD::LoadExtType ExtType2;
3630 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3631 !checkValueWidth(TruncInputValue2, ExtType2))
3632 return SDValue();
3634 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3635 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3636 return SDValue();
3638 if ((ExtType2 != ISD::ZEXTLOAD) &&
3639 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3640 return SDValue();
3642 // These truncation and zero-extension nodes are not necessary; remove them.
3643 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3644 TruncInputValue1, TruncInputValue2);
3645 SDValue NewSetCC =
3646 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3647 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3648 return SDValue(N, 0);
3651 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
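// Byte-swapping a word and then bit-reversing the whole word leaves every
// byte in its original position with its bits reversed, which is exactly
// what BITREV_4B produces.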
3652 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3653 TargetLowering::DAGCombinerInfo &DCI,
3654 const LoongArchSubtarget &Subtarget) {
3655 if (DCI.isBeforeLegalizeOps())
3656 return SDValue();
3658 SDValue Src = N->getOperand(0);
3659 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3660 return SDValue();
3662 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3663 Src.getOperand(0));
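// Validate that operand ImmOp of Node is an N-bit immediate (signed if
// IsSigned, unsigned otherwise) and return it as a GRLenVT constant;
// otherwise emit an "argument out of range" diagnostic and return UNDEF.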
3666 template <unsigned N>
3667 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3668 SelectionDAG &DAG,
3669 const LoongArchSubtarget &Subtarget,
3670 bool IsSigned = false) {
3671 SDLoc DL(Node);
3672 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3673 // Check the ImmArg.
3674 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3675 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3676 DAG.getContext()->emitError(Node->getOperationName(0) +
3677 ": argument out of range.");
3678 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3680 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3683 template <unsigned N>
3684 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3685 SelectionDAG &DAG, bool IsSigned = false) {
3686 SDLoc DL(Node);
3687 EVT ResTy = Node->getValueType(0);
3688 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3690 // Check the ImmArg.
3691 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3692 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3693 DAG.getContext()->emitError(Node->getOperationName(0) +
3694 ": argument out of range.");
3695 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3697 return DAG.getConstant(
3698 APInt(ResTy.getScalarType().getSizeInBits(),
3699 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3700 DL, ResTy);
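// Clamp each element of Node's operand 2 to the element bit width by masking
// with (element bits - 1), so it can be used directly as a per-element shift
// amount or bit index.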
3703 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3704 SDLoc DL(Node);
3705 EVT ResTy = Node->getValueType(0);
3706 SDValue Vec = Node->getOperand(2);
3707 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3708 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3711 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3712 SDLoc DL(Node);
3713 EVT ResTy = Node->getValueType(0);
3714 SDValue One = DAG.getConstant(1, DL, ResTy);
3715 SDValue Bit =
3716 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3718 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3719 DAG.getNOT(DL, Bit, ResTy));
3722 template <unsigned N>
3723 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3724 SDLoc DL(Node);
3725 EVT ResTy = Node->getValueType(0);
3726 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3727 // Check the unsigned ImmArg.
3728 if (!isUInt<N>(CImm->getZExtValue())) {
3729 DAG.getContext()->emitError(Node->getOperationName(0) +
3730 ": argument out of range.");
3731 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3734 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3735 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3737 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3740 template <unsigned N>
3741 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3742 SDLoc DL(Node);
3743 EVT ResTy = Node->getValueType(0);
3744 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3745 // Check the unsigned ImmArg.
3746 if (!isUInt<N>(CImm->getZExtValue())) {
3747 DAG.getContext()->emitError(Node->getOperationName(0) +
3748 ": argument out of range.");
3749 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3752 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3753 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3754 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3757 template <unsigned N>
3758 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3759 SDLoc DL(Node);
3760 EVT ResTy = Node->getValueType(0);
3761 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3762 // Check the unsigned ImmArg.
3763 if (!isUInt<N>(CImm->getZExtValue())) {
3764 DAG.getContext()->emitError(Node->getOperationName(0) +
3765 ": argument out of range.");
3766 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3769 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3770 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3771 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3774 static SDValue
3775 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3776 TargetLowering::DAGCombinerInfo &DCI,
3777 const LoongArchSubtarget &Subtarget) {
3778 SDLoc DL(N);
3779 switch (N->getConstantOperandVal(0)) {
3780 default:
3781 break;
3782 case Intrinsic::loongarch_lsx_vadd_b:
3783 case Intrinsic::loongarch_lsx_vadd_h:
3784 case Intrinsic::loongarch_lsx_vadd_w:
3785 case Intrinsic::loongarch_lsx_vadd_d:
3786 case Intrinsic::loongarch_lasx_xvadd_b:
3787 case Intrinsic::loongarch_lasx_xvadd_h:
3788 case Intrinsic::loongarch_lasx_xvadd_w:
3789 case Intrinsic::loongarch_lasx_xvadd_d:
3790 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3791 N->getOperand(2));
3792 case Intrinsic::loongarch_lsx_vaddi_bu:
3793 case Intrinsic::loongarch_lsx_vaddi_hu:
3794 case Intrinsic::loongarch_lsx_vaddi_wu:
3795 case Intrinsic::loongarch_lsx_vaddi_du:
3796 case Intrinsic::loongarch_lasx_xvaddi_bu:
3797 case Intrinsic::loongarch_lasx_xvaddi_hu:
3798 case Intrinsic::loongarch_lasx_xvaddi_wu:
3799 case Intrinsic::loongarch_lasx_xvaddi_du:
3800 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3801 lowerVectorSplatImm<5>(N, 2, DAG));
3802 case Intrinsic::loongarch_lsx_vsub_b:
3803 case Intrinsic::loongarch_lsx_vsub_h:
3804 case Intrinsic::loongarch_lsx_vsub_w:
3805 case Intrinsic::loongarch_lsx_vsub_d:
3806 case Intrinsic::loongarch_lasx_xvsub_b:
3807 case Intrinsic::loongarch_lasx_xvsub_h:
3808 case Intrinsic::loongarch_lasx_xvsub_w:
3809 case Intrinsic::loongarch_lasx_xvsub_d:
3810 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3811 N->getOperand(2));
3812 case Intrinsic::loongarch_lsx_vsubi_bu:
3813 case Intrinsic::loongarch_lsx_vsubi_hu:
3814 case Intrinsic::loongarch_lsx_vsubi_wu:
3815 case Intrinsic::loongarch_lsx_vsubi_du:
3816 case Intrinsic::loongarch_lasx_xvsubi_bu:
3817 case Intrinsic::loongarch_lasx_xvsubi_hu:
3818 case Intrinsic::loongarch_lasx_xvsubi_wu:
3819 case Intrinsic::loongarch_lasx_xvsubi_du:
3820 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3821 lowerVectorSplatImm<5>(N, 2, DAG));
3822 case Intrinsic::loongarch_lsx_vneg_b:
3823 case Intrinsic::loongarch_lsx_vneg_h:
3824 case Intrinsic::loongarch_lsx_vneg_w:
3825 case Intrinsic::loongarch_lsx_vneg_d:
3826 case Intrinsic::loongarch_lasx_xvneg_b:
3827 case Intrinsic::loongarch_lasx_xvneg_h:
3828 case Intrinsic::loongarch_lasx_xvneg_w:
3829 case Intrinsic::loongarch_lasx_xvneg_d:
3830 return DAG.getNode(
3831 ISD::SUB, DL, N->getValueType(0),
3832 DAG.getConstant(
3833 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3834 /*isSigned=*/true),
3835 SDLoc(N), N->getValueType(0)),
3836 N->getOperand(1));
3837 case Intrinsic::loongarch_lsx_vmax_b:
3838 case Intrinsic::loongarch_lsx_vmax_h:
3839 case Intrinsic::loongarch_lsx_vmax_w:
3840 case Intrinsic::loongarch_lsx_vmax_d:
3841 case Intrinsic::loongarch_lasx_xvmax_b:
3842 case Intrinsic::loongarch_lasx_xvmax_h:
3843 case Intrinsic::loongarch_lasx_xvmax_w:
3844 case Intrinsic::loongarch_lasx_xvmax_d:
3845 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3846 N->getOperand(2));
3847 case Intrinsic::loongarch_lsx_vmax_bu:
3848 case Intrinsic::loongarch_lsx_vmax_hu:
3849 case Intrinsic::loongarch_lsx_vmax_wu:
3850 case Intrinsic::loongarch_lsx_vmax_du:
3851 case Intrinsic::loongarch_lasx_xvmax_bu:
3852 case Intrinsic::loongarch_lasx_xvmax_hu:
3853 case Intrinsic::loongarch_lasx_xvmax_wu:
3854 case Intrinsic::loongarch_lasx_xvmax_du:
3855 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3856 N->getOperand(2));
3857 case Intrinsic::loongarch_lsx_vmaxi_b:
3858 case Intrinsic::loongarch_lsx_vmaxi_h:
3859 case Intrinsic::loongarch_lsx_vmaxi_w:
3860 case Intrinsic::loongarch_lsx_vmaxi_d:
3861 case Intrinsic::loongarch_lasx_xvmaxi_b:
3862 case Intrinsic::loongarch_lasx_xvmaxi_h:
3863 case Intrinsic::loongarch_lasx_xvmaxi_w:
3864 case Intrinsic::loongarch_lasx_xvmaxi_d:
3865 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3866 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3867 case Intrinsic::loongarch_lsx_vmaxi_bu:
3868 case Intrinsic::loongarch_lsx_vmaxi_hu:
3869 case Intrinsic::loongarch_lsx_vmaxi_wu:
3870 case Intrinsic::loongarch_lsx_vmaxi_du:
3871 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3872 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3873 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3874 case Intrinsic::loongarch_lasx_xvmaxi_du:
3875 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3876 lowerVectorSplatImm<5>(N, 2, DAG));
3877 case Intrinsic::loongarch_lsx_vmin_b:
3878 case Intrinsic::loongarch_lsx_vmin_h:
3879 case Intrinsic::loongarch_lsx_vmin_w:
3880 case Intrinsic::loongarch_lsx_vmin_d:
3881 case Intrinsic::loongarch_lasx_xvmin_b:
3882 case Intrinsic::loongarch_lasx_xvmin_h:
3883 case Intrinsic::loongarch_lasx_xvmin_w:
3884 case Intrinsic::loongarch_lasx_xvmin_d:
3885 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3886 N->getOperand(2));
3887 case Intrinsic::loongarch_lsx_vmin_bu:
3888 case Intrinsic::loongarch_lsx_vmin_hu:
3889 case Intrinsic::loongarch_lsx_vmin_wu:
3890 case Intrinsic::loongarch_lsx_vmin_du:
3891 case Intrinsic::loongarch_lasx_xvmin_bu:
3892 case Intrinsic::loongarch_lasx_xvmin_hu:
3893 case Intrinsic::loongarch_lasx_xvmin_wu:
3894 case Intrinsic::loongarch_lasx_xvmin_du:
3895 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3896 N->getOperand(2));
3897 case Intrinsic::loongarch_lsx_vmini_b:
3898 case Intrinsic::loongarch_lsx_vmini_h:
3899 case Intrinsic::loongarch_lsx_vmini_w:
3900 case Intrinsic::loongarch_lsx_vmini_d:
3901 case Intrinsic::loongarch_lasx_xvmini_b:
3902 case Intrinsic::loongarch_lasx_xvmini_h:
3903 case Intrinsic::loongarch_lasx_xvmini_w:
3904 case Intrinsic::loongarch_lasx_xvmini_d:
3905 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3906 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3907 case Intrinsic::loongarch_lsx_vmini_bu:
3908 case Intrinsic::loongarch_lsx_vmini_hu:
3909 case Intrinsic::loongarch_lsx_vmini_wu:
3910 case Intrinsic::loongarch_lsx_vmini_du:
3911 case Intrinsic::loongarch_lasx_xvmini_bu:
3912 case Intrinsic::loongarch_lasx_xvmini_hu:
3913 case Intrinsic::loongarch_lasx_xvmini_wu:
3914 case Intrinsic::loongarch_lasx_xvmini_du:
3915 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3916 lowerVectorSplatImm<5>(N, 2, DAG));
3917 case Intrinsic::loongarch_lsx_vmul_b:
3918 case Intrinsic::loongarch_lsx_vmul_h:
3919 case Intrinsic::loongarch_lsx_vmul_w:
3920 case Intrinsic::loongarch_lsx_vmul_d:
3921 case Intrinsic::loongarch_lasx_xvmul_b:
3922 case Intrinsic::loongarch_lasx_xvmul_h:
3923 case Intrinsic::loongarch_lasx_xvmul_w:
3924 case Intrinsic::loongarch_lasx_xvmul_d:
3925 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3926 N->getOperand(2));
3927 case Intrinsic::loongarch_lsx_vmadd_b:
3928 case Intrinsic::loongarch_lsx_vmadd_h:
3929 case Intrinsic::loongarch_lsx_vmadd_w:
3930 case Intrinsic::loongarch_lsx_vmadd_d:
3931 case Intrinsic::loongarch_lasx_xvmadd_b:
3932 case Intrinsic::loongarch_lasx_xvmadd_h:
3933 case Intrinsic::loongarch_lasx_xvmadd_w:
3934 case Intrinsic::loongarch_lasx_xvmadd_d: {
3935 EVT ResTy = N->getValueType(0);
3936 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3937 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3938 N->getOperand(3)));
3940 case Intrinsic::loongarch_lsx_vmsub_b:
3941 case Intrinsic::loongarch_lsx_vmsub_h:
3942 case Intrinsic::loongarch_lsx_vmsub_w:
3943 case Intrinsic::loongarch_lsx_vmsub_d:
3944 case Intrinsic::loongarch_lasx_xvmsub_b:
3945 case Intrinsic::loongarch_lasx_xvmsub_h:
3946 case Intrinsic::loongarch_lasx_xvmsub_w:
3947 case Intrinsic::loongarch_lasx_xvmsub_d: {
3948 EVT ResTy = N->getValueType(0);
3949 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
3950 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3951 N->getOperand(3)));
3953 case Intrinsic::loongarch_lsx_vdiv_b:
3954 case Intrinsic::loongarch_lsx_vdiv_h:
3955 case Intrinsic::loongarch_lsx_vdiv_w:
3956 case Intrinsic::loongarch_lsx_vdiv_d:
3957 case Intrinsic::loongarch_lasx_xvdiv_b:
3958 case Intrinsic::loongarch_lasx_xvdiv_h:
3959 case Intrinsic::loongarch_lasx_xvdiv_w:
3960 case Intrinsic::loongarch_lasx_xvdiv_d:
3961 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
3962 N->getOperand(2));
3963 case Intrinsic::loongarch_lsx_vdiv_bu:
3964 case Intrinsic::loongarch_lsx_vdiv_hu:
3965 case Intrinsic::loongarch_lsx_vdiv_wu:
3966 case Intrinsic::loongarch_lsx_vdiv_du:
3967 case Intrinsic::loongarch_lasx_xvdiv_bu:
3968 case Intrinsic::loongarch_lasx_xvdiv_hu:
3969 case Intrinsic::loongarch_lasx_xvdiv_wu:
3970 case Intrinsic::loongarch_lasx_xvdiv_du:
3971 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
3972 N->getOperand(2));
3973 case Intrinsic::loongarch_lsx_vmod_b:
3974 case Intrinsic::loongarch_lsx_vmod_h:
3975 case Intrinsic::loongarch_lsx_vmod_w:
3976 case Intrinsic::loongarch_lsx_vmod_d:
3977 case Intrinsic::loongarch_lasx_xvmod_b:
3978 case Intrinsic::loongarch_lasx_xvmod_h:
3979 case Intrinsic::loongarch_lasx_xvmod_w:
3980 case Intrinsic::loongarch_lasx_xvmod_d:
3981 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
3982 N->getOperand(2));
3983 case Intrinsic::loongarch_lsx_vmod_bu:
3984 case Intrinsic::loongarch_lsx_vmod_hu:
3985 case Intrinsic::loongarch_lsx_vmod_wu:
3986 case Intrinsic::loongarch_lsx_vmod_du:
3987 case Intrinsic::loongarch_lasx_xvmod_bu:
3988 case Intrinsic::loongarch_lasx_xvmod_hu:
3989 case Intrinsic::loongarch_lasx_xvmod_wu:
3990 case Intrinsic::loongarch_lasx_xvmod_du:
3991 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
3992 N->getOperand(2));
3993 case Intrinsic::loongarch_lsx_vand_v:
3994 case Intrinsic::loongarch_lasx_xvand_v:
3995 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3996 N->getOperand(2));
3997 case Intrinsic::loongarch_lsx_vor_v:
3998 case Intrinsic::loongarch_lasx_xvor_v:
3999 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4000 N->getOperand(2));
4001 case Intrinsic::loongarch_lsx_vxor_v:
4002 case Intrinsic::loongarch_lasx_xvxor_v:
4003 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4004 N->getOperand(2));
4005 case Intrinsic::loongarch_lsx_vnor_v:
4006 case Intrinsic::loongarch_lasx_xvnor_v: {
4007 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4008 N->getOperand(2));
4009 return DAG.getNOT(DL, Res, Res->getValueType(0));
4011 case Intrinsic::loongarch_lsx_vandi_b:
4012 case Intrinsic::loongarch_lasx_xvandi_b:
4013 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4014 lowerVectorSplatImm<8>(N, 2, DAG));
4015 case Intrinsic::loongarch_lsx_vori_b:
4016 case Intrinsic::loongarch_lasx_xvori_b:
4017 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4018 lowerVectorSplatImm<8>(N, 2, DAG));
4019 case Intrinsic::loongarch_lsx_vxori_b:
4020 case Intrinsic::loongarch_lasx_xvxori_b:
4021 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4022 lowerVectorSplatImm<8>(N, 2, DAG));
4023 case Intrinsic::loongarch_lsx_vsll_b:
4024 case Intrinsic::loongarch_lsx_vsll_h:
4025 case Intrinsic::loongarch_lsx_vsll_w:
4026 case Intrinsic::loongarch_lsx_vsll_d:
4027 case Intrinsic::loongarch_lasx_xvsll_b:
4028 case Intrinsic::loongarch_lasx_xvsll_h:
4029 case Intrinsic::loongarch_lasx_xvsll_w:
4030 case Intrinsic::loongarch_lasx_xvsll_d:
4031 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4032 truncateVecElts(N, DAG));
4033 case Intrinsic::loongarch_lsx_vslli_b:
4034 case Intrinsic::loongarch_lasx_xvslli_b:
4035 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4036 lowerVectorSplatImm<3>(N, 2, DAG));
4037 case Intrinsic::loongarch_lsx_vslli_h:
4038 case Intrinsic::loongarch_lasx_xvslli_h:
4039 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4040 lowerVectorSplatImm<4>(N, 2, DAG));
4041 case Intrinsic::loongarch_lsx_vslli_w:
4042 case Intrinsic::loongarch_lasx_xvslli_w:
4043 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4044 lowerVectorSplatImm<5>(N, 2, DAG));
4045 case Intrinsic::loongarch_lsx_vslli_d:
4046 case Intrinsic::loongarch_lasx_xvslli_d:
4047 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4048 lowerVectorSplatImm<6>(N, 2, DAG));
4049 case Intrinsic::loongarch_lsx_vsrl_b:
4050 case Intrinsic::loongarch_lsx_vsrl_h:
4051 case Intrinsic::loongarch_lsx_vsrl_w:
4052 case Intrinsic::loongarch_lsx_vsrl_d:
4053 case Intrinsic::loongarch_lasx_xvsrl_b:
4054 case Intrinsic::loongarch_lasx_xvsrl_h:
4055 case Intrinsic::loongarch_lasx_xvsrl_w:
4056 case Intrinsic::loongarch_lasx_xvsrl_d:
4057 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4058 truncateVecElts(N, DAG));
4059 case Intrinsic::loongarch_lsx_vsrli_b:
4060 case Intrinsic::loongarch_lasx_xvsrli_b:
4061 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4062 lowerVectorSplatImm<3>(N, 2, DAG));
4063 case Intrinsic::loongarch_lsx_vsrli_h:
4064 case Intrinsic::loongarch_lasx_xvsrli_h:
4065 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4066 lowerVectorSplatImm<4>(N, 2, DAG));
4067 case Intrinsic::loongarch_lsx_vsrli_w:
4068 case Intrinsic::loongarch_lasx_xvsrli_w:
4069 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4070 lowerVectorSplatImm<5>(N, 2, DAG));
4071 case Intrinsic::loongarch_lsx_vsrli_d:
4072 case Intrinsic::loongarch_lasx_xvsrli_d:
4073 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4074 lowerVectorSplatImm<6>(N, 2, DAG));
4075 case Intrinsic::loongarch_lsx_vsra_b:
4076 case Intrinsic::loongarch_lsx_vsra_h:
4077 case Intrinsic::loongarch_lsx_vsra_w:
4078 case Intrinsic::loongarch_lsx_vsra_d:
4079 case Intrinsic::loongarch_lasx_xvsra_b:
4080 case Intrinsic::loongarch_lasx_xvsra_h:
4081 case Intrinsic::loongarch_lasx_xvsra_w:
4082 case Intrinsic::loongarch_lasx_xvsra_d:
4083 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4084 truncateVecElts(N, DAG));
4085 case Intrinsic::loongarch_lsx_vsrai_b:
4086 case Intrinsic::loongarch_lasx_xvsrai_b:
4087 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4088 lowerVectorSplatImm<3>(N, 2, DAG));
4089 case Intrinsic::loongarch_lsx_vsrai_h:
4090 case Intrinsic::loongarch_lasx_xvsrai_h:
4091 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4092 lowerVectorSplatImm<4>(N, 2, DAG));
4093 case Intrinsic::loongarch_lsx_vsrai_w:
4094 case Intrinsic::loongarch_lasx_xvsrai_w:
4095 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4096 lowerVectorSplatImm<5>(N, 2, DAG));
4097 case Intrinsic::loongarch_lsx_vsrai_d:
4098 case Intrinsic::loongarch_lasx_xvsrai_d:
4099 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4100 lowerVectorSplatImm<6>(N, 2, DAG));
4101 case Intrinsic::loongarch_lsx_vclz_b:
4102 case Intrinsic::loongarch_lsx_vclz_h:
4103 case Intrinsic::loongarch_lsx_vclz_w:
4104 case Intrinsic::loongarch_lsx_vclz_d:
4105 case Intrinsic::loongarch_lasx_xvclz_b:
4106 case Intrinsic::loongarch_lasx_xvclz_h:
4107 case Intrinsic::loongarch_lasx_xvclz_w:
4108 case Intrinsic::loongarch_lasx_xvclz_d:
4109 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4110 case Intrinsic::loongarch_lsx_vpcnt_b:
4111 case Intrinsic::loongarch_lsx_vpcnt_h:
4112 case Intrinsic::loongarch_lsx_vpcnt_w:
4113 case Intrinsic::loongarch_lsx_vpcnt_d:
4114 case Intrinsic::loongarch_lasx_xvpcnt_b:
4115 case Intrinsic::loongarch_lasx_xvpcnt_h:
4116 case Intrinsic::loongarch_lasx_xvpcnt_w:
4117 case Intrinsic::loongarch_lasx_xvpcnt_d:
4118 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4119 case Intrinsic::loongarch_lsx_vbitclr_b:
4120 case Intrinsic::loongarch_lsx_vbitclr_h:
4121 case Intrinsic::loongarch_lsx_vbitclr_w:
4122 case Intrinsic::loongarch_lsx_vbitclr_d:
4123 case Intrinsic::loongarch_lasx_xvbitclr_b:
4124 case Intrinsic::loongarch_lasx_xvbitclr_h:
4125 case Intrinsic::loongarch_lasx_xvbitclr_w:
4126 case Intrinsic::loongarch_lasx_xvbitclr_d:
4127 return lowerVectorBitClear(N, DAG);
4128 case Intrinsic::loongarch_lsx_vbitclri_b:
4129 case Intrinsic::loongarch_lasx_xvbitclri_b:
4130 return lowerVectorBitClearImm<3>(N, DAG);
4131 case Intrinsic::loongarch_lsx_vbitclri_h:
4132 case Intrinsic::loongarch_lasx_xvbitclri_h:
4133 return lowerVectorBitClearImm<4>(N, DAG);
4134 case Intrinsic::loongarch_lsx_vbitclri_w:
4135 case Intrinsic::loongarch_lasx_xvbitclri_w:
4136 return lowerVectorBitClearImm<5>(N, DAG);
4137 case Intrinsic::loongarch_lsx_vbitclri_d:
4138 case Intrinsic::loongarch_lasx_xvbitclri_d:
4139 return lowerVectorBitClearImm<6>(N, DAG);
4140 case Intrinsic::loongarch_lsx_vbitset_b:
4141 case Intrinsic::loongarch_lsx_vbitset_h:
4142 case Intrinsic::loongarch_lsx_vbitset_w:
4143 case Intrinsic::loongarch_lsx_vbitset_d:
4144 case Intrinsic::loongarch_lasx_xvbitset_b:
4145 case Intrinsic::loongarch_lasx_xvbitset_h:
4146 case Intrinsic::loongarch_lasx_xvbitset_w:
4147 case Intrinsic::loongarch_lasx_xvbitset_d: {
4148 EVT VecTy = N->getValueType(0);
4149 SDValue One = DAG.getConstant(1, DL, VecTy);
4150 return DAG.getNode(
4151 ISD::OR, DL, VecTy, N->getOperand(1),
4152 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4154 case Intrinsic::loongarch_lsx_vbitseti_b:
4155 case Intrinsic::loongarch_lasx_xvbitseti_b:
4156 return lowerVectorBitSetImm<3>(N, DAG);
4157 case Intrinsic::loongarch_lsx_vbitseti_h:
4158 case Intrinsic::loongarch_lasx_xvbitseti_h:
4159 return lowerVectorBitSetImm<4>(N, DAG);
4160 case Intrinsic::loongarch_lsx_vbitseti_w:
4161 case Intrinsic::loongarch_lasx_xvbitseti_w:
4162 return lowerVectorBitSetImm<5>(N, DAG);
4163 case Intrinsic::loongarch_lsx_vbitseti_d:
4164 case Intrinsic::loongarch_lasx_xvbitseti_d:
4165 return lowerVectorBitSetImm<6>(N, DAG);
4166 case Intrinsic::loongarch_lsx_vbitrev_b:
4167 case Intrinsic::loongarch_lsx_vbitrev_h:
4168 case Intrinsic::loongarch_lsx_vbitrev_w:
4169 case Intrinsic::loongarch_lsx_vbitrev_d:
4170 case Intrinsic::loongarch_lasx_xvbitrev_b:
4171 case Intrinsic::loongarch_lasx_xvbitrev_h:
4172 case Intrinsic::loongarch_lasx_xvbitrev_w:
4173 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4174 EVT VecTy = N->getValueType(0);
4175 SDValue One = DAG.getConstant(1, DL, VecTy);
4176 return DAG.getNode(
4177 ISD::XOR, DL, VecTy, N->getOperand(1),
4178 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4180 case Intrinsic::loongarch_lsx_vbitrevi_b:
4181 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4182 return lowerVectorBitRevImm<3>(N, DAG);
4183 case Intrinsic::loongarch_lsx_vbitrevi_h:
4184 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4185 return lowerVectorBitRevImm<4>(N, DAG);
4186 case Intrinsic::loongarch_lsx_vbitrevi_w:
4187 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4188 return lowerVectorBitRevImm<5>(N, DAG);
4189 case Intrinsic::loongarch_lsx_vbitrevi_d:
4190 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4191 return lowerVectorBitRevImm<6>(N, DAG);
4192 case Intrinsic::loongarch_lsx_vfadd_s:
4193 case Intrinsic::loongarch_lsx_vfadd_d:
4194 case Intrinsic::loongarch_lasx_xvfadd_s:
4195 case Intrinsic::loongarch_lasx_xvfadd_d:
4196 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4197 N->getOperand(2));
4198 case Intrinsic::loongarch_lsx_vfsub_s:
4199 case Intrinsic::loongarch_lsx_vfsub_d:
4200 case Intrinsic::loongarch_lasx_xvfsub_s:
4201 case Intrinsic::loongarch_lasx_xvfsub_d:
4202 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4203 N->getOperand(2));
4204 case Intrinsic::loongarch_lsx_vfmul_s:
4205 case Intrinsic::loongarch_lsx_vfmul_d:
4206 case Intrinsic::loongarch_lasx_xvfmul_s:
4207 case Intrinsic::loongarch_lasx_xvfmul_d:
4208 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4209 N->getOperand(2));
4210 case Intrinsic::loongarch_lsx_vfdiv_s:
4211 case Intrinsic::loongarch_lsx_vfdiv_d:
4212 case Intrinsic::loongarch_lasx_xvfdiv_s:
4213 case Intrinsic::loongarch_lasx_xvfdiv_d:
4214 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4215 N->getOperand(2));
4216 case Intrinsic::loongarch_lsx_vfmadd_s:
4217 case Intrinsic::loongarch_lsx_vfmadd_d:
4218 case Intrinsic::loongarch_lasx_xvfmadd_s:
4219 case Intrinsic::loongarch_lasx_xvfmadd_d:
4220 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4221 N->getOperand(2), N->getOperand(3));
4222 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4223 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4224 N->getOperand(1), N->getOperand(2),
4225 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4226 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4227 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4228 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4229 N->getOperand(1), N->getOperand(2),
4230 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4231 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4232 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4233 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4234 N->getOperand(1), N->getOperand(2),
4235 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4236 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4237 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4238 N->getOperand(1), N->getOperand(2),
4239 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4240 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4241 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4242 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4243 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4244 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4245 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4246 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4247 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
4248 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
4249 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4250 N->getOperand(1)));
4251 case Intrinsic::loongarch_lsx_vreplve_b:
4252 case Intrinsic::loongarch_lsx_vreplve_h:
4253 case Intrinsic::loongarch_lsx_vreplve_w:
4254 case Intrinsic::loongarch_lsx_vreplve_d:
4255 case Intrinsic::loongarch_lasx_xvreplve_b:
4256 case Intrinsic::loongarch_lasx_xvreplve_h:
4257 case Intrinsic::loongarch_lasx_xvreplve_w:
4258 case Intrinsic::loongarch_lasx_xvreplve_d:
4259 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4260 N->getOperand(1),
4261 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4262 N->getOperand(2)));
4264 return SDValue();
4267 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4268 DAGCombinerInfo &DCI) const {
4269 SelectionDAG &DAG = DCI.DAG;
4270 switch (N->getOpcode()) {
4271 default:
4272 break;
4273 case ISD::AND:
4274 return performANDCombine(N, DAG, DCI, Subtarget);
4275 case ISD::OR:
4276 return performORCombine(N, DAG, DCI, Subtarget);
4277 case ISD::SETCC:
4278 return performSETCCCombine(N, DAG, DCI, Subtarget);
4279 case ISD::SRL:
4280 return performSRLCombine(N, DAG, DCI, Subtarget);
4281 case LoongArchISD::BITREV_W:
4282 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4283 case ISD::INTRINSIC_WO_CHAIN:
4284 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4286 return SDValue();
4289 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4290 MachineBasicBlock *MBB) {
4291 if (!ZeroDivCheck)
4292 return MBB;
4294 // Build instructions:
4295 // MBB:
4296 // div (or mod) $dst, $dividend, $divisor
4297 // bnez $divisor, SinkMBB
4298 // BreakMBB:
4299 // break 7 // BRK_DIVZERO
4300 // SinkMBB:
4301 // fallthrough
4302 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4303 MachineFunction::iterator It = ++MBB->getIterator();
4304 MachineFunction *MF = MBB->getParent();
4305 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4306 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4307 MF->insert(It, BreakMBB);
4308 MF->insert(It, SinkMBB);
4310 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4311 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4312 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4314 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4315 DebugLoc DL = MI.getDebugLoc();
4316 MachineOperand &Divisor = MI.getOperand(2);
4317 Register DivisorReg = Divisor.getReg();
4319 // MBB:
4320 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4321 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4322 .addMBB(SinkMBB);
4323 MBB->addSuccessor(BreakMBB);
4324 MBB->addSuccessor(SinkMBB);
4326 // BreakMBB:
4327 // See the Linux header file arch/loongarch/include/uapi/asm/break.h for the
4328 // definition of BRK_DIVZERO.
4329 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4330 BreakMBB->addSuccessor(SinkMBB);
4332 // Clear Divisor's kill flag.
4333 Divisor.setIsKill(false);
4335 return SinkMBB;
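// Expand a vector compare-and-branch pseudo: materialize the condition into
// an FCC register with the matching [X]VSET* instruction, branch on it, and
// merge the 0 (FalseBB) and 1 (TrueBB) results with a PHI in SinkBB.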
4338 static MachineBasicBlock *
4339 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4340 const LoongArchSubtarget &Subtarget) {
4341 unsigned CondOpc;
4342 switch (MI.getOpcode()) {
4343 default:
4344 llvm_unreachable("Unexpected opcode");
4345 case LoongArch::PseudoVBZ:
4346 CondOpc = LoongArch::VSETEQZ_V;
4347 break;
4348 case LoongArch::PseudoVBZ_B:
4349 CondOpc = LoongArch::VSETANYEQZ_B;
4350 break;
4351 case LoongArch::PseudoVBZ_H:
4352 CondOpc = LoongArch::VSETANYEQZ_H;
4353 break;
4354 case LoongArch::PseudoVBZ_W:
4355 CondOpc = LoongArch::VSETANYEQZ_W;
4356 break;
4357 case LoongArch::PseudoVBZ_D:
4358 CondOpc = LoongArch::VSETANYEQZ_D;
4359 break;
4360 case LoongArch::PseudoVBNZ:
4361 CondOpc = LoongArch::VSETNEZ_V;
4362 break;
4363 case LoongArch::PseudoVBNZ_B:
4364 CondOpc = LoongArch::VSETALLNEZ_B;
4365 break;
4366 case LoongArch::PseudoVBNZ_H:
4367 CondOpc = LoongArch::VSETALLNEZ_H;
4368 break;
4369 case LoongArch::PseudoVBNZ_W:
4370 CondOpc = LoongArch::VSETALLNEZ_W;
4371 break;
4372 case LoongArch::PseudoVBNZ_D:
4373 CondOpc = LoongArch::VSETALLNEZ_D;
4374 break;
4375 case LoongArch::PseudoXVBZ:
4376 CondOpc = LoongArch::XVSETEQZ_V;
4377 break;
4378 case LoongArch::PseudoXVBZ_B:
4379 CondOpc = LoongArch::XVSETANYEQZ_B;
4380 break;
4381 case LoongArch::PseudoXVBZ_H:
4382 CondOpc = LoongArch::XVSETANYEQZ_H;
4383 break;
4384 case LoongArch::PseudoXVBZ_W:
4385 CondOpc = LoongArch::XVSETANYEQZ_W;
4386 break;
4387 case LoongArch::PseudoXVBZ_D:
4388 CondOpc = LoongArch::XVSETANYEQZ_D;
4389 break;
4390 case LoongArch::PseudoXVBNZ:
4391 CondOpc = LoongArch::XVSETNEZ_V;
4392 break;
4393 case LoongArch::PseudoXVBNZ_B:
4394 CondOpc = LoongArch::XVSETALLNEZ_B;
4395 break;
4396 case LoongArch::PseudoXVBNZ_H:
4397 CondOpc = LoongArch::XVSETALLNEZ_H;
4398 break;
4399 case LoongArch::PseudoXVBNZ_W:
4400 CondOpc = LoongArch::XVSETALLNEZ_W;
4401 break;
4402 case LoongArch::PseudoXVBNZ_D:
4403 CondOpc = LoongArch::XVSETALLNEZ_D;
4404 break;
4407 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4408 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4409 DebugLoc DL = MI.getDebugLoc();
4410 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4411 MachineFunction::iterator It = ++BB->getIterator();
4413 MachineFunction *F = BB->getParent();
4414 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4415 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4416 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4418 F->insert(It, FalseBB);
4419 F->insert(It, TrueBB);
4420 F->insert(It, SinkBB);
4422 // Transfer the remainder of BB and its successor edges to SinkBB.
4423 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4424 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4426 // Insert the real instruction into BB.
4427 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4428 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4430 // Insert branch.
4431 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4432 BB->addSuccessor(FalseBB);
4433 BB->addSuccessor(TrueBB);
4435 // FalseBB.
4436 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4437 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4438 .addReg(LoongArch::R0)
4439 .addImm(0);
4440 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4441 FalseBB->addSuccessor(SinkBB);
4443 // TrueBB.
4444 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4445 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4446 .addReg(LoongArch::R0)
4447 .addImm(1);
4448 TrueBB->addSuccessor(SinkBB);
4450 // SinkBB: merge the results.
4451 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4452 MI.getOperand(0).getReg())
4453 .addReg(RD1)
4454 .addMBB(FalseBB)
4455 .addReg(RD2)
4456 .addMBB(TrueBB);
4458 // The pseudo instruction is gone now.
4459 MI.eraseFromParent();
4460 return SinkBB;
4463 static MachineBasicBlock *
4464 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4465 const LoongArchSubtarget &Subtarget) {
4466 unsigned InsOp;
4467 unsigned HalfSize;
4468 switch (MI.getOpcode()) {
4469 default:
4470 llvm_unreachable("Unexpected opcode");
4471 case LoongArch::PseudoXVINSGR2VR_B:
4472 HalfSize = 16;
4473 InsOp = LoongArch::VINSGR2VR_B;
4474 break;
4475 case LoongArch::PseudoXVINSGR2VR_H:
4476 HalfSize = 8;
4477 InsOp = LoongArch::VINSGR2VR_H;
4478 break;
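// LASX has no byte/halfword form of XVINSGR2VR, so the element is inserted
// with the 128-bit LSX VINSGR2VR instead: when Idx addresses the high half,
// XVPERMI_Q first moves that half into the low 128 bits, the insert happens
// there, and a second XVPERMI_Q merges the updated half back into the full
// 256-bit register.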
4480 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4481 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4482 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4483 DebugLoc DL = MI.getDebugLoc();
4484 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4485 // XDst = vector_insert XSrc, Elt, Idx
4486 Register XDst = MI.getOperand(0).getReg();
4487 Register XSrc = MI.getOperand(1).getReg();
4488 Register Elt = MI.getOperand(2).getReg();
4489 unsigned Idx = MI.getOperand(3).getImm();
4491 Register ScratchReg1 = XSrc;
4492 if (Idx >= HalfSize) {
4493 ScratchReg1 = MRI.createVirtualRegister(RC);
4494 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4495 .addReg(XSrc)
4496 .addReg(XSrc)
4497 .addImm(1);
4500 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4501 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4502 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4503 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4504 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4505 .addReg(ScratchSubReg1)
4506 .addReg(Elt)
4507 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4509 Register ScratchReg2 = XDst;
4510 if (Idx >= HalfSize)
4511 ScratchReg2 = MRI.createVirtualRegister(RC);
4513 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4514 .addImm(0)
4515 .addReg(ScratchSubReg2)
4516 .addImm(LoongArch::sub_128);
4518 if (Idx >= HalfSize)
4519 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4520 .addReg(XSrc)
4521 .addReg(ScratchReg2)
4522 .addImm(2);
4524 MI.eraseFromParent();
4525 return BB;
4528 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
4529 MachineBasicBlock *BB,
4530 const LoongArchSubtarget &Subtarget) {
4531 assert(Subtarget.hasExtLSX());
4532 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4533 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
4534 DebugLoc DL = MI.getDebugLoc();
4535 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4536 Register Dst = MI.getOperand(0).getReg();
4537 Register Src = MI.getOperand(1).getReg();
4538 Register ScratchReg1 = MRI.createVirtualRegister(RC);
4539 Register ScratchReg2 = MRI.createVirtualRegister(RC);
4540 Register ScratchReg3 = MRI.createVirtualRegister(RC);
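// Scalar CTPOP is routed through the LSX unit: zero a vector with VLDI,
// insert the GPR into element 0, count bits with VPCNT, and move the result
// back to the destination GPR with VPICKVE2GR.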
4542 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
4543 BuildMI(*BB, MI, DL,
4544 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
4545 : LoongArch::VINSGR2VR_W),
4546 ScratchReg2)
4547 .addReg(ScratchReg1)
4548 .addReg(Src)
4549 .addImm(0);
4550 BuildMI(
4551 *BB, MI, DL,
4552 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
4553 ScratchReg3)
4554 .addReg(ScratchReg2);
4555 BuildMI(*BB, MI, DL,
4556 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
4557 : LoongArch::VPICKVE2GR_W),
4558 Dst)
4559 .addReg(ScratchReg3)
4560 .addImm(0);
4562 MI.eraseFromParent();
4563 return BB;
4566 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4567 MachineInstr &MI, MachineBasicBlock *BB) const {
4568 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4569 DebugLoc DL = MI.getDebugLoc();
4571 switch (MI.getOpcode()) {
4572 default:
4573 llvm_unreachable("Unexpected instr type to insert");
4574 case LoongArch::DIV_W:
4575 case LoongArch::DIV_WU:
4576 case LoongArch::MOD_W:
4577 case LoongArch::MOD_WU:
4578 case LoongArch::DIV_D:
4579 case LoongArch::DIV_DU:
4580 case LoongArch::MOD_D:
4581 case LoongArch::MOD_DU:
4582 return insertDivByZeroTrap(MI, BB);
4583 break;
4584 case LoongArch::WRFCSR: {
4585 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4586 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4587 .addReg(MI.getOperand(1).getReg());
4588 MI.eraseFromParent();
4589 return BB;
4591 case LoongArch::RDFCSR: {
4592 MachineInstr *ReadFCSR =
4593 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4594 MI.getOperand(0).getReg())
4595 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4596 ReadFCSR->getOperand(1).setIsUndef();
4597 MI.eraseFromParent();
4598 return BB;
4600 case LoongArch::PseudoVBZ:
4601 case LoongArch::PseudoVBZ_B:
4602 case LoongArch::PseudoVBZ_H:
4603 case LoongArch::PseudoVBZ_W:
4604 case LoongArch::PseudoVBZ_D:
4605 case LoongArch::PseudoVBNZ:
4606 case LoongArch::PseudoVBNZ_B:
4607 case LoongArch::PseudoVBNZ_H:
4608 case LoongArch::PseudoVBNZ_W:
4609 case LoongArch::PseudoVBNZ_D:
4610 case LoongArch::PseudoXVBZ:
4611 case LoongArch::PseudoXVBZ_B:
4612 case LoongArch::PseudoXVBZ_H:
4613 case LoongArch::PseudoXVBZ_W:
4614 case LoongArch::PseudoXVBZ_D:
4615 case LoongArch::PseudoXVBNZ:
4616 case LoongArch::PseudoXVBNZ_B:
4617 case LoongArch::PseudoXVBNZ_H:
4618 case LoongArch::PseudoXVBNZ_W:
4619 case LoongArch::PseudoXVBNZ_D:
4620 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4621 case LoongArch::PseudoXVINSGR2VR_B:
4622 case LoongArch::PseudoXVINSGR2VR_H:
4623 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4624 case LoongArch::PseudoCTPOP:
4625 return emitPseudoCTPOP(MI, BB, Subtarget);
4626 case TargetOpcode::STATEPOINT:
4627 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
4628 // whereas the bl call instruction (into which the statepoint is eventually
4629 // lowered) has an implicit def. This def is early-clobber as it is set at
4630 // the moment of the call, earlier than any use is read.
4631 // Add this implicit dead def here as a workaround.
4632 MI.addOperand(*MI.getMF(),
4633 MachineOperand::CreateReg(
4634 LoongArch::R1, /*isDef*/ true,
4635 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
4636 /*isUndef*/ false, /*isEarlyClobber*/ true));
4637 if (!Subtarget.is64Bit())
4638 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
4639 return emitPatchPoint(MI, BB);
4643 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4644 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4645 unsigned *Fast) const {
4646 if (!Subtarget.hasUAL())
4647 return false;
4649 // TODO: set reasonable speed number.
4650 if (Fast)
4651 *Fast = 1;
4652 return true;
4655 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4656 switch ((LoongArchISD::NodeType)Opcode) {
4657 case LoongArchISD::FIRST_NUMBER:
4658 break;
4660 #define NODE_NAME_CASE(node) \
4661 case LoongArchISD::node: \
4662 return "LoongArchISD::" #node;
4664 // TODO: Add more target-dependent nodes later.
4665 NODE_NAME_CASE(CALL)
4666 NODE_NAME_CASE(CALL_MEDIUM)
4667 NODE_NAME_CASE(CALL_LARGE)
4668 NODE_NAME_CASE(RET)
4669 NODE_NAME_CASE(TAIL)
4670 NODE_NAME_CASE(TAIL_MEDIUM)
4671 NODE_NAME_CASE(TAIL_LARGE)
4672 NODE_NAME_CASE(SLL_W)
4673 NODE_NAME_CASE(SRA_W)
4674 NODE_NAME_CASE(SRL_W)
4675 NODE_NAME_CASE(BSTRINS)
4676 NODE_NAME_CASE(BSTRPICK)
4677 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4678 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4679 NODE_NAME_CASE(FTINT)
4680 NODE_NAME_CASE(REVB_2H)
4681 NODE_NAME_CASE(REVB_2W)
4682 NODE_NAME_CASE(BITREV_4B)
4683 NODE_NAME_CASE(BITREV_W)
4684 NODE_NAME_CASE(ROTR_W)
4685 NODE_NAME_CASE(ROTL_W)
4686 NODE_NAME_CASE(DIV_W)
4687 NODE_NAME_CASE(DIV_WU)
4688 NODE_NAME_CASE(MOD_W)
4689 NODE_NAME_CASE(MOD_WU)
4690 NODE_NAME_CASE(CLZ_W)
4691 NODE_NAME_CASE(CTZ_W)
4692 NODE_NAME_CASE(DBAR)
4693 NODE_NAME_CASE(IBAR)
4694 NODE_NAME_CASE(BREAK)
4695 NODE_NAME_CASE(SYSCALL)
4696 NODE_NAME_CASE(CRC_W_B_W)
4697 NODE_NAME_CASE(CRC_W_H_W)
4698 NODE_NAME_CASE(CRC_W_W_W)
4699 NODE_NAME_CASE(CRC_W_D_W)
4700 NODE_NAME_CASE(CRCC_W_B_W)
4701 NODE_NAME_CASE(CRCC_W_H_W)
4702 NODE_NAME_CASE(CRCC_W_W_W)
4703 NODE_NAME_CASE(CRCC_W_D_W)
4704 NODE_NAME_CASE(CSRRD)
4705 NODE_NAME_CASE(CSRWR)
4706 NODE_NAME_CASE(CSRXCHG)
4707 NODE_NAME_CASE(IOCSRRD_B)
4708 NODE_NAME_CASE(IOCSRRD_H)
4709 NODE_NAME_CASE(IOCSRRD_W)
4710 NODE_NAME_CASE(IOCSRRD_D)
4711 NODE_NAME_CASE(IOCSRWR_B)
4712 NODE_NAME_CASE(IOCSRWR_H)
4713 NODE_NAME_CASE(IOCSRWR_W)
4714 NODE_NAME_CASE(IOCSRWR_D)
4715 NODE_NAME_CASE(CPUCFG)
4716 NODE_NAME_CASE(MOVGR2FCSR)
4717 NODE_NAME_CASE(MOVFCSR2GR)
4718 NODE_NAME_CASE(CACOP_D)
4719 NODE_NAME_CASE(CACOP_W)
4720 NODE_NAME_CASE(VSHUF)
4721 NODE_NAME_CASE(VPICKEV)
4722 NODE_NAME_CASE(VPICKOD)
4723 NODE_NAME_CASE(VPACKEV)
4724 NODE_NAME_CASE(VPACKOD)
4725 NODE_NAME_CASE(VILVL)
4726 NODE_NAME_CASE(VILVH)
4727 NODE_NAME_CASE(VSHUF4I)
4728 NODE_NAME_CASE(VREPLVEI)
4729 NODE_NAME_CASE(VREPLGR2VR)
4730 NODE_NAME_CASE(XVPERMI)
4731 NODE_NAME_CASE(VPICK_SEXT_ELT)
4732 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4733 NODE_NAME_CASE(VREPLVE)
4734 NODE_NAME_CASE(VALL_ZERO)
4735 NODE_NAME_CASE(VANY_ZERO)
4736 NODE_NAME_CASE(VALL_NONZERO)
4737 NODE_NAME_CASE(VANY_NONZERO)
4738 NODE_NAME_CASE(FRECIPE)
4739 NODE_NAME_CASE(FRSQRTE)
4741 #undef NODE_NAME_CASE
4742 return nullptr;
4745 //===----------------------------------------------------------------------===//
4746 // Calling Convention Implementation
4747 //===----------------------------------------------------------------------===//
4749 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
4750 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
4751 // fixed-point arguments, and floating-point arguments when no FPR is available
4752 // or with soft float ABI.
4753 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4754 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4755 LoongArch::R10, LoongArch::R11};
4756 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4757 // arguments, and fa0-fa1 are also used to return values.
4758 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4759 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4760 LoongArch::F6, LoongArch::F7};
4761 // FPR32 and FPR64 alias each other.
4762 const MCPhysReg ArgFPR64s[] = {
4763 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4764 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4766 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4767 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4768 LoongArch::VR6, LoongArch::VR7};
4770 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4771 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4772 LoongArch::XR6, LoongArch::XR7};
4774 // Pass a 2*GRLen argument that has been split into two GRLen values through
4775 // registers or the stack as necessary.
4776 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4777 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4778 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4779 ISD::ArgFlagsTy ArgFlags2) {
4780 unsigned GRLenInBytes = GRLen / 8;
4781 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4782 // At least one half can be passed via register.
4783 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4784 VA1.getLocVT(), CCValAssign::Full));
4785 } else {
4786 // Both halves must be passed on the stack, with proper alignment.
4787 Align StackAlign =
4788 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4789 State.addLoc(
4790 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4791 State.AllocateStack(GRLenInBytes, StackAlign),
4792 VA1.getLocVT(), CCValAssign::Full));
4793 State.addLoc(CCValAssign::getMem(
4794 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4795 LocVT2, CCValAssign::Full));
4796 return false;
4798 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4799 // The second half can also be passed via register.
4800 State.addLoc(
4801 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4802 } else {
4803 // The second half is passed via the stack, without additional alignment.
4804 State.addLoc(CCValAssign::getMem(
4805 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4806 LocVT2, CCValAssign::Full));
4808 return false;
4811 // Implements the LoongArch calling convention. Returns true upon failure.
4812 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4813 unsigned ValNo, MVT ValVT,
4814 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4815 CCState &State, bool IsFixed, bool IsRet,
4816 Type *OrigTy) {
4817 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4818 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4819 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4820 MVT LocVT = ValVT;
4822 // Any return value split into more than two values can't be returned
4823 // directly.
4824 if (IsRet && ValNo > 1)
4825 return true;
4827 // Floats are passed in GPRs when targeting a soft-float ABI, when passing a variadic argument, or when no FPR is available.
4828 bool UseGPRForFloat = true;
4830 switch (ABI) {
4831 default:
4832 llvm_unreachable("Unexpected ABI");
4833 break;
4834 case LoongArchABI::ABI_ILP32F:
4835 case LoongArchABI::ABI_LP64F:
4836 case LoongArchABI::ABI_ILP32D:
4837 case LoongArchABI::ABI_LP64D:
4838 UseGPRForFloat = !IsFixed;
4839 break;
4840 case LoongArchABI::ABI_ILP32S:
4841 case LoongArchABI::ABI_LP64S:
4842 break;
4845 // FPR32 and FPR64 alias each other.
4846 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4847 UseGPRForFloat = true;
4849 if (UseGPRForFloat && ValVT == MVT::f32) {
4850 LocVT = GRLenVT;
4851 LocInfo = CCValAssign::BCvt;
4852 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4853 LocVT = MVT::i64;
4854 LocInfo = CCValAssign::BCvt;
4855 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4856 // TODO: Handle passing f64 on LA32 with D feature.
4857 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4860 // If this is a variadic argument, the LoongArch calling convention requires
4861 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4862 // byte alignment. An aligned register should be used regardless of whether
4863 // the original argument was split during legalisation or not. The argument
4864 // will not be passed by registers if the original type is larger than
4865 // 2*GRLen, so the register alignment rule does not apply.
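// For example, on LA64 an i128 variadic argument that would otherwise start
// in $a3 skips $a3 and is passed in the aligned pair $a4/$a5.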
4866 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4867 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4868 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4869 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4870 // Skip 'odd' register if necessary.
4871 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4872 State.AllocateReg(ArgGPRs);
4875 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4876 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4877 State.getPendingArgFlags();
4879 assert(PendingLocs.size() == PendingArgFlags.size() &&
4880 "PendingLocs and PendingArgFlags out of sync");
4882 // Split arguments might be passed indirectly, so keep track of the pending
4883 // values.
4884 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4885 LocVT = GRLenVT;
4886 LocInfo = CCValAssign::Indirect;
4887 PendingLocs.push_back(
4888 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4889 PendingArgFlags.push_back(ArgFlags);
4890 if (!ArgFlags.isSplitEnd()) {
4891 return false;
4895 // If the split argument only had two elements, it should be passed directly
4896 // in registers or on the stack.
4897 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4898 PendingLocs.size() <= 2) {
4899 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4900 // Apply the normal calling convention rules to the first half of the
4901 // split argument.
4902 CCValAssign VA = PendingLocs[0];
4903 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4904 PendingLocs.clear();
4905 PendingArgFlags.clear();
4906 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4907 ArgFlags);
4910 // Allocate to a register if possible, or else a stack slot.
4911 Register Reg;
4912 unsigned StoreSizeBytes = GRLen / 8;
4913 Align StackAlign = Align(GRLen / 8);
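// Select the register file for the value: scalar FP values go to FPR32/FPR64
// when FPRs may be used, 128-bit vectors to LSX registers, 256-bit vectors to
// LASX registers, and everything else, including FP values forced into GPRs,
// to the GPRs.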
4915 if (ValVT == MVT::f32 && !UseGPRForFloat)
4916 Reg = State.AllocateReg(ArgFPR32s);
4917 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4918 Reg = State.AllocateReg(ArgFPR64s);
4919 else if (ValVT.is128BitVector())
4920 Reg = State.AllocateReg(ArgVRs);
4921 else if (ValVT.is256BitVector())
4922 Reg = State.AllocateReg(ArgXRs);
4923 else
4924 Reg = State.AllocateReg(ArgGPRs);
4926 unsigned StackOffset =
4927 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4929 // If we reach this point and PendingLocs is non-empty, we must be at the
4930 // end of a split argument that must be passed indirectly.
4931 if (!PendingLocs.empty()) {
4932 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4933 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4934 for (auto &It : PendingLocs) {
4935 if (Reg)
4936 It.convertToReg(Reg);
4937 else
4938 It.convertToMem(StackOffset);
4939 State.addLoc(It);
4941 PendingLocs.clear();
4942 PendingArgFlags.clear();
4943 return false;
4945 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
4946 "Expected an GRLenVT at this stage");
4948 if (Reg) {
4949 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4950 return false;
4953 // When a floating-point value is passed on the stack, no bit-cast is needed.
4954 if (ValVT.isFloatingPoint()) {
4955 LocVT = ValVT;
4956 LocInfo = CCValAssign::Full;
4959 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4960 return false;
4963 void LoongArchTargetLowering::analyzeInputArgs(
4964 MachineFunction &MF, CCState &CCInfo,
4965 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
4966 LoongArchCCAssignFn Fn) const {
4967 FunctionType *FType = MF.getFunction().getFunctionType();
4968 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4969 MVT ArgVT = Ins[i].VT;
4970 Type *ArgTy = nullptr;
4971 if (IsRet)
4972 ArgTy = FType->getReturnType();
4973 else if (Ins[i].isOrigArg())
4974 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4975 LoongArchABI::ABI ABI =
4976 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4977 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
4978 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
4979 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
4980 << '\n');
4981 llvm_unreachable("");
4986 void LoongArchTargetLowering::analyzeOutputArgs(
4987 MachineFunction &MF, CCState &CCInfo,
4988 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4989 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
4990 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4991 MVT ArgVT = Outs[i].VT;
4992 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4993 LoongArchABI::ABI ABI =
4994 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4995 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
4996 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
4997 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
4998 << "\n");
4999 llvm_unreachable("");
5004 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5005 // values.
5006 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5007 const CCValAssign &VA, const SDLoc &DL) {
5008 switch (VA.getLocInfo()) {
5009 default:
5010 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5011 case CCValAssign::Full:
5012 case CCValAssign::Indirect:
5013 break;
5014 case CCValAssign::BCvt:
5015 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5016 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
5017 else
5018 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5019 break;
5021 return Val;
5024 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5025 const CCValAssign &VA, const SDLoc &DL,
5026 const ISD::InputArg &In,
5027 const LoongArchTargetLowering &TLI) {
5028 MachineFunction &MF = DAG.getMachineFunction();
5029 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5030 EVT LocVT = VA.getLocVT();
5031 SDValue Val;
5032 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5033 Register VReg = RegInfo.createVirtualRegister(RC);
5034 RegInfo.addLiveIn(VA.getLocReg(), VReg);
5035 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5037 // If input is sign extended from 32 bits, note it for the OptW pass.
5038 if (In.isOrigArg()) {
5039 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
5040 if (OrigArg->getType()->isIntegerTy()) {
5041 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
5042 // An input zero extended from i31 can also be considered sign extended.
5043 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
5044 (BitWidth < 32 && In.Flags.isZExt())) {
5045 LoongArchMachineFunctionInfo *LAFI =
5046 MF.getInfo<LoongArchMachineFunctionInfo>();
5047 LAFI->addSExt32Register(VReg);
5052 return convertLocVTToValVT(DAG, Val, VA, DL);
5055 // The caller is responsible for loading the full value if the argument is
5056 // passed with CCValAssign::Indirect.
5057 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5058 const CCValAssign &VA, const SDLoc &DL) {
5059 MachineFunction &MF = DAG.getMachineFunction();
5060 MachineFrameInfo &MFI = MF.getFrameInfo();
5061 EVT ValVT = VA.getValVT();
5062 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
5063 /*IsImmutable=*/true);
5064 SDValue FIN = DAG.getFrameIndex(
5065 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
5067 ISD::LoadExtType ExtType;
5068 switch (VA.getLocInfo()) {
5069 default:
5070 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5071 case CCValAssign::Full:
5072 case CCValAssign::Indirect:
5073 case CCValAssign::BCvt:
5074 ExtType = ISD::NON_EXTLOAD;
5075 break;
5077 return DAG.getExtLoad(
5078 ExtType, DL, VA.getLocVT(), Chain, FIN,
5079 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5082 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5083 const CCValAssign &VA, const SDLoc &DL) {
5084 EVT LocVT = VA.getLocVT();
5086 switch (VA.getLocInfo()) {
5087 default:
5088 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5089 case CCValAssign::Full:
5090 break;
5091 case CCValAssign::BCvt:
5092 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5093 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
5094 else
5095 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5096 break;
5098 return Val;
5101 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5102 CCValAssign::LocInfo LocInfo,
5103 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5104 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5105 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5106 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5107 static const MCPhysReg GPRList[] = {
5108 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5109 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5110 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5111 if (MCRegister Reg = State.AllocateReg(GPRList)) {
5112 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5113 return false;
5117 if (LocVT == MVT::f32) {
5118 // Pass in STG registers: F1, F2, F3, F4
5119 // fs0,fs1,fs2,fs3
5120 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5121 LoongArch::F26, LoongArch::F27};
5122 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
5123 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5124 return false;
5128 if (LocVT == MVT::f64) {
5129 // Pass in STG registers: D1, D2, D3, D4
5130 // fs4,fs5,fs6,fs7
5131 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5132 LoongArch::F30_64, LoongArch::F31_64};
5133 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
5134 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5135 return false;
5139 report_fatal_error("No registers left in GHC calling convention");
5140 return true;
5143 // Transform physical registers into virtual registers.
5144 SDValue LoongArchTargetLowering::LowerFormalArguments(
5145 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5146 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5147 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5149 MachineFunction &MF = DAG.getMachineFunction();
5151 switch (CallConv) {
5152 default:
5153 llvm_unreachable("Unsupported calling convention");
5154 case CallingConv::C:
5155 case CallingConv::Fast:
5156 break;
5157 case CallingConv::GHC:
5158 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5159 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5160 report_fatal_error(
5161 "GHC calling convention requires the F and D extensions");
5164 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5165 MVT GRLenVT = Subtarget.getGRLenVT();
5166 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5167 // Used with varargs to accumulate store chains.
5168 std::vector<SDValue> OutChains;
5170 // Assign locations to all of the incoming arguments.
5171 SmallVector<CCValAssign> ArgLocs;
5172 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5174 if (CallConv == CallingConv::GHC)
5175 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5176 else
5177 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5179 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5180 CCValAssign &VA = ArgLocs[i];
5181 SDValue ArgValue;
5182 if (VA.isRegLoc())
5183 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5184 else
5185 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5186 if (VA.getLocInfo() == CCValAssign::Indirect) {
5187 // If the original argument was split and passed by reference, we need to
5188 // load all parts of it here (using the same address).
5189 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5190 MachinePointerInfo()));
5191 unsigned ArgIndex = Ins[i].OrigArgIndex;
5192 unsigned ArgPartOffset = Ins[i].PartOffset;
5193 assert(ArgPartOffset == 0);
5194 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5195 CCValAssign &PartVA = ArgLocs[i + 1];
5196 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5197 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5198 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5199 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5200 MachinePointerInfo()));
5201 ++i;
5203 continue;
5205 InVals.push_back(ArgValue);
5208 if (IsVarArg) {
5209 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5210 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5211 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5212 MachineFrameInfo &MFI = MF.getFrameInfo();
5213 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5214 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5216 // Offset of the first variable argument from stack pointer, and size of
5217 // the vararg save area. For now, the varargs save area is either zero or
5218 // large enough to hold a0-a7.
5219 int VaArgOffset, VarArgsSaveSize;
5221 // If all registers are allocated, then all varargs must be passed on the
5222 // stack and we don't need to save any argregs.
5223 if (ArgRegs.size() == Idx) {
5224 VaArgOffset = CCInfo.getStackSize();
5225 VarArgsSaveSize = 0;
5226 } else {
5227 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5228 VaArgOffset = -VarArgsSaveSize;
5231 // Record the frame index of the first variable argument,
5232 // which is the value needed by VASTART.
5233 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5234 LoongArchFI->setVarArgsFrameIndex(FI);
5236 // If saving an odd number of registers then create an extra stack slot to
5237 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5238 // offsets to even-numbered registers remain 2*GRLen-aligned.
5239 if (Idx % 2) {
5240 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5241 true);
5242 VarArgsSaveSize += GRLenInBytes;
5245 // Copy the integer registers that may have been used for passing varargs
5246 // to the vararg save area.
5247 for (unsigned I = Idx; I < ArgRegs.size();
5248 ++I, VaArgOffset += GRLenInBytes) {
5249 const Register Reg = RegInfo.createVirtualRegister(RC);
5250 RegInfo.addLiveIn(ArgRegs[I], Reg);
5251 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5252 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5253 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5254 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5255 MachinePointerInfo::getFixedStack(MF, FI));
5256 cast<StoreSDNode>(Store.getNode())
5257 ->getMemOperand()
5258 ->setValue((Value *)nullptr);
5259 OutChains.push_back(Store);
5261 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5264 // All stores are grouped in one node to allow the matching between
5265 // the size of Ins and InVals. This only happens for vararg functions.
5266 if (!OutChains.empty()) {
5267 OutChains.push_back(Chain);
5268 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5271 return Chain;
5274 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5275 return CI->isTailCall();
5278 // Check that the return value is used only as a return value, since
5279 // otherwise we can't perform a tail call.
5280 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5281 SDValue &Chain) const {
5282 if (N->getNumValues() != 1)
5283 return false;
5284 if (!N->hasNUsesOfValue(1, 0))
5285 return false;
5287 SDNode *Copy = *N->use_begin();
5288 if (Copy->getOpcode() != ISD::CopyToReg)
5289 return false;
5291 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5292 // isn't safe to perform a tail call.
5293 if (Copy->getGluedNode())
5294 return false;
5296 // The copy must be used by a LoongArchISD::RET, and nothing else.
5297 bool HasRet = false;
5298 for (SDNode *Node : Copy->uses()) {
5299 if (Node->getOpcode() != LoongArchISD::RET)
5300 return false;
5301 HasRet = true;
5304 if (!HasRet)
5305 return false;
5307 Chain = Copy->getOperand(0);
5308 return true;
5311 // Check whether the call is eligible for tail call optimization.
5312 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5313 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5314 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5316 auto CalleeCC = CLI.CallConv;
5317 auto &Outs = CLI.Outs;
5318 auto &Caller = MF.getFunction();
5319 auto CallerCC = Caller.getCallingConv();
5321 // Do not tail call opt if the stack is used to pass parameters.
5322 if (CCInfo.getStackSize() != 0)
5323 return false;
5325 // Do not tail call opt if any parameters need to be passed indirectly.
5326 for (auto &VA : ArgLocs)
5327 if (VA.getLocInfo() == CCValAssign::Indirect)
5328 return false;
5330 // Do not tail call opt if either caller or callee uses struct return
5331 // semantics.
5332 auto IsCallerStructRet = Caller.hasStructRetAttr();
5333 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5334 if (IsCallerStructRet || IsCalleeStructRet)
5335 return false;
5337 // Do not tail call opt if either the callee or caller has a byval argument.
5338 for (auto &Arg : Outs)
5339 if (Arg.Flags.isByVal())
5340 return false;
5342 // The callee has to preserve all registers the caller needs to preserve.
5343 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5344 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5345 if (CalleeCC != CallerCC) {
5346 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5347 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5348 return false;
5350 return true;
5353 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5354 return DAG.getDataLayout().getPrefTypeAlign(
5355 VT.getTypeForEVT(*DAG.getContext()));
5358 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5359 // and output parameter nodes.
5360 SDValue
5361 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5362 SmallVectorImpl<SDValue> &InVals) const {
5363 SelectionDAG &DAG = CLI.DAG;
5364 SDLoc &DL = CLI.DL;
5365 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5366 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5367 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5368 SDValue Chain = CLI.Chain;
5369 SDValue Callee = CLI.Callee;
5370 CallingConv::ID CallConv = CLI.CallConv;
5371 bool IsVarArg = CLI.IsVarArg;
5372 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5373 MVT GRLenVT = Subtarget.getGRLenVT();
5374 bool &IsTailCall = CLI.IsTailCall;
5376 MachineFunction &MF = DAG.getMachineFunction();
5378 // Analyze the operands of the call, assigning locations to each operand.
5379 SmallVector<CCValAssign> ArgLocs;
5380 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5382 if (CallConv == CallingConv::GHC)
5383 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5384 else
5385 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5387 // Check if it's really possible to do a tail call.
5388 if (IsTailCall)
5389 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5391 if (IsTailCall)
5392 ++NumTailCalls;
5393 else if (CLI.CB && CLI.CB->isMustTailCall())
5394 report_fatal_error("failed to perform tail call elimination on a call "
5395 "site marked musttail");
5397 // Get a count of how many bytes are to be pushed on the stack.
5398 unsigned NumBytes = ArgCCInfo.getStackSize();
5400 // Create local copies for byval args.
5401 SmallVector<SDValue> ByValArgs;
5402 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5403 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5404 if (!Flags.isByVal())
5405 continue;
5407 SDValue Arg = OutVals[i];
5408 unsigned Size = Flags.getByValSize();
5409 Align Alignment = Flags.getNonZeroByValAlign();
5411 int FI =
5412 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5413 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5414 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5416 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5417 /*IsVolatile=*/false,
5418 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5419 MachinePointerInfo(), MachinePointerInfo());
5420 ByValArgs.push_back(FIPtr);
5423 if (!IsTailCall)
5424 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5426 // Copy argument values to their designated locations.
5427 SmallVector<std::pair<Register, SDValue>> RegsToPass;
5428 SmallVector<SDValue> MemOpChains;
5429 SDValue StackPtr;
5430 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5431 CCValAssign &VA = ArgLocs[i];
5432 SDValue ArgValue = OutVals[i];
5433 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5435 // Promote the value if needed.
5436 // For now, only handle fully promoted and indirect arguments.
5437 if (VA.getLocInfo() == CCValAssign::Indirect) {
5438 // Store the argument in a stack slot and pass its address.
5439 Align StackAlign =
5440 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5441 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5442 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5443 // If the original argument was split and passed by reference, we need to
5444 // store the required parts of it here (and pass just one address).
5445 unsigned ArgIndex = Outs[i].OrigArgIndex;
5446 unsigned ArgPartOffset = Outs[i].PartOffset;
5447 assert(ArgPartOffset == 0);
5448 // Calculate the total size to store. We don't have access to what we're
5449 // actually storing other than performing the loop and collecting the
5450 // info.
5451 SmallVector<std::pair<SDValue, SDValue>> Parts;
5452 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5453 SDValue PartValue = OutVals[i + 1];
5454 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5455 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5456 EVT PartVT = PartValue.getValueType();
5458 StoredSize += PartVT.getStoreSize();
5459 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5460 Parts.push_back(std::make_pair(PartValue, Offset));
5461 ++i;
5463 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5464 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5465 MemOpChains.push_back(
5466 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5467 MachinePointerInfo::getFixedStack(MF, FI)));
5468 for (const auto &Part : Parts) {
5469 SDValue PartValue = Part.first;
5470 SDValue PartOffset = Part.second;
5471 SDValue Address =
5472 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5473 MemOpChains.push_back(
5474 DAG.getStore(Chain, DL, PartValue, Address,
5475 MachinePointerInfo::getFixedStack(MF, FI)));
5477 ArgValue = SpillSlot;
5478 } else {
5479 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5482 // Use local copy if it is a byval arg.
5483 if (Flags.isByVal())
5484 ArgValue = ByValArgs[j++];
5486 if (VA.isRegLoc()) {
5487 // Queue up the argument copies and emit them at the end.
5488 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5489 } else {
5490 assert(VA.isMemLoc() && "Argument not register or memory");
5491 assert(!IsTailCall && "Tail call not allowed if stack is used "
5492 "for passing parameters");
5494 // Work out the address of the stack slot.
5495 if (!StackPtr.getNode())
5496 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5497 SDValue Address =
5498 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5499 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5501 // Emit the store.
5502 MemOpChains.push_back(
5503 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5507 // Join the stores, which are independent of one another.
5508 if (!MemOpChains.empty())
5509 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5511 SDValue Glue;
5513 // Build a sequence of copy-to-reg nodes, chained and glued together.
5514 for (auto &Reg : RegsToPass) {
5515 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5516 Glue = Chain.getValue(1);
5519 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5520 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5521 // split it, and so the direct call can be matched by PseudoCALL.
5522 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5523 const GlobalValue *GV = S->getGlobal();
5524 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5525 ? LoongArchII::MO_CALL
5526 : LoongArchII::MO_CALL_PLT;
5527 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5528 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5529 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5530 ? LoongArchII::MO_CALL
5531 : LoongArchII::MO_CALL_PLT;
5532 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5535 // The first call operand is the chain and the second is the target address.
5536 SmallVector<SDValue> Ops;
5537 Ops.push_back(Chain);
5538 Ops.push_back(Callee);
5540 // Add argument registers to the end of the list so that they are
5541 // known live into the call.
5542 for (auto &Reg : RegsToPass)
5543 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5545 if (!IsTailCall) {
5546 // Add a register mask operand representing the call-preserved registers.
5547 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5548 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5549 assert(Mask && "Missing call preserved mask for calling convention");
5550 Ops.push_back(DAG.getRegisterMask(Mask));
5553 // Glue the call to the argument copies, if any.
5554 if (Glue.getNode())
5555 Ops.push_back(Glue);
5557 // Emit the call.
5558 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5559 unsigned Op;
5560 switch (DAG.getTarget().getCodeModel()) {
5561 default:
5562 report_fatal_error("Unsupported code model");
5563 case CodeModel::Small:
5564 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5565 break;
5566 case CodeModel::Medium:
5567 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5568 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5569 break;
5570 case CodeModel::Large:
5571 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5572 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5573 break;
5576 if (IsTailCall) {
5577 MF.getFrameInfo().setHasTailCall();
5578 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5579 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5580 return Ret;
5583 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5584 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5585 Glue = Chain.getValue(1);
5587 // Mark the end of the call, which is glued to the call itself.
5588 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5589 Glue = Chain.getValue(1);
5591 // Assign locations to each value returned by this call.
5592 SmallVector<CCValAssign> RVLocs;
5593 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5594 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5596 // Copy all of the result registers out of their specified physreg.
5597 for (auto &VA : RVLocs) {
5598 // Copy the value out.
5599 SDValue RetValue =
5600 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5601 // Glue the RetValue to the end of the call sequence.
5602 Chain = RetValue.getValue(1);
5603 Glue = RetValue.getValue(2);
5605 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5607 InVals.push_back(RetValue);
5610 return Chain;
5613 bool LoongArchTargetLowering::CanLowerReturn(
5614 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5615 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5616 SmallVector<CCValAssign> RVLocs;
5617 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5619 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5620 LoongArchABI::ABI ABI =
5621 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5622 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5623 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5624 nullptr))
5625 return false;
5627 return true;
5630 SDValue LoongArchTargetLowering::LowerReturn(
5631 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5632 const SmallVectorImpl<ISD::OutputArg> &Outs,
5633 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5634 SelectionDAG &DAG) const {
5635 // Stores the assignment of the return value to a location.
5636 SmallVector<CCValAssign> RVLocs;
5638 // Info about the registers and stack slot.
5639 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5640 *DAG.getContext());
5642 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5643 nullptr, CC_LoongArch);
5644 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5645 report_fatal_error("GHC functions return void only");
5646 SDValue Glue;
5647 SmallVector<SDValue, 4> RetOps(1, Chain);
5649 // Copy the result values into the output registers.
5650 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5651 CCValAssign &VA = RVLocs[i];
5652 assert(VA.isRegLoc() && "Can only return in registers!");
5654 // Handle a 'normal' return.
5655 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5656 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5658 // Guarantee that all emitted copies are stuck together.
5659 Glue = Chain.getValue(1);
5660 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5663 RetOps[0] = Chain; // Update chain.
5665 // Add the glue node if we have it.
5666 if (Glue.getNode())
5667 RetOps.push_back(Glue);
5669 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5672 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
5673 EVT VT) const {
5674 if (!Subtarget.hasExtLSX())
5675 return false;
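// The masks below accept exactly the immediates a VLDI can synthesize for an
// FP element: the sign bit is free, only the top four mantissa bits may be
// set, and the exponent must lie in a small range around the bias; that is,
// values of the form +/-(1 + m/16) * 2^e with 4-bit m and e in [-3, 4].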
5677 if (VT == MVT::f32) {
5678 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5679 return (masked == 0x3e000000 || masked == 0x40000000);
5682 if (VT == MVT::f64) {
5683 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5684 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5687 return false;
5690 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5691 bool ForCodeSize) const {
5692 // TODO: Maybe need more checks here after vector extension is supported.
5693 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5694 return false;
5695 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5696 return false;
5697 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5700 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
5701 return true;
5704 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
5705 return true;
5708 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5709 const Instruction *I) const {
5710 if (!Subtarget.is64Bit())
5711 return isa<LoadInst>(I) || isa<StoreInst>(I);
5713 if (isa<LoadInst>(I))
5714 return true;
5716 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5717 // require fences because we can use amswap_db.[w/d].
5718 Type *Ty = I->getOperand(0)->getType();
5719 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5720 unsigned Size = Ty->getIntegerBitWidth();
5721 return (Size == 8 || Size == 16);
5724 return false;
5727 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5728 LLVMContext &Context,
5729 EVT VT) const {
5730 if (!VT.isVector())
5731 return getPointerTy(DL);
5732 return VT.changeVectorElementTypeToInteger();
5735 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
5736 // TODO: Support vectors.
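// Constants are excluded because the NOT of a constant mask can simply be
// folded into the constant itself, so an and-not instruction gains nothing.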
5737 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5740 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5741 const CallInst &I,
5742 MachineFunction &MF,
5743 unsigned Intrinsic) const {
5744 switch (Intrinsic) {
5745 default:
5746 return false;
5747 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5748 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5749 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5750 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5751 Info.opc = ISD::INTRINSIC_W_CHAIN;
5752 Info.memVT = MVT::i32;
5753 Info.ptrVal = I.getArgOperand(0);
5754 Info.offset = 0;
5755 Info.align = Align(4);
5756 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5757 MachineMemOperand::MOVolatile;
5758 return true;
5759 // TODO: Add more Intrinsics later.
5763 // When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
5764 // and/or/xor operations on operands narrower than 32 bits can no longer be
5765 // expanded to am{and/or/xor}[_db].w by AtomicExpandPass. To avoid that
5766 // regression, expand them manually here.
5767 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
5768 AtomicRMWInst::BinOp Op = AI->getOperation();
5770 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
5771 Op == AtomicRMWInst::And) &&
5772 "Unable to expand");
5773 unsigned MinWordSize = 4;
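// Strategy: align the address down to a 32-bit word, build a mask covering
// the narrow value's lane inside that word, shift the operand into position
// (OR-ing in the inverted mask for 'and' so bits outside the lane are left
// unchanged), perform the 32-bit atomicrmw, then shift and truncate the old
// word to recover the original-typed result.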
5775 IRBuilder<> Builder(AI);
5776 LLVMContext &Ctx = Builder.getContext();
5777 const DataLayout &DL = AI->getDataLayout();
5778 Type *ValueType = AI->getType();
5779 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
5781 Value *Addr = AI->getPointerOperand();
5782 PointerType *PtrTy = cast<PointerType>(Addr->getType());
5783 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
5785 Value *AlignedAddr = Builder.CreateIntrinsic(
5786 Intrinsic::ptrmask, {PtrTy, IntTy},
5787 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
5788 "AlignedAddr");
5790 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
5791 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
5792 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
5793 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
5794 Value *Mask = Builder.CreateShl(
5795 ConstantInt::get(WordType,
5796 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
5797 ShiftAmt, "Mask");
5798 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
5799 Value *ValOperand_Shifted =
5800 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
5801 ShiftAmt, "ValOperand_Shifted");
5802 Value *NewOperand;
5803 if (Op == AtomicRMWInst::And)
5804 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
5805 else
5806 NewOperand = ValOperand_Shifted;
5808 AtomicRMWInst *NewAI =
5809 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
5810 AI->getOrdering(), AI->getSyncScopeID());
5812 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
5813 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
5814 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
5815 AI->replaceAllUsesWith(FinalOldResult);
5816 AI->eraseFromParent();
5819 TargetLowering::AtomicExpansionKind
5820 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5821 // TODO: Add more AtomicRMWInst that need to be extended.
5823 // Since floating-point operations require a non-trivial set of data
5824 // operations, use CmpXChg to expand.
5825 if (AI->isFloatingPointOperation() ||
5826 AI->getOperation() == AtomicRMWInst::UIncWrap ||
5827 AI->getOperation() == AtomicRMWInst::UDecWrap ||
5828 AI->getOperation() == AtomicRMWInst::USubCond ||
5829 AI->getOperation() == AtomicRMWInst::USubSat)
5830 return AtomicExpansionKind::CmpXChg;
5832 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
5833 (AI->getOperation() == AtomicRMWInst::Xchg ||
5834 AI->getOperation() == AtomicRMWInst::Add ||
5835 AI->getOperation() == AtomicRMWInst::Sub)) {
5836 return AtomicExpansionKind::None;
5839 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5840 if (Subtarget.hasLAMCAS()) {
5841 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
5842 AI->getOperation() == AtomicRMWInst::Or ||
5843 AI->getOperation() == AtomicRMWInst::Xor))
5844 return AtomicExpansionKind::Expand;
5845 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
5846 return AtomicExpansionKind::CmpXChg;
5849 if (Size == 8 || Size == 16)
5850 return AtomicExpansionKind::MaskedIntrinsic;
5851 return AtomicExpansionKind::None;
5854 static Intrinsic::ID
5855 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5856 AtomicRMWInst::BinOp BinOp) {
5857 if (GRLen == 64) {
5858 switch (BinOp) {
5859 default:
5860 llvm_unreachable("Unexpected AtomicRMW BinOp");
5861 case AtomicRMWInst::Xchg:
5862 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5863 case AtomicRMWInst::Add:
5864 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5865 case AtomicRMWInst::Sub:
5866 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5867 case AtomicRMWInst::Nand:
5868 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5869 case AtomicRMWInst::UMax:
5870 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5871 case AtomicRMWInst::UMin:
5872 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5873 case AtomicRMWInst::Max:
5874 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5875 case AtomicRMWInst::Min:
5876 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5877 // TODO: support other AtomicRMWInst.
5881 if (GRLen == 32) {
5882 switch (BinOp) {
5883 default:
5884 llvm_unreachable("Unexpected AtomicRMW BinOp");
5885 case AtomicRMWInst::Xchg:
5886 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5887 case AtomicRMWInst::Add:
5888 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5889 case AtomicRMWInst::Sub:
5890 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5891 case AtomicRMWInst::Nand:
5892 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5893 // TODO: support other AtomicRMWInst.
5897 llvm_unreachable("Unexpected GRLen\n");
5900 TargetLowering::AtomicExpansionKind
5901 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
5902 AtomicCmpXchgInst *CI) const {
5904 if (Subtarget.hasLAMCAS())
5905 return AtomicExpansionKind::None;
5907 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5908 if (Size == 8 || Size == 16)
5909 return AtomicExpansionKind::MaskedIntrinsic;
5910 return AtomicExpansionKind::None;
5913 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5914 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5915 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5916 AtomicOrdering FailOrd = CI->getFailureOrdering();
5917 Value *FailureOrdering =
5918 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5920 // TODO: Support cmpxchg on LA32.
5921 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5922 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5923 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5924 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5925 Type *Tys[] = {AlignedAddr->getType()};
5926 Value *Result = Builder.CreateIntrinsic(
5927 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5928 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5929 return Result;
5932 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
5933 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5934 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5935 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
5936 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
5937 // mask, as this produces better code than the LL/SC loop emitted by
5938 // int_loongarch_masked_atomicrmw_xchg.
5939 if (AI->getOperation() == AtomicRMWInst::Xchg &&
5940 isa<ConstantInt>(AI->getValOperand())) {
5941 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
5942 if (CVal->isZero())
5943 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
5944 Builder.CreateNot(Mask, "Inv_Mask"),
5945 AI->getAlign(), Ord);
5946 if (CVal->isMinusOne())
5947 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
5948 AI->getAlign(), Ord);
5951 unsigned GRLen = Subtarget.getGRLen();
5952 Value *Ordering =
5953 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
5954 Type *Tys[] = {AlignedAddr->getType()};
5955 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
5956 AI->getModule(),
5957 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
5959 if (GRLen == 64) {
5960 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
5961 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5962 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
5965 Value *Result;
5967 // Must pass the shift amount needed to sign extend the loaded value prior
5968 // to performing a signed comparison for min/max. ShiftAmt is the number of
5969 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
5970 // is the number of bits to left+right shift the value in order to
5971 // sign-extend.
5972 if (AI->getOperation() == AtomicRMWInst::Min ||
5973 AI->getOperation() == AtomicRMWInst::Max) {
5974 const DataLayout &DL = AI->getDataLayout();
5975 unsigned ValWidth =
5976 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
5977 Value *SextShamt =
5978 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
5979 Result = Builder.CreateCall(LlwOpScwLoop,
5980 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
5981 } else {
5982 Result =
5983 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
5984 }
5986 if (GRLen == 64)
5987 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5988 return Result;
5989 }
5991 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
5992 const MachineFunction &MF, EVT VT) const {
5993 VT = VT.getScalarType();
5995 if (!VT.isSimple())
5996 return false;
5998 switch (VT.getSimpleVT().SimpleTy) {
5999 case MVT::f32:
6000 case MVT::f64:
6001 return true;
6002 default:
6003 break;
6004 }
6006 return false;
6007 }
6009 Register LoongArchTargetLowering::getExceptionPointerRegister(
6010 const Constant *PersonalityFn) const {
6011 return LoongArch::R4;
6012 }
6014 Register LoongArchTargetLowering::getExceptionSelectorRegister(
6015 const Constant *PersonalityFn) const {
6016 return LoongArch::R5;
6017 }
6019 //===----------------------------------------------------------------------===//
6020 // Target Optimization Hooks
6021 //===----------------------------------------------------------------------===//
6023 static int getEstimateRefinementSteps(EVT VT,
6024 const LoongArchSubtarget &Subtarget) {
6025 // The FRECIPE estimate instructions have a relative accuracy of 2^-14.
6026 // IEEE single precision has 23 fraction bits and double precision has 52.
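// Rough justification (illustrative): each Newton-Raphson step roughly doubles
// the number of accurate bits, so ~14 bits become ~28 after one step (enough
// for f32's 24-bit significand) and ~56 after two steps (enough for f64's 53).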
6027 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
6028 return RefinementSteps;
6029 }
6031 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
6032 SelectionDAG &DAG, int Enabled,
6033 int &RefinementSteps,
6034 bool &UseOneConstNR,
6035 bool Reciprocal) const {
6036 if (Subtarget.hasFrecipe()) {
6037 SDLoc DL(Operand);
6038 EVT VT = Operand.getValueType();
6040 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6041 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6042 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6043 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6044 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6046 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6047 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6049 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
6050 if (Reciprocal)
6051 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
6053 return Estimate;
6054 }
6055 }
6057 return SDValue();
6058 }
6060 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
6061 SelectionDAG &DAG,
6062 int Enabled,
6063 int &RefinementSteps) const {
6064 if (Subtarget.hasFrecipe()) {
6065 SDLoc DL(Operand);
6066 EVT VT = Operand.getValueType();
6068 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6069 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6070 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6071 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6072 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6074 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6075 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6077 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
6078 }
6079 }
6081 return SDValue();
6082 }
6084 //===----------------------------------------------------------------------===//
6085 // LoongArch Inline Assembly Support
6086 //===----------------------------------------------------------------------===//
6088 LoongArchTargetLowering::ConstraintType
6089 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
6090 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
6092 // 'f': A floating-point register (if available).
6093 // 'k': A memory operand whose address is formed by a base register and
6094 // (optionally scaled) index register.
6095 // 'l': A signed 16-bit constant.
6096 // 'm': A memory operand whose address is formed by a base register and
6097 // offset that is suitable for use in instructions with the same
6098 // addressing mode as st.w and ld.w.
6099 // 'I': A signed 12-bit constant (for arithmetic instructions).
6100 // 'J': Integer zero.
6101 // 'K': An unsigned 12-bit constant (for logic instructions).
6102 // "ZB": An address that is held in a general-purpose register. The offset is
6103 // zero.
6104 // "ZC": A memory operand whose address is formed by a base register and
6105 // offset that is suitable for use in instructions with the same
6106 // addressing mode as ll.w and sc.w.
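// Illustrative uses of some of these constraints in GCC/Clang-style inline
// assembly (examples assumed for exposition):
//   asm ("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(12));    // 'I': simm12
//   asm ("ori    %0, %1, %2" : "=r"(res) : "r"(a), "K"(0xfff)); // 'K': uimm12
//   asm ("fadd.s %0, %1, %2" : "=f"(d) : "f"(x), "f"(y));       // 'f': FP reg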
6107 if (Constraint.size() == 1) {
6108 switch (Constraint[0]) {
6109 default:
6110 break;
6111 case 'f':
6112 return C_RegisterClass;
6113 case 'l':
6114 case 'I':
6115 case 'J':
6116 case 'K':
6117 return C_Immediate;
6118 case 'k':
6119 return C_Memory;
6120 }
6121 }
6123 if (Constraint == "ZC" || Constraint == "ZB")
6124 return C_Memory;
6126 // 'm' is handled here.
6127 return TargetLowering::getConstraintType(Constraint);
6128 }
6130 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
6131 StringRef ConstraintCode) const {
6132 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
6133 .Case("k", InlineAsm::ConstraintCode::k)
6134 .Case("ZB", InlineAsm::ConstraintCode::ZB)
6135 .Case("ZC", InlineAsm::ConstraintCode::ZC)
6136 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
6137 }
6139 std::pair<unsigned, const TargetRegisterClass *>
6140 LoongArchTargetLowering::getRegForInlineAsmConstraint(
6141 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
6142 // First, see if this is a constraint that directly corresponds to a LoongArch
6143 // register class.
6144 if (Constraint.size() == 1) {
6145 switch (Constraint[0]) {
6146 case 'r':
6147 // TODO: Support fixed vectors up to GRLen?
6148 if (VT.isVector())
6149 break;
6150 return std::make_pair(0U, &LoongArch::GPRRegClass);
6151 case 'f':
6152 if (Subtarget.hasBasicF() && VT == MVT::f32)
6153 return std::make_pair(0U, &LoongArch::FPR32RegClass);
6154 if (Subtarget.hasBasicD() && VT == MVT::f64)
6155 return std::make_pair(0U, &LoongArch::FPR64RegClass);
6156 if (Subtarget.hasExtLSX() &&
6157 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
6158 return std::make_pair(0U, &LoongArch::LSX128RegClass);
6159 if (Subtarget.hasExtLASX() &&
6160 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
6161 return std::make_pair(0U, &LoongArch::LASX256RegClass);
6162 break;
6163 default:
6164 break;
6165 }
6166 }
6168 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
6169 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
6170 // constraints while the official register name is prefixed with a '$'. So we
6171 // clip the '$' from the original constraint string (e.g. {$r0} becomes {r0})
6172 // before it is parsed. TargetLowering::getRegForInlineAsmConstraint is also
6173 // case insensitive, so there is no need to convert the constraint to upper case here.
6175 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
6176 // decode the usage of register name aliases into their official names. And
6177 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
6178 // official register names.
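// For example, the constraint "{$r4}" is split at '$' and rejoined as "{r4}",
// which the generic handling then matches (case-insensitively) against the
// TableGen record name R4.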
6179 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
6180 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
6181 bool IsFP = Constraint[2] == 'f';
6182 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
6183 std::pair<unsigned, const TargetRegisterClass *> R;
6184 R = TargetLowering::getRegForInlineAsmConstraint(
6185 TRI, join_items("", Temp.first, Temp.second), VT);
6186 // Match those names to the widest floating point register type available.
6187 if (IsFP) {
6188 unsigned RegNo = R.first;
6189 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
6190 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
6191 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
6192 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
6193 }
6194 }
6195 }
6196 return R;
6197 }
6199 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6200 }
6202 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
6203 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
6204 SelectionDAG &DAG) const {
6205 // Currently only support length 1 constraints.
6206 if (Constraint.size() == 1) {
6207 switch (Constraint[0]) {
6208 case 'l':
6209 // Validate & create a 16-bit signed immediate operand.
6210 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6211 uint64_t CVal = C->getSExtValue();
6212 if (isInt<16>(CVal))
6213 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6214 Subtarget.getGRLenVT()));
6215 }
6216 return;
6217 case 'I':
6218 // Validate & create a 12-bit signed immediate operand.
6219 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6220 uint64_t CVal = C->getSExtValue();
6221 if (isInt<12>(CVal))
6222 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6223 Subtarget.getGRLenVT()));
6224 }
6225 return;
6226 case 'J':
6227 // Validate & create an integer zero operand.
6228 if (auto *C = dyn_cast<ConstantSDNode>(Op))
6229 if (C->getZExtValue() == 0)
6230 Ops.push_back(
6231 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
6232 return;
6233 case 'K':
6234 // Validate & create a 12-bit unsigned immediate operand.
6235 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6236 uint64_t CVal = C->getZExtValue();
6237 if (isUInt<12>(CVal))
6238 Ops.push_back(
6239 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6240 }
6241 return;
6242 default:
6243 break;
6244 }
6245 }
6246 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6247 }
6249 #define GET_REGISTER_MATCHER
6250 #include "LoongArchGenAsmMatcher.inc"
6252 Register
6253 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
6254 const MachineFunction &MF) const {
6255 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6256 std::string NewRegName = Name.second.str();
6257 Register Reg = MatchRegisterAltName(NewRegName);
6258 if (Reg == LoongArch::NoRegister)
6259 Reg = MatchRegisterName(NewRegName);
6260 if (Reg == LoongArch::NoRegister)
6261 report_fatal_error(
6262 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6263 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6264 if (!ReservedRegs.test(Reg))
6265 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6266 StringRef(RegName) + "\"."));
6267 return Reg;
6268 }
6270 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
6271 EVT VT, SDValue C) const {
6272 // TODO: Support vectors.
6273 if (!VT.isScalarInteger())
6274 return false;
6276 // Omit the optimization if the data size exceeds GRLen.
6277 if (VT.getSizeInBits() > Subtarget.getGRLen())
6278 return false;
6280 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6281 const APInt &Imm = ConstNode->getAPIntValue();
6282 // Break MUL into (SLLI + ADD/SUB) or ALSL.
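// E.g. (illustrative) x * 5 = (x << 2) + x, a single ALSL, and
// x * 7 = (x << 3) - x, an SLLI plus a SUB.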
6283 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6284 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6285 return true;
6286 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
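// E.g. (illustrative) x * 6 = (x << 2) + (x << 1): SLLI produces (x << 1) and
// ALSL adds (x << 2) on top.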
6287 if (ConstNode->hasOneUse() &&
6288 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6289 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6290 return true;
6291 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)) when the
6292 // immediate has exactly two set bits, or break (MUL x, imm) into
6293 // (SUB (SLLI x, s0), (SLLI x, s1)) when the immediate equals
6294 // (1 << s0) - (1 << s1).
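// E.g. (illustrative) x * 4160 = (x << 12) + (x << 6), and
// x * 8128 = (x << 13) - (x << 6).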
6295 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6296 unsigned Shifts = Imm.countr_zero();
6297 // Reject immediates which can be composed via a single LUI.
6298 if (Shifts >= 12)
6299 return false;
6300 // Reject multiplications that can be optimized to
6301 // (SLLI (ALSL x, x, 1/2/3/4), s).
6302 APInt ImmPop = Imm.ashr(Shifts);
6303 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6304 return false;
6305 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6306 // since it needs one more instruction than the other three cases.
6307 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6308 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6309 (ImmSmall - Imm).isPowerOf2())
6310 return true;
6311 }
6312 }
6314 return false;
6315 }
6317 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
6318 const AddrMode &AM,
6319 Type *Ty, unsigned AS,
6320 Instruction *I) const {
6321 // LoongArch has four basic addressing modes:
6322 // 1. reg
6323 // 2. reg + 12-bit signed offset
6324 // 3. reg + 14-bit signed offset left-shifted by 2
6325 // 4. reg1 + reg2
6326 // TODO: Add more checks once the vector extensions are supported.
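// Illustrative instruction forms for the modes above (assumed for exposition):
//   ld.w    $a0, $a1, 8       # reg + 12-bit signed offset
//   ldptr.w $a0, $a1, 2048    # reg + 14-bit signed offset left-shifted by 2
//   ldx.w   $a0, $a1, $a2     # reg1 + reg2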
6328 // No global is ever allowed as a base.
6329 if (AM.BaseGV)
6330 return false;
6332 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6333 // with `UAL` feature.
6334 if (!isInt<12>(AM.BaseOffs) &&
6335 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6336 return false;
6338 switch (AM.Scale) {
6339 case 0:
6340 // "r+i" or just "i", depending on HasBaseReg.
6341 break;
6342 case 1:
6343 // "r+r+i" is not allowed.
6344 if (AM.HasBaseReg && AM.BaseOffs)
6345 return false;
6346 // Otherwise we have "r+r" or "r+i".
6347 break;
6348 case 2:
6349 // "2*r+r" or "2*r+i" is not allowed.
6350 if (AM.HasBaseReg || AM.BaseOffs)
6351 return false;
6352 // Allow "2*r" as "r+r".
6353 break;
6354 default:
6355 return false;
6356 }
6358 return true;
6359 }
6361 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
6362 return isInt<12>(Imm);
6363 }
6365 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
6366 return isInt<12>(Imm);
6367 }
6369 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6370 // Zexts are free if they can be combined with a load.
6371 // Don't advertise i32->i64 zextloads as free for LA64, since they interact
6372 // poorly with type legalization, which prefers sign extension for compares.
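// For example (illustrative), an i8 or i16 zextload maps to ld.bu/ld.hu, which
// already zero-extend to GRLen, so the explicit zext costs nothing.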
6373 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6374 EVT MemVT = LD->getMemoryVT();
6375 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6376 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6377 LD->getExtensionType() == ISD::ZEXTLOAD))
6378 return true;
6379 }
6381 return TargetLowering::isZExtFree(Val, VT2);
6382 }
6384 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6385 EVT DstVT) const {
6386 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6387 }
6389 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
6390 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6391 }
6393 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
6394 // TODO: Support vectors.
6395 if (Y.getValueType().isVector())
6396 return false;
6398 return !isa<ConstantSDNode>(Y);
6399 }
6401 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
6402 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
6403 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
6404 }
6406 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6407 EVT Type, bool IsSigned) const {
6408 if (Subtarget.is64Bit() && Type == MVT::i32)
6409 return true;
6411 return IsSigned;
6412 }
6414 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6415 // Return false to suppress unnecessary extensions when a libcall argument or
6416 // return value is a float narrower than GRLen on a soft-FP ABI.
6417 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6418 Type.getSizeInBits() < Subtarget.getGRLen()))
6419 return false;
6420 return true;
6421 }
6423 // memcpy and other memory intrinsics typically try to use wider loads/stores
6424 // when the source/destination is sufficiently aligned and the copy size is
6425 // large enough, so we want to align such objects passed to memory intrinsics.
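// E.g. (illustrative) an 8-byte-aligned 32-byte object on LA64 can be copied
// with four ld.d/st.d pairs instead of narrower accesses.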
6426 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
6427 unsigned &MinSize,
6428 Align &PrefAlign) const {
6429 if (!isa<MemIntrinsic>(CI))
6430 return false;
6432 if (Subtarget.is64Bit()) {
6433 MinSize = 8;
6434 PrefAlign = Align(8);
6435 } else {
6436 MinSize = 4;
6437 PrefAlign = Align(4);
6438 }
6440 return true;