; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64

;; TODO: Add optimization to ISD::ROTL
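;; LoongArch only has rotate-right instructions (rotr.w/rotr.d and the
;; immediate forms rotri.w/rotri.d), so ISD::ROTL must be lowered as a
;; rotate-right by the negated amount or expanded into shifts. The LA64
;; checks for the 32-bit rotl cases below still show the sll.w/srl.w/or
;; expansion that this TODO refers to.
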
define i32 @rotl_32(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotl_32:
; LA32:       # %bb.0:
; LA32-NEXT:    ori $a2, $zero, 32
; LA32-NEXT:    sub.w $a1, $a2, $a1
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_32:
; LA64:       # %bb.0:
; LA64-NEXT:    sub.d $a2, $zero, $a1
; LA64-NEXT:    sll.w $a1, $a0, $a1
; LA64-NEXT:    srl.w $a0, $a0, $a2
; LA64-NEXT:    or $a0, $a1, $a0
; LA64-NEXT:    ret
  %z = sub i32 32, %y
  %b = shl i32 %x, %y
  %c = lshr i32 %x, %z
  %d = or i32 %b, %c
  ret i32 %d
}

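;; A variable-amount rotate-right maps directly onto rotr.w on both targets.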
define i32 @rotr_32(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotr_32:
; LA32:       # %bb.0:
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_32:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.w $a0, $a0, $a1
; LA64-NEXT:    ret
  %z = sub i32 32, %y
  %b = lshr i32 %x, %y
  %c = shl i32 %x, %z
  %d = or i32 %b, %c
  ret i32 %d
}

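;; i64 rotates on LA32 are legalized over the register pair: a branchless
;; sequence that uses maskeqz/masknez to select between the shift-by-less-
;; than-32 and shift-by-32-or-more cases. LA64 folds the same pattern into
;; a single rotr.d.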
define i64 @rotl_64(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotl_64:
; LA32:       # %bb.0:
; LA32-NEXT:    xori $a3, $a2, 31
; LA32-NEXT:    srli.w $a4, $a0, 1
; LA32-NEXT:    srl.w $a3, $a4, $a3
; LA32-NEXT:    sll.w $a4, $a1, $a2
; LA32-NEXT:    or $a3, $a4, $a3
; LA32-NEXT:    addi.w $a4, $a2, -32
; LA32-NEXT:    slti $a5, $a4, 0
; LA32-NEXT:    maskeqz $a3, $a3, $a5
; LA32-NEXT:    sll.w $a6, $a0, $a4
; LA32-NEXT:    masknez $a5, $a6, $a5
; LA32-NEXT:    or $a3, $a3, $a5
; LA32-NEXT:    ori $a5, $zero, 64
; LA32-NEXT:    sub.w $a5, $a5, $a2
; LA32-NEXT:    xori $a5, $a5, 31
; LA32-NEXT:    slli.w $a6, $a1, 1
; LA32-NEXT:    sll.w $a5, $a6, $a5
; LA32-NEXT:    sub.w $a6, $zero, $a2
; LA32-NEXT:    srl.w $a7, $a1, $a6
; LA32-NEXT:    ori $a1, $zero, 32
; LA32-NEXT:    sub.w $t0, $a1, $a2
; LA32-NEXT:    srai.w $a1, $t0, 31
; LA32-NEXT:    and $a1, $a1, $a7
; LA32-NEXT:    or $a1, $a3, $a1
; LA32-NEXT:    srl.w $a3, $a0, $a6
; LA32-NEXT:    or $a3, $a3, $a5
; LA32-NEXT:    slti $a5, $t0, 0
; LA32-NEXT:    masknez $a6, $a7, $a5
; LA32-NEXT:    maskeqz $a3, $a3, $a5
; LA32-NEXT:    or $a3, $a3, $a6
; LA32-NEXT:    sll.w $a0, $a0, $a2
; LA32-NEXT:    srai.w $a2, $a4, 31
; LA32-NEXT:    and $a0, $a2, $a0
; LA32-NEXT:    or $a0, $a0, $a3
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_64:
; LA64:       # %bb.0:
; LA64-NEXT:    ori $a2, $zero, 64
; LA64-NEXT:    sub.d $a1, $a2, $a1
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %z = sub i64 64, %y
  %b = shl i64 %x, %y
  %c = lshr i64 %x, %z
  %d = or i64 %b, %c
  ret i64 %d
}

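;; Same legalization as rotl_64 with the shift directions swapped.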
define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotr_64:
; LA32:       # %bb.0:
; LA32-NEXT:    xori $a3, $a2, 31
; LA32-NEXT:    slli.w $a4, $a1, 1
; LA32-NEXT:    sll.w $a3, $a4, $a3
; LA32-NEXT:    srl.w $a4, $a0, $a2
; LA32-NEXT:    or $a3, $a4, $a3
; LA32-NEXT:    addi.w $a4, $a2, -32
; LA32-NEXT:    slti $a5, $a4, 0
; LA32-NEXT:    maskeqz $a3, $a3, $a5
; LA32-NEXT:    srl.w $a6, $a1, $a4
; LA32-NEXT:    masknez $a5, $a6, $a5
; LA32-NEXT:    or $a3, $a3, $a5
; LA32-NEXT:    ori $a5, $zero, 64
; LA32-NEXT:    sub.w $a5, $a5, $a2
; LA32-NEXT:    xori $a5, $a5, 31
; LA32-NEXT:    srli.w $a6, $a0, 1
; LA32-NEXT:    srl.w $a5, $a6, $a5
; LA32-NEXT:    sub.w $a6, $zero, $a2
; LA32-NEXT:    sll.w $a7, $a0, $a6
; LA32-NEXT:    ori $a0, $zero, 32
; LA32-NEXT:    sub.w $t0, $a0, $a2
; LA32-NEXT:    srai.w $a0, $t0, 31
; LA32-NEXT:    and $a0, $a0, $a7
; LA32-NEXT:    or $a0, $a3, $a0
; LA32-NEXT:    sll.w $a3, $a1, $a6
; LA32-NEXT:    or $a3, $a3, $a5
; LA32-NEXT:    slti $a5, $t0, 0
; LA32-NEXT:    masknez $a6, $a7, $a5
; LA32-NEXT:    maskeqz $a3, $a3, $a5
; LA32-NEXT:    or $a3, $a3, $a6
; LA32-NEXT:    srl.w $a1, $a1, $a2
; LA32-NEXT:    srai.w $a2, $a4, 31
; LA32-NEXT:    and $a1, $a2, $a1
; LA32-NEXT:    or $a1, $a1, $a3
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_64:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %z = sub i64 64, %y
  %b = lshr i64 %x, %y
  %c = shl i64 %x, %z
  %d = or i64 %b, %c
  ret i64 %d
}

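;; When the rotate amount is explicitly masked to the type width, the
;; "sub 32/64" becomes a plain negate: rotr.w/rotr.d only read the low
;; log2(width) bits of the amount, so the mask can be dropped entirely.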
define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotl_32_mask:
; LA32:       # %bb.0:
; LA32-NEXT:    sub.w $a1, $zero, $a1
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_32_mask:
; LA64:       # %bb.0:
; LA64-NEXT:    sub.d $a2, $zero, $a1
; LA64-NEXT:    sll.w $a1, $a0, $a1
; LA64-NEXT:    srl.w $a0, $a0, $a2
; LA64-NEXT:    or $a0, $a1, $a0
; LA64-NEXT:    ret
  %z = sub i32 0, %y
  %and = and i32 %z, 31
  %b = shl i32 %x, %y
  %c = lshr i32 %x, %and
  %d = or i32 %b, %c
  ret i32 %d
}

define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotl_32_mask_and_63_and_31:
; LA32:       # %bb.0:
; LA32-NEXT:    sub.w $a1, $zero, $a1
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_32_mask_and_63_and_31:
; LA64:       # %bb.0:
; LA64-NEXT:    sub.d $a2, $zero, $a1
; LA64-NEXT:    sll.w $a1, $a0, $a1
; LA64-NEXT:    srl.w $a0, $a0, $a2
; LA64-NEXT:    or $a0, $a1, $a0
; LA64-NEXT:    ret
  %a = and i32 %y, 63
  %b = shl i32 %x, %a
  %c = sub i32 0, %y
  %d = and i32 %c, 31
  %e = lshr i32 %x, %d
  %f = or i32 %b, %e
  ret i32 %f
}

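;; The or'd-in constants do not change the shift amounts modulo 32 (64 and
;; 32 have no bits below bit 5), so this is still recognized as a rotate.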
define i32 @rotl_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotl_32_mask_or_64_or_32:
; LA32:       # %bb.0:
; LA32-NEXT:    sub.w $a1, $zero, $a1
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_32_mask_or_64_or_32:
; LA64:       # %bb.0:
; LA64-NEXT:    sub.d $a2, $zero, $a1
; LA64-NEXT:    sll.w $a1, $a0, $a1
; LA64-NEXT:    srl.w $a0, $a0, $a2
; LA64-NEXT:    or $a0, $a1, $a0
; LA64-NEXT:    ret
  %a = or i32 %y, 64
  %b = shl i32 %x, %a
  %c = sub i32 0, %y
  %d = or i32 %c, 32
  %e = lshr i32 %x, %d
  %f = or i32 %b, %e
  ret i32 %f
}

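;; The rotr versions of the masked tests collapse to a single rotr.w on
;; both targets, since no negation of the amount is needed.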
define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotr_32_mask:
; LA32:       # %bb.0:
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_32_mask:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.w $a0, $a0, $a1
; LA64-NEXT:    ret
  %z = sub i32 0, %y
  %and = and i32 %z, 31
  %b = lshr i32 %x, %y
  %c = shl i32 %x, %and
  %d = or i32 %b, %c
  ret i32 %d
}

define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotr_32_mask_and_63_and_31:
; LA32:       # %bb.0:
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_32_mask_and_63_and_31:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.w $a0, $a0, $a1
; LA64-NEXT:    ret
  %a = and i32 %y, 63
  %b = lshr i32 %x, %a
  %c = sub i32 0, %y
  %d = and i32 %c, 31
  %e = shl i32 %x, %d
  %f = or i32 %b, %e
  ret i32 %f
}

define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
; LA32-LABEL: rotr_32_mask_or_64_or_32:
; LA32:       # %bb.0:
; LA32-NEXT:    rotr.w $a0, $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_32_mask_or_64_or_32:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.w $a0, $a0, $a1
; LA64-NEXT:    ret
  %a = or i32 %y, 64
  %b = lshr i32 %x, %a
  %c = sub i32 0, %y
  %d = or i32 %c, 32
  %e = shl i32 %x, %d
  %f = or i32 %b, %e
  ret i32 %f
}

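;; For i64, LA64 folds the negate-and-mask into rotr.d directly; LA32 again
;; expands over the register pair, and the masked amount shows up as
;; andi ..., 63 inside the expansion.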
define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotl_64_mask:
; LA32:       # %bb.0:
; LA32-NEXT:    xori $a3, $a2, 31
; LA32-NEXT:    srli.w $a4, $a0, 1
; LA32-NEXT:    srl.w $a3, $a4, $a3
; LA32-NEXT:    sll.w $a4, $a1, $a2
; LA32-NEXT:    or $a3, $a4, $a3
; LA32-NEXT:    sub.w $a4, $zero, $a2
; LA32-NEXT:    srl.w $a5, $a1, $a4
; LA32-NEXT:    andi $a6, $a4, 63
; LA32-NEXT:    addi.w $a7, $a6, -32
; LA32-NEXT:    srai.w $t0, $a7, 31
; LA32-NEXT:    and $a5, $t0, $a5
; LA32-NEXT:    addi.w $t0, $a2, -32
; LA32-NEXT:    slti $t1, $t0, 0
; LA32-NEXT:    maskeqz $a3, $a3, $t1
; LA32-NEXT:    sll.w $t2, $a0, $t0
; LA32-NEXT:    masknez $t1, $t2, $t1
; LA32-NEXT:    or $a3, $a3, $t1
; LA32-NEXT:    xori $a6, $a6, 31
; LA32-NEXT:    slli.w $t1, $a1, 1
; LA32-NEXT:    sll.w $a6, $t1, $a6
; LA32-NEXT:    or $a3, $a3, $a5
; LA32-NEXT:    srl.w $a4, $a0, $a4
; LA32-NEXT:    or $a4, $a4, $a6
; LA32-NEXT:    srl.w $a1, $a1, $a7
; LA32-NEXT:    slti $a5, $a7, 0
; LA32-NEXT:    masknez $a1, $a1, $a5
; LA32-NEXT:    maskeqz $a4, $a4, $a5
; LA32-NEXT:    or $a1, $a4, $a1
; LA32-NEXT:    sll.w $a0, $a0, $a2
; LA32-NEXT:    srai.w $a2, $t0, 31
; LA32-NEXT:    and $a0, $a2, $a0
; LA32-NEXT:    or $a0, $a0, $a1
; LA32-NEXT:    move $a1, $a3
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_64_mask:
; LA64:       # %bb.0:
; LA64-NEXT:    sub.d $a1, $zero, $a1
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %z = sub i64 0, %y
  %and = and i64 %z, 63
  %b = shl i64 %x, %y
  %c = lshr i64 %x, %and
  %d = or i64 %b, %c
  ret i64 %d
}

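;; With a 127 mask the amount is not known to be < 64, so LA32 keeps the
;; andi ..., 127 visible before the expansion.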
define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotl_64_mask_and_127_and_63:
; LA32:       # %bb.0:
; LA32-NEXT:    srli.w $a3, $a0, 1
; LA32-NEXT:    andi $a4, $a2, 127
; LA32-NEXT:    xori $a5, $a4, 31
; LA32-NEXT:    srl.w $a3, $a3, $a5
; LA32-NEXT:    sll.w $a5, $a1, $a2
; LA32-NEXT:    or $a3, $a5, $a3
; LA32-NEXT:    sub.w $a5, $zero, $a2
; LA32-NEXT:    srl.w $a6, $a1, $a5
; LA32-NEXT:    andi $a7, $a5, 63
; LA32-NEXT:    addi.w $t0, $a7, -32
; LA32-NEXT:    srai.w $t1, $t0, 31
; LA32-NEXT:    and $a6, $t1, $a6
; LA32-NEXT:    addi.w $a4, $a4, -32
; LA32-NEXT:    slti $t1, $a4, 0
; LA32-NEXT:    maskeqz $a3, $a3, $t1
; LA32-NEXT:    sll.w $t2, $a0, $a4
; LA32-NEXT:    masknez $t1, $t2, $t1
; LA32-NEXT:    or $a3, $a3, $t1
; LA32-NEXT:    xori $a7, $a7, 31
; LA32-NEXT:    slli.w $t1, $a1, 1
; LA32-NEXT:    sll.w $a7, $t1, $a7
; LA32-NEXT:    or $a3, $a3, $a6
; LA32-NEXT:    srl.w $a5, $a0, $a5
; LA32-NEXT:    or $a5, $a5, $a7
; LA32-NEXT:    srl.w $a1, $a1, $t0
; LA32-NEXT:    slti $a6, $t0, 0
; LA32-NEXT:    masknez $a1, $a1, $a6
; LA32-NEXT:    maskeqz $a5, $a5, $a6
; LA32-NEXT:    or $a1, $a5, $a1
; LA32-NEXT:    sll.w $a0, $a0, $a2
; LA32-NEXT:    srai.w $a2, $a4, 31
; LA32-NEXT:    and $a0, $a2, $a0
; LA32-NEXT:    or $a0, $a0, $a1
; LA32-NEXT:    move $a1, $a3
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_64_mask_and_127_and_63:
; LA64:       # %bb.0:
; LA64-NEXT:    sub.d $a1, $zero, $a1
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %a = and i64 %y, 127
  %b = shl i64 %x, %a
  %c = sub i64 0, %y
  %d = and i64 %c, 63
  %e = lshr i64 %x, %d
  %f = or i64 %b, %e
  ret i64 %f
}

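;; Here both shift amounts are always >= 64, so the i64 shifts are poison.
;; LA32 folds the whole function to zero while LA64 still matches a rotate;
;; both are acceptable results.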
define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotl_64_mask_or_128_or_64:
; LA32:       # %bb.0:
; LA32-NEXT:    move $a0, $zero
; LA32-NEXT:    move $a1, $zero
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_64_mask_or_128_or_64:
; LA64:       # %bb.0:
; LA64-NEXT:    sub.d $a1, $zero, $a1
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %a = or i64 %y, 128
  %b = shl i64 %x, %a
  %c = sub i64 0, %y
  %d = or i64 %c, 64
  %e = lshr i64 %x, %d
  %f = or i64 %b, %e
  ret i64 %f
}

define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotr_64_mask:
; LA32:       # %bb.0:
; LA32-NEXT:    xori $a3, $a2, 31
; LA32-NEXT:    slli.w $a4, $a1, 1
; LA32-NEXT:    sll.w $a3, $a4, $a3
; LA32-NEXT:    srl.w $a4, $a0, $a2
; LA32-NEXT:    or $a3, $a4, $a3
; LA32-NEXT:    sub.w $a4, $zero, $a2
; LA32-NEXT:    sll.w $a5, $a0, $a4
; LA32-NEXT:    andi $a6, $a4, 63
; LA32-NEXT:    addi.w $a7, $a6, -32
; LA32-NEXT:    srai.w $t0, $a7, 31
; LA32-NEXT:    and $a5, $t0, $a5
; LA32-NEXT:    addi.w $t0, $a2, -32
; LA32-NEXT:    slti $t1, $t0, 0
; LA32-NEXT:    maskeqz $a3, $a3, $t1
; LA32-NEXT:    srl.w $t2, $a1, $t0
; LA32-NEXT:    masknez $t1, $t2, $t1
; LA32-NEXT:    or $a3, $a3, $t1
; LA32-NEXT:    xori $a6, $a6, 31
; LA32-NEXT:    srli.w $t1, $a0, 1
; LA32-NEXT:    srl.w $a6, $t1, $a6
; LA32-NEXT:    or $a3, $a3, $a5
; LA32-NEXT:    sll.w $a4, $a1, $a4
; LA32-NEXT:    or $a4, $a4, $a6
; LA32-NEXT:    sll.w $a0, $a0, $a7
; LA32-NEXT:    slti $a5, $a7, 0
; LA32-NEXT:    masknez $a0, $a0, $a5
; LA32-NEXT:    maskeqz $a4, $a4, $a5
; LA32-NEXT:    or $a0, $a4, $a0
; LA32-NEXT:    srl.w $a1, $a1, $a2
; LA32-NEXT:    srai.w $a2, $t0, 31
; LA32-NEXT:    and $a1, $a2, $a1
; LA32-NEXT:    or $a1, $a1, $a0
; LA32-NEXT:    move $a0, $a3
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_64_mask:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %z = sub i64 0, %y
  %and = and i64 %z, 63
  %b = lshr i64 %x, %y
  %c = shl i64 %x, %and
  %d = or i64 %b, %c
  ret i64 %d
}

define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotr_64_mask_and_127_and_63:
; LA32:       # %bb.0:
; LA32-NEXT:    slli.w $a3, $a1, 1
; LA32-NEXT:    andi $a4, $a2, 127
; LA32-NEXT:    xori $a5, $a4, 31
; LA32-NEXT:    sll.w $a3, $a3, $a5
; LA32-NEXT:    srl.w $a5, $a0, $a2
; LA32-NEXT:    or $a3, $a5, $a3
; LA32-NEXT:    sub.w $a5, $zero, $a2
; LA32-NEXT:    sll.w $a6, $a0, $a5
; LA32-NEXT:    andi $a7, $a5, 63
; LA32-NEXT:    addi.w $t0, $a7, -32
; LA32-NEXT:    srai.w $t1, $t0, 31
; LA32-NEXT:    and $a6, $t1, $a6
; LA32-NEXT:    addi.w $a4, $a4, -32
; LA32-NEXT:    slti $t1, $a4, 0
; LA32-NEXT:    maskeqz $a3, $a3, $t1
; LA32-NEXT:    srl.w $t2, $a1, $a4
; LA32-NEXT:    masknez $t1, $t2, $t1
; LA32-NEXT:    or $a3, $a3, $t1
; LA32-NEXT:    xori $a7, $a7, 31
; LA32-NEXT:    srli.w $t1, $a0, 1
; LA32-NEXT:    srl.w $a7, $t1, $a7
; LA32-NEXT:    or $a3, $a3, $a6
; LA32-NEXT:    sll.w $a5, $a1, $a5
; LA32-NEXT:    or $a5, $a5, $a7
; LA32-NEXT:    sll.w $a0, $a0, $t0
; LA32-NEXT:    slti $a6, $t0, 0
; LA32-NEXT:    masknez $a0, $a0, $a6
; LA32-NEXT:    maskeqz $a5, $a5, $a6
; LA32-NEXT:    or $a0, $a5, $a0
; LA32-NEXT:    srl.w $a1, $a1, $a2
; LA32-NEXT:    srai.w $a2, $a4, 31
; LA32-NEXT:    and $a1, $a2, $a1
; LA32-NEXT:    or $a1, $a1, $a0
; LA32-NEXT:    move $a0, $a3
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_64_mask_and_127_and_63:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %a = and i64 %y, 127
  %b = lshr i64 %x, %a
  %c = sub i64 0, %y
  %d = and i64 %c, 63
  %e = shl i64 %x, %d
  %f = or i64 %b, %e
  ret i64 %f
}

define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
; LA32-LABEL: rotr_64_mask_or_128_or_64:
; LA32:       # %bb.0:
; LA32-NEXT:    move $a0, $zero
; LA32-NEXT:    move $a1, $zero
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_64_mask_or_128_or_64:
; LA64:       # %bb.0:
; LA64-NEXT:    rotr.d $a0, $a0, $a1
; LA64-NEXT:    ret
  %a = or i64 %y, 128
  %b = lshr i64 %x, %a
  %c = sub i64 0, %y
  %d = or i64 %c, 64
  %e = shl i64 %x, %d
  %f = or i64 %b, %e
  ret i64 %f
}

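;; Rotates by a constant amount select the immediate forms rotri.w/rotri.d.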
define i32 @rotri_i32(i32 %a) nounwind {
; LA32-LABEL: rotri_i32:
; LA32:       # %bb.0:
; LA32-NEXT:    rotri.w $a0, $a0, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: rotri_i32:
; LA64:       # %bb.0:
; LA64-NEXT:    rotri.w $a0, $a0, 16
; LA64-NEXT:    ret
  %shl = shl i32 %a, 16
  %shr = lshr i32 %a, 16
  %or = or i32 %shl, %shr
  ret i32 %or
}

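;; An i64 rotate by 32 on LA32 is just a swap of the two halves of the
;; register pair.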
define i64 @rotri_i64(i64 %a) nounwind {
; LA32-LABEL: rotri_i64:
; LA32:       # %bb.0:
; LA32-NEXT:    move $a2, $a0
; LA32-NEXT:    move $a0, $a1
; LA32-NEXT:    move $a1, $a2
; LA32-NEXT:    ret
;
; LA64-LABEL: rotri_i64:
; LA64:       # %bb.0:
; LA64-NEXT:    rotri.d $a0, $a0, 32
; LA64-NEXT:    ret
  %shl = shl i64 %a, 32
  %shr = lshr i64 %a, 32
  %or = or i64 %shl, %shr
  ret i64 %or
}

declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)

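;; A funnel shift with both inputs equal is a rotate: fshl(x, x, n) is a
;; rotate-left by n, so it lowers to rotri with amount (width - n).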
define signext i32 @rotl_i32_fshl(i32 signext %a) nounwind {
; LA32-LABEL: rotl_i32_fshl:
; LA32:       # %bb.0:
; LA32-NEXT:    rotri.w $a0, $a0, 20
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_i32_fshl:
; LA64:       # %bb.0:
; LA64-NEXT:    rotri.w $a0, $a0, 20
; LA64-NEXT:    ret
  %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 12)
  ret i32 %or
}

define i64 @rotl_i64_fshl(i64 %a) nounwind {
; LA32-LABEL: rotl_i64_fshl:
; LA32:       # %bb.0:
; LA32-NEXT:    srli.w $a2, $a1, 20
; LA32-NEXT:    slli.w $a3, $a0, 12
; LA32-NEXT:    or $a2, $a3, $a2
; LA32-NEXT:    srli.w $a0, $a0, 20
; LA32-NEXT:    slli.w $a1, $a1, 12
; LA32-NEXT:    or $a1, $a1, $a0
; LA32-NEXT:    move $a0, $a2
; LA32-NEXT:    ret
;
; LA64-LABEL: rotl_i64_fshl:
; LA64:       # %bb.0:
; LA64-NEXT:    rotri.d $a0, $a0, 52
; LA64-NEXT:    ret
  %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 12)
  ret i64 %or
}

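;; LA64 currently lowers the i32 fshr through 64-bit shifts (slli.d plus
;; bstrpick.d) and re-sign-extends with addi.w instead of emitting a single
;; rotri.w as LA32 does.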
define signext i32 @rotr_i32_fshr(i32 signext %a) nounwind {
; LA32-LABEL: rotr_i32_fshr:
; LA32:       # %bb.0:
; LA32-NEXT:    rotri.w $a0, $a0, 12
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_i32_fshr:
; LA64:       # %bb.0:
; LA64-NEXT:    slli.d $a1, $a0, 20
; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 12
; LA64-NEXT:    or $a0, $a0, $a1
; LA64-NEXT:    addi.w $a0, $a0, 0
; LA64-NEXT:    ret
  %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 12)
  ret i32 %or
}

define i64 @rotr_i64_fshr(i64 %a) nounwind {
; LA32-LABEL: rotr_i64_fshr:
; LA32:       # %bb.0:
; LA32-NEXT:    srli.w $a2, $a0, 12
; LA32-NEXT:    slli.w $a3, $a1, 20
; LA32-NEXT:    or $a2, $a3, $a2
; LA32-NEXT:    srli.w $a1, $a1, 12
; LA32-NEXT:    slli.w $a0, $a0, 20
; LA32-NEXT:    or $a1, $a0, $a1
; LA32-NEXT:    move $a0, $a2
; LA32-NEXT:    ret
;
; LA64-LABEL: rotr_i64_fshr:
; LA64:       # %bb.0:
; LA64-NEXT:    rotri.d $a0, $a0, 12
; LA64-NEXT:    ret
  %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 12)
  ret i64 %or
}