; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64 -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define i32 @test_rev_w(i32 %a) nounwind {
; CHECK-LABEL: test_rev_w:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev w0, w0
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %0
}

define i64 @test_rev_x(i64 %a) nounwind {
; CHECK-LABEL: test_rev_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev x0, x0
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %0
}

; Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 16 bits
; of %a are zero. This optimizes rev + lsr 16 to rev16.
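; For example, with %a = 0xAABB: zext gives 0x0000AABB, bswap gives
; 0xBBAA0000, and a 16-bit shift (or rotate) right gives 0x0000BBAA,
; which is exactly rev16 of the zero-extended input.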
define i32 @test_rev_w_srl16(i16 %a) {
; CHECK-SD-LABEL: test_rev_w_srl16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev w8, w0
; CHECK-SD-NEXT: lsr w0, w8, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_w_srl16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xffff
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w0, w8, #16
; CHECK-GI-NEXT: ret
entry:
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.bswap.i32(i32 %0)
  %2 = lshr i32 %1, 16
  ret i32 %2
}

define i32 @test_rev_w_srl16_load(ptr %a) {
; CHECK-LABEL: test_rev_w_srl16_load:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
entry:
  %0 = load i16, ptr %a
  %1 = zext i16 %0 to i32
  %2 = tail call i32 @llvm.bswap.i32(i32 %1)
  %3 = lshr i32 %2, 16
  ret i32 %3
}

define i32 @test_rev_w_srl16_add(i8 %a, i8 %b) {
; CHECK-SD-LABEL: test_rev_w_srl16_add:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: and w8, w0, #0xff
; CHECK-SD-NEXT: add w8, w8, w1, uxtb
; CHECK-SD-NEXT: rev16 w0, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_w_srl16_add:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w1, #0xff
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w0, w8, #16
; CHECK-GI-NEXT: ret
entry:
  %0 = zext i8 %a to i32
  %1 = zext i8 %b to i32
  %2 = add i32 %0, %1
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  %4 = lshr i32 %3, 16
  ret i32 %4
}

; Canonicalize (srl (bswap x), 32) to (rotr (bswap x), 32) if the high 32 bits
; of %a are zero. This optimizes rev + lsr 32 to rev32.
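; For example, with %a = 0xAABBCCDD: zext gives 0x00000000AABBCCDD, bswap
; gives 0xDDCCBBAA00000000, and a 32-bit shift (or rotate) right gives
; 0x00000000DDCCBBAA, which is exactly rev32 of the zero-extended input.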
define i64 @test_rev_x_srl32(i32 %a) {
; CHECK-SD-LABEL: test_rev_x_srl32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: rev x8, x0
; CHECK-SD-NEXT: lsr x0, x8, #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_x_srl32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: rev x8, x8
; CHECK-GI-NEXT: lsr x0, x8, #32
; CHECK-GI-NEXT: ret
entry:
  %0 = zext i32 %a to i64
  %1 = tail call i64 @llvm.bswap.i64(i64 %0)
  %2 = lshr i64 %1, 32
  ret i64 %2
}

define i64 @test_rev_x_srl32_load(ptr %a) {
; CHECK-LABEL: test_rev_x_srl32_load:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: lsr x0, x8, #32
; CHECK-NEXT: ret
entry:
  %0 = load i32, ptr %a
  %1 = zext i32 %0 to i64
  %2 = tail call i64 @llvm.bswap.i64(i64 %1)
  %3 = lshr i64 %2, 32
  ret i64 %3
}

define i64 @test_rev_x_srl32_shift(i64 %a) {
; CHECK-SD-LABEL: test_rev_x_srl32_shift:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ubfx x8, x0, #2, #29
; CHECK-SD-NEXT: rev32 x0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_x_srl32_shift:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ubfx x8, x0, #2, #29
; CHECK-GI-NEXT: rev x8, x8
; CHECK-GI-NEXT: lsr x0, x8, #32
; CHECK-GI-NEXT: ret
entry:
  %0 = shl i64 %a, 33
  %1 = lshr i64 %0, 35
  %2 = tail call i64 @llvm.bswap.i64(i64 %1)
  %3 = lshr i64 %2, 32
  ret i64 %3
}

declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone

define i32 @test_rev16_w(i32 %X) nounwind {
; CHECK-SD-LABEL: test_rev16_w:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev16 w0, w0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev16_w:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr w8, w0, #8
; CHECK-GI-NEXT: lsl w9, w0, #8
; CHECK-GI-NEXT: and w10, w8, #0xff0000
; CHECK-GI-NEXT: and w11, w9, #0xff000000
; CHECK-GI-NEXT: and w8, w8, #0xff
; CHECK-GI-NEXT: and w9, w9, #0xff00
; CHECK-GI-NEXT: orr w10, w11, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: orr w0, w10, w8
; CHECK-GI-NEXT: ret
entry:
  %tmp1 = lshr i32 %X, 8
  %X15 = bitcast i32 %X to i32
  %tmp4 = shl i32 %X15, 8
  %tmp2 = and i32 %tmp1, 16711680
  %tmp5 = and i32 %tmp4, -16777216
  %tmp9 = and i32 %tmp1, 255
  %tmp13 = and i32 %tmp4, 65280
  %tmp6 = or i32 %tmp5, %tmp2
  %tmp10 = or i32 %tmp6, %tmp13
  %tmp14 = or i32 %tmp10, %tmp9
  ret i32 %tmp14
}

; 64-bit REV16 is *not* a swap then a 16-bit rotation:
; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
; 01234567 ->(rev16) 10325476
define i64 @test_rev16_x(i64 %a) nounwind {
; CHECK-LABEL: test_rev16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev x8, x0
; CHECK-NEXT: ror x0, x8, #16
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
  %1 = lshr i64 %0, 16
  %2 = shl i64 %0, 48
  %3 = or i64 %1, %2
  ret i64 %3
}

define i64 @test_rev32_x(i64 %a) nounwind {
; CHECK-LABEL: test_rev32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 x0, x0
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
  %1 = lshr i64 %0, 32
  %2 = shl i64 %0, 32
  %3 = or i64 %1, %2
  ret i64 %3
}

define <8 x i8> @test_vrev64D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.8b v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %tmp2
}

define <4 x i16> @test_vrev64D16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.4h v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %tmp2
}

define <2 x i32> @test_vrev64D32(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.2s v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i32> %tmp2
}

define <2 x float> @test_vrev64Df(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Df:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.2s v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x float> %tmp2
}

define <16 x i8> @test_vrev64Q8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.16b v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  ret <16 x i8> %tmp2
}

define <8 x i16> @test_vrev64Q16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.8h v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i16> %tmp2
}

define <4 x i32> @test_vrev64Q32(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.4s v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i32> %tmp2
}

define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Qf:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.4s v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x float> %tmp2
}

define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32D8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev32.8b v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i8> %tmp2
}

define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32D16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev32.4h v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i16> %tmp2
}

define <16 x i8> @test_vrev32Q8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Q8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev32.16b v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i8> %tmp2
}

define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Q16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev32.8h v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i16> %tmp2
}

define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev16D8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev16.8b v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i8> %tmp2
}

define <16 x i8> @test_vrev16Q8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev16Q8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev16.16b v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  ret <16 x i8> %tmp2
}

; Undef shuffle indices should not prevent matching to VREV:
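; For example, in the first mask below the defined indices already follow the
; <7,6,5,4,3,2,1,0> reversal, so the undef lanes can be filled to match REV64.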

define <8 x i8> @test_vrev64D8_undef(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.8b v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %tmp2
}

define <8 x i16> @test_vrev32Q16_undef(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Q16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev32.8h v0, v0
; CHECK-NEXT: ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
  ret <8 x i16> %tmp2
}

; vrev <4 x i16> should use REV32 and not REV64
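; Storing elements 6 and 5 into adjacent i16 slots reverses halfwords within
; a 32-bit unit, which is REV32 granularity rather than REV64.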
define void @test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp {
; CHECK-SD-LABEL: test_vrev64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add x8, x1, #2
; CHECK-SD-NEXT: st1.h { v0 }[5], [x8]
; CHECK-SD-NEXT: st1.h { v0 }[6], [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_vrev64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: add x8, x1, #2
; CHECK-GI-NEXT: st1.h { v0 }[6], [x1]
; CHECK-GI-NEXT: st1.h { v0 }[5], [x8]
; CHECK-GI-NEXT: ret
entry:
  %tmp2 = load <8 x i16>, ptr %source, align 4
  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
  store <2 x i16> %tmp11, ptr %dst, align 4
  ret void
}

; Test vrev of float4
define void @float_vrev64(ptr nocapture %source, ptr nocapture %dest) nounwind noinline ssp {
; CHECK-SD-LABEL: float_vrev64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi.2d v0, #0000000000000000
; CHECK-SD-NEXT: add x8, x0, #12
; CHECK-SD-NEXT: dup.4s v0, v0[0]
; CHECK-SD-NEXT: ld1.s { v0 }[1], [x8]
; CHECK-SD-NEXT: str q0, [x1, #176]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: float_vrev64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi d0, #0000000000000000
; CHECK-GI-NEXT: adrp x8, .LCPI28_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI28_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: str q0, [x1, #176]
; CHECK-GI-NEXT: ret
entry:
  %tmp2 = load <4 x float>, ptr %source, align 4
  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
  %arrayidx8 = getelementptr inbounds <4 x float>, ptr %dest, i32 11
  store <4 x float> %tmp5, ptr %arrayidx8, align 4
  ret void
}

define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
; CHECK-LABEL: test_vrev32_bswap:
; CHECK: // %bb.0:
; CHECK-NEXT: rev32.16b v0, v0
; CHECK-NEXT: ret
  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
  ret <4 x i32> %bswap
}

declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone

; Reduced regression from D114354
define void @test_rev16_truncstore() {
; CHECK-SD-LABEL: test_rev16_truncstore:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cbnz wzr, .LBB30_2
; CHECK-SD-NEXT: .LBB30_1: // %cleanup
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: ldrh w8, [x8]
; CHECK-SD-NEXT: rev16 w8, w8
; CHECK-SD-NEXT: strh w8, [x8]
; CHECK-SD-NEXT: cbz wzr, .LBB30_1
; CHECK-SD-NEXT: .LBB30_2: // %fail
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev16_truncstore:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: tbnz wzr, #0, .LBB30_2
; CHECK-GI-NEXT: .LBB30_1: // %cleanup
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w8, [x8]
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: strh w8, [x8]
; CHECK-GI-NEXT: tbz wzr, #0, .LBB30_1
; CHECK-GI-NEXT: .LBB30_2: // %fail
; CHECK-GI-NEXT: ret
entry:
  br label %body

body:
  %out.6269.i = phi ptr [ undef, %cleanup ], [ undef, %entry ]
  %0 = load i16, ptr undef, align 2
  %1 = icmp eq i16 undef, -10240
  br i1 %1, label %fail, label %cleanup

cleanup:
  %or130.i = call i16 @llvm.bswap.i16(i16 %0)
  store i16 %or130.i, ptr %out.6269.i, align 2
  br label %body

fail:
  ret void
}
declare i16 @llvm.bswap.i16(i16)

; Reduced regression from D120192
define void @test_bswap32_narrow(ptr %p0, ptr %p1) nounwind {
; CHECK-SD-LABEL: test_bswap32_narrow:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-SD-NEXT: ldrh w8, [x0, #2]
; CHECK-SD-NEXT: mov x19, x1
; CHECK-SD-NEXT: rev16 w0, w8
; CHECK-SD-NEXT: bl gid_tbl_len
; CHECK-SD-NEXT: strh wzr, [x19]
; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_bswap32_narrow:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: mov x19, x1
; CHECK-GI-NEXT: and w8, w8, #0xffff0000
; CHECK-GI-NEXT: rev w0, w8
; CHECK-GI-NEXT: bl gid_tbl_len
; CHECK-GI-NEXT: strh wzr, [x19]
; CHECK-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
  %ld = load i32, ptr %p0, align 4
  %and = and i32 %ld, -65536
  %bswap = tail call i32 @llvm.bswap.i32(i32 %and)
  %and16 = zext i32 %bswap to i64
  %call17 = tail call i32 (...) @gid_tbl_len(i64 %and16)
  store i16 0, ptr %p1, align 4
  ret void
}
declare i32 @gid_tbl_len(...)

; 64-bit REV16 is *not* a swap then a 16-bit rotation:
; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
; 01234567 ->(rev16) 10325476
; Optimize patterns where rev16 can be generated for a 64-bit input.
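; The pattern below computes
;   ((x >> 8) & 0x00ff00ff00ff00ff) | ((x << 8) & 0xff00ff00ff00ff00),
; which swaps the two bytes within every 16-bit lane, i.e. a 64-bit rev16.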
define i64 @test_rev16_x_hwbyteswaps(i64 %a) nounwind {
; CHECK-LABEL: test_rev16_x_hwbyteswaps:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 x0, x0
; CHECK-NEXT: ret
entry:
  %0 = lshr i64 %a, 8
  %1 = and i64 %0, 71777214294589695
  %2 = shl i64 %a, 8
  %3 = and i64 %2, -71777214294589696
  %4 = or i64 %1, %3
  ret i64 %4
}

; Optimize patterns with multiple and/or operations into a simpler pattern that can enable generation of rev16.
define i64 @test_rev16_x_hwbyteswaps_complex1(i64 %a) nounwind {
; CHECK-SD-LABEL: test_rev16_x_hwbyteswaps_complex1:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #48
; CHECK-SD-NEXT: and x10, x8, #0xff000000000000
; CHECK-SD-NEXT: and x11, x8, #0xff00000000
; CHECK-SD-NEXT: and x8, x8, #0xff0000
; CHECK-SD-NEXT: bfi x10, x9, #56, #8
; CHECK-SD-NEXT: lsr x9, x0, #32
; CHECK-SD-NEXT: orr x10, x10, x11
; CHECK-SD-NEXT: bfi x10, x9, #40, #8
; CHECK-SD-NEXT: lsr x9, x0, #16
; CHECK-SD-NEXT: orr x8, x10, x8
; CHECK-SD-NEXT: bfi x8, x9, #24, #8
; CHECK-SD-NEXT: ubfiz x9, x0, #8, #8
; CHECK-SD-NEXT: bfxil x8, x0, #8, #8
; CHECK-SD-NEXT: orr x0, x8, x9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev16_x_hwbyteswaps_complex1:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x11, x9, #0xff00000000000000
; CHECK-GI-NEXT: and x12, x8, #0xff00000000
; CHECK-GI-NEXT: and x13, x9, #0xff0000000000
; CHECK-GI-NEXT: and x14, x8, #0xff0000
; CHECK-GI-NEXT: orr x10, x10, x11
; CHECK-GI-NEXT: and x11, x9, #0xff000000
; CHECK-GI-NEXT: orr x12, x12, x13
; CHECK-GI-NEXT: and x8, x8, #0xff
; CHECK-GI-NEXT: orr x11, x14, x11
; CHECK-GI-NEXT: orr x10, x10, x12
; CHECK-GI-NEXT: and x9, x9, #0xff00
; CHECK-GI-NEXT: orr x8, x11, x8
; CHECK-GI-NEXT: orr x8, x10, x8
; CHECK-GI-NEXT: orr x0, x8, x9
; CHECK-GI-NEXT: ret
entry:
  %0 = lshr i64 %a, 8
  %1 = and i64 %0, 71776119061217280
  %2 = shl i64 %a, 8
  %3 = and i64 %2, -72057594037927936
  %4 = or i64 %1, %3
  %5 = and i64 %0, 1095216660480
  %6 = or i64 %4, %5
  %7 = and i64 %2, 280375465082880
  %8 = or i64 %6, %7
  %9 = and i64 %0, 16711680
  %10 = or i64 %8, %9
  %11 = and i64 %2, 4278190080
  %12 = or i64 %10, %11
  %13 = and i64 %0, 255
  %14 = or i64 %12, %13
  %15 = and i64 %2, 65280
  %16 = or i64 %14, %15
  ret i64 %16
}

define i64 @test_rev16_x_hwbyteswaps_complex2(i64 %a) nounwind {
; CHECK-SD-LABEL: test_rev16_x_hwbyteswaps_complex2:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #48
; CHECK-SD-NEXT: lsr x10, x0, #32
; CHECK-SD-NEXT: and x8, x8, #0xff00ff00ff00ff
; CHECK-SD-NEXT: bfi x8, x9, #56, #8
; CHECK-SD-NEXT: lsr x9, x0, #16
; CHECK-SD-NEXT: bfi x8, x10, #40, #8
; CHECK-SD-NEXT: bfi x8, x9, #24, #8
; CHECK-SD-NEXT: bfi x8, x0, #8, #8
; CHECK-SD-NEXT: mov x0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev16_x_hwbyteswaps_complex2:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x11, x8, #0xff00000000
; CHECK-GI-NEXT: and x12, x8, #0xff0000
; CHECK-GI-NEXT: and x8, x8, #0xff
; CHECK-GI-NEXT: and x13, x9, #0xff00000000000000
; CHECK-GI-NEXT: orr x10, x10, x11
; CHECK-GI-NEXT: and x11, x9, #0xff0000000000
; CHECK-GI-NEXT: orr x8, x12, x8
; CHECK-GI-NEXT: and x12, x9, #0xff000000
; CHECK-GI-NEXT: orr x11, x13, x11
; CHECK-GI-NEXT: orr x8, x10, x8
; CHECK-GI-NEXT: and x9, x9, #0xff00
; CHECK-GI-NEXT: orr x10, x11, x12
; CHECK-GI-NEXT: orr x8, x8, x10
; CHECK-GI-NEXT: orr x0, x8, x9
; CHECK-GI-NEXT: ret
entry:
  %0 = lshr i64 %a, 8
  %1 = and i64 %0, 71776119061217280
  %2 = shl i64 %a, 8
  %3 = and i64 %0, 1095216660480
  %4 = or i64 %1, %3
  %5 = and i64 %0, 16711680
  %6 = or i64 %4, %5
  %7 = and i64 %0, 255
  %8 = or i64 %6, %7
  %9 = and i64 %2, -72057594037927936
  %10 = or i64 %8, %9
  %11 = and i64 %2, 280375465082880
  %12 = or i64 %10, %11
  %13 = and i64 %2, 4278190080
  %14 = or i64 %12, %13
  %15 = and i64 %2, 65280
  %16 = or i64 %14, %15
  ret i64 %16
}

; Optimize patterns with multiple and/or operations into a simpler pattern that can enable generation of rev16.
define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind {
; CHECK-SD-LABEL: test_rev16_x_hwbyteswaps_complex3:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #48
; CHECK-SD-NEXT: and x10, x8, #0xff000000000000
; CHECK-SD-NEXT: and x11, x8, #0xff00000000
; CHECK-SD-NEXT: and x8, x8, #0xff0000
; CHECK-SD-NEXT: bfi x10, x9, #56, #8
; CHECK-SD-NEXT: lsr x9, x0, #32
; CHECK-SD-NEXT: orr x10, x11, x10
; CHECK-SD-NEXT: bfi x10, x9, #40, #8
; CHECK-SD-NEXT: lsr x9, x0, #16
; CHECK-SD-NEXT: orr x8, x8, x10
; CHECK-SD-NEXT: bfi x8, x9, #24, #8
; CHECK-SD-NEXT: ubfiz x9, x0, #8, #8
; CHECK-SD-NEXT: bfxil x8, x0, #8, #8
; CHECK-SD-NEXT: orr x0, x9, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev16_x_hwbyteswaps_complex3:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x11, x9, #0xff00000000000000
; CHECK-GI-NEXT: and x12, x8, #0xff00000000
; CHECK-GI-NEXT: and x13, x9, #0xff0000000000
; CHECK-GI-NEXT: and x14, x8, #0xff0000
; CHECK-GI-NEXT: orr x10, x11, x10
; CHECK-GI-NEXT: and x11, x9, #0xff000000
; CHECK-GI-NEXT: orr x12, x13, x12
; CHECK-GI-NEXT: and x8, x8, #0xff
; CHECK-GI-NEXT: orr x11, x11, x14
; CHECK-GI-NEXT: orr x10, x12, x10
; CHECK-GI-NEXT: and x9, x9, #0xff00
; CHECK-GI-NEXT: orr x8, x8, x11
; CHECK-GI-NEXT: orr x8, x8, x10
; CHECK-GI-NEXT: orr x0, x9, x8
; CHECK-GI-NEXT: ret
entry:
  %0 = lshr i64 %a, 8
  %1 = and i64 %0, 71776119061217280
  %2 = shl i64 %a, 8
  %3 = and i64 %2, -72057594037927936
  %4 = or i64 %3, %1
  %5 = and i64 %0, 1095216660480
  %6 = or i64 %5, %4
  %7 = and i64 %2, 280375465082880
  %8 = or i64 %7, %6
  %9 = and i64 %0, 16711680
  %10 = or i64 %9, %8
  %11 = and i64 %2, 4278190080
  %12 = or i64 %11, %10
  %13 = and i64 %0, 255
  %14 = or i64 %13, %12
  %15 = and i64 %2, 65280
  %16 = or i64 %15, %14
  ret i64 %16
}

define i64 @test_or_and_combine1(i64 %a) nounwind {
; CHECK-SD-LABEL: test_or_and_combine1:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #24
; CHECK-SD-NEXT: and x10, x8, #0xff000000000000
; CHECK-SD-NEXT: and x8, x8, #0xff0000
; CHECK-SD-NEXT: bfi x10, x9, #32, #8
; CHECK-SD-NEXT: orr x0, x10, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_or_and_combine1:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x9, x9, #0xff00000000
; CHECK-GI-NEXT: and x8, x8, #0xff0000
; CHECK-GI-NEXT: orr x9, x10, x9
; CHECK-GI-NEXT: orr x0, x9, x8
; CHECK-GI-NEXT: ret
entry:
  %0 = lshr i64 %a, 8
  %1 = and i64 %0, 71776119061217280
  %2 = shl i64 %a, 8
  %3 = and i64 %2, 1095216660480
  %4 = or i64 %1, %3
  %5 = and i64 %0, 16711680
  %6 = or i64 %4, %5
  ret i64 %6
}

define i64 @test_or_and_combine2(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_or_and_combine2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: lsr x8, x0, #8
; CHECK-NEXT: lsl x9, x0, #8
; CHECK-NEXT: and x10, x8, #0xff000000000000
; CHECK-NEXT: and x11, x9, #0xff00000000
; CHECK-NEXT: and x8, x8, #0xff0000
; CHECK-NEXT: orr x9, x10, x9
; CHECK-NEXT: orr x8, x11, x8
; CHECK-NEXT: orr x0, x9, x8
; CHECK-NEXT: ret
entry:
  %0 = lshr i64 %a, 8
  %1 = and i64 %0, 71776119061217280
  %2 = shl i64 %a, 8
  %3 = or i64 %1, %2
  %4 = and i64 %2, 1095216660480
  %5 = or i64 %3, %4
  %6 = and i64 %0, 16711680
  %7 = or i64 %5, %6
  ret i64 %7
}

define i32 @pr55484(i32 %0) {
; CHECK-SD-LABEL: pr55484:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsr w8, w0, #8
; CHECK-SD-NEXT: orr w8, w8, w0, lsl #8
; CHECK-SD-NEXT: sxth w0, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: pr55484:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: lsl w8, w0, #8
; CHECK-GI-NEXT: orr w8, w8, w0, lsr #8
; CHECK-GI-NEXT: sxth w0, w8
; CHECK-GI-NEXT: ret
  %2 = lshr i32 %0, 8
  %3 = shl i32 %0, 8
  %4 = or i32 %2, %3
  %5 = trunc i32 %4 to i16
  %6 = sext i16 %5 to i32
  ret i32 %6
}