; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
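
; The first RUN line covers the default SelectionDAG selector (CHECK-SD), the
; second covers GlobalISel via -global-isel (CHECK-GI); output that is common
; to both selectors is matched with the shared CHECK prefix.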

define i32 @test_rev_w(i32 %a) nounwind {
; CHECK-LABEL: test_rev_w:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev w0, w0
  %0 = tail call i32 @llvm.bswap.i32(i32 %a)

define i64 @test_rev_x(i64 %a) nounwind {
; CHECK-LABEL: test_rev_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev x0, x0
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)

; Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 16 bits
; of %a are zero. This optimizes rev + lsr 16 to rev16.
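; Illustrative trace with a hypothetical value (not part of the test): for
; %a = 0xABCD the zero-extended input is 0x0000ABCD, bswap gives 0xCDAB0000,
; and both "lsr #16" and a 16-bit right rotate of that result yield 0x0000CDAB,
; which is exactly what rev16 computes on 0x0000ABCD.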

define i32 @test_rev_w_srl16(i16 %a) {
; CHECK-SD-LABEL: test_rev_w_srl16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev w8, w0
; CHECK-SD-NEXT: lsr w0, w8, #16
; CHECK-GI-LABEL: test_rev_w_srl16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xffff
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w0, w8, #16
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.bswap.i32(i32 %0)

define i32 @test_rev_w_srl16_load(ptr %a) {
; CHECK-LABEL: test_rev_w_srl16_load:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: lsr w0, w8, #16
  %1 = zext i16 %0 to i32
  %2 = tail call i32 @llvm.bswap.i32(i32 %1)

define i32 @test_rev_w_srl16_add(i8 %a, i8 %b) {
; CHECK-SD-LABEL: test_rev_w_srl16_add:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: and w8, w0, #0xff
; CHECK-SD-NEXT: add w8, w8, w1, uxtb
; CHECK-SD-NEXT: rev16 w0, w8
; CHECK-GI-LABEL: test_rev_w_srl16_add:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w1, #0xff
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w0, w8, #16
  %0 = zext i8 %a to i32
  %1 = zext i8 %b to i32
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)

; Canonicalize (srl (bswap x), 32) to (rotr (bswap x), 32) if the high 32 bits
; of %a are zero. This optimizes rev + lsr 32 to rev32.
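; Illustrative trace with a hypothetical value (not part of the test): for
; %a = 0x12345678 the zero-extended input is 0x0000000012345678, bswap gives
; 0x7856341200000000, and both "lsr #32" and a 32-bit rotate of that result
; yield 0x0000000078563412, which matches rev32 of the zero-extended input.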

define i64 @test_rev_x_srl32(i32 %a) {
; CHECK-SD-LABEL: test_rev_x_srl32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: rev x8, x0
; CHECK-SD-NEXT: lsr x0, x8, #32
; CHECK-GI-LABEL: test_rev_x_srl32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: rev x8, x8
; CHECK-GI-NEXT: lsr x0, x8, #32
  %0 = zext i32 %a to i64
  %1 = tail call i64 @llvm.bswap.i64(i64 %0)

define i64 @test_rev_x_srl32_load(ptr %a) {
; CHECK-LABEL: test_rev_x_srl32_load:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: lsr x0, x8, #32
  %0 = load i32, ptr %a
  %1 = zext i32 %0 to i64
  %2 = tail call i64 @llvm.bswap.i64(i64 %1)

define i64 @test_rev_x_srl32_shift(i64 %a) {
; CHECK-SD-LABEL: test_rev_x_srl32_shift:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ubfx x8, x0, #2, #29
; CHECK-SD-NEXT: rev32 x0, x8
; CHECK-GI-LABEL: test_rev_x_srl32_shift:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ubfx x8, x0, #2, #29
; CHECK-GI-NEXT: rev x8, x8
; CHECK-GI-NEXT: lsr x0, x8, #32
  %2 = tail call i64 @llvm.bswap.i64(i64 %1)

declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone

define i32 @test_rev16_w(i32 %X) nounwind {
; CHECK-SD-LABEL: test_rev16_w:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev16 w0, w0
; CHECK-GI-LABEL: test_rev16_w:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr w8, w0, #8
; CHECK-GI-NEXT: lsl w9, w0, #8
; CHECK-GI-NEXT: and w10, w8, #0xff0000
; CHECK-GI-NEXT: and w11, w9, #0xff000000
; CHECK-GI-NEXT: and w8, w8, #0xff
; CHECK-GI-NEXT: and w9, w9, #0xff00
; CHECK-GI-NEXT: orr w10, w11, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: orr w0, w10, w8
  %tmp1 = lshr i32 %X, 8
  %X15 = bitcast i32 %X to i32
  %tmp4 = shl i32 %X15, 8
  %tmp2 = and i32 %tmp1, 16711680
  %tmp5 = and i32 %tmp4, -16777216
  %tmp9 = and i32 %tmp1, 255
  %tmp13 = and i32 %tmp4, 65280
  %tmp6 = or i32 %tmp5, %tmp2
  %tmp10 = or i32 %tmp6, %tmp13
  %tmp14 = or i32 %tmp10, %tmp9

; 64-bit REV16 is *not* a swap then a 16-bit rotation:
; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
; 01234567 ->(rev16) 10325476
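; Because the two byte orders differ, the i64 (rotr (bswap x), 16) pattern in
; test_rev16_x below must stay as rev followed by ror and must not be matched
; to a single rev16 x.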

define i64 @test_rev16_x(i64 %a) nounwind {
; CHECK-LABEL: test_rev16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev x8, x0
; CHECK-NEXT: ror x0, x8, #16
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)

define i64 @test_rev32_x(i64 %a) nounwind {
; CHECK-LABEL: test_rev32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 x0, x0
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)

define <8 x i8> @test_vrev64D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D8:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.8b v0, v0
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

define <4 x i16> @test_vrev64D16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D16:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.4h v0, v0
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

define <2 x i32> @test_vrev64D32(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D32:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.2s v0, v0
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>

define <2 x float> @test_vrev64Df(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Df:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.2s v0, v0
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x float> %tmp2

define <16 x i8> @test_vrev64Q8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q8:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.16b v0, v0
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>

define <8 x i16> @test_vrev64Q16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q16:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.8h v0, v0
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>

define <4 x i32> @test_vrev64Q32(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q32:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.4s v0, v0
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>

define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Qf:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64.4s v0, v0
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x float> %tmp2

define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
; CHECK-SD-LABEL: test_vrev32D8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: rev32.8b v0, v0
; CHECK-GI-LABEL: test_vrev32D8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI19_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI19_0]
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>

define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
; CHECK-SD-LABEL: test_vrev32D16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: rev32.4h v0, v0
; CHECK-GI-LABEL: test_vrev32D16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI20_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>

define <16 x i8> @test_vrev32Q8(ptr %A) nounwind {
; CHECK-SD-LABEL: test_vrev32Q8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: rev32.16b v0, v0
; CHECK-GI-LABEL: test_vrev32Q8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI21_0
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI21_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>

define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
; CHECK-SD-LABEL: test_vrev32Q16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: rev32.8h v0, v0
; CHECK-GI-LABEL: test_vrev32Q16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI22_0
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>

define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
; CHECK-SD-LABEL: test_vrev16D8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: rev16.8b v0, v0
; CHECK-GI-LABEL: test_vrev16D8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>

define <16 x i8> @test_vrev16Q8(ptr %A) nounwind {
; CHECK-SD-LABEL: test_vrev16Q8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: rev16.16b v0, v0
; CHECK-GI-LABEL: test_vrev16Q8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI24_0
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI24_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>

; Undef shuffle indices should not prevent matching to VREV:
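; For example, the mask <7, undef, undef, 4, 3, 2, 1, 0> in test_vrev64D8_undef
; below still selects a single rev64.8b.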

define <8 x i8> @test_vrev64D8_undef(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D8_undef:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev64.8b v0, v0
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>

define <8 x i16> @test_vrev32Q16_undef(ptr %A) nounwind {
; CHECK-SD-LABEL: test_vrev32Q16_undef:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: rev32.8h v0, v0
; CHECK-GI-LABEL: test_vrev32Q16_undef:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI26_0
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>

; vrev <4 x i16> should use REV32 and not REV64
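; (Note: in the CHECK lines below both selectors currently store the two
; extracted lanes directly with st1.h, so no vector rev is emitted here.)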
define void @test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp {
; CHECK-SD-LABEL: test_vrev64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add x8, x1, #2
; CHECK-SD-NEXT: st1.h { v0 }[5], [x8]
; CHECK-SD-NEXT: st1.h { v0 }[6], [x1]
; CHECK-GI-LABEL: test_vrev64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: add x8, x1, #2
; CHECK-GI-NEXT: st1.h { v0 }[6], [x1]
; CHECK-GI-NEXT: st1.h { v0 }[5], [x8]
  %tmp2 = load <8 x i16>, ptr %source, align 4
  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
  store <2 x i16> %tmp11, ptr %dst, align 4

; Test vrev of float4
define void @float_vrev64(ptr nocapture %source, ptr nocapture %dest) nounwind noinline ssp {
; CHECK-SD-LABEL: float_vrev64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi.2d v0, #0000000000000000
; CHECK-SD-NEXT: add x8, x0, #12
; CHECK-SD-NEXT: dup.4s v0, v0[0]
; CHECK-SD-NEXT: ld1.s { v0 }[1], [x8]
; CHECK-SD-NEXT: str q0, [x1, #176]
; CHECK-GI-LABEL: float_vrev64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi d0, #0000000000000000
; CHECK-GI-NEXT: adrp x8, .LCPI28_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI28_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: str q0, [x1, #176]
  %tmp2 = load <4 x float>, ptr %source, align 4
  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
  %arrayidx8 = getelementptr inbounds <4 x float>, ptr %dest, i32 11
  store <4 x float> %tmp5, ptr %arrayidx8, align 4

define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
; CHECK-LABEL: test_vrev32_bswap:
; CHECK-NEXT: rev32.16b v0, v0
  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)

declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone

; Reduced regression from D114354
define void @test_rev16_truncstore() {
; CHECK-SD-LABEL: test_rev16_truncstore:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cbnz wzr, .LBB30_2
; CHECK-SD-NEXT: .LBB30_1: // %cleanup
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: ldrh w8, [x8]
; CHECK-SD-NEXT: rev16 w8, w8
; CHECK-SD-NEXT: strh w8, [x8]
; CHECK-SD-NEXT: cbz wzr, .LBB30_1
; CHECK-SD-NEXT: .LBB30_2: // %fail
; CHECK-GI-LABEL: test_rev16_truncstore:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: tbnz wzr, #0, .LBB30_2
; CHECK-GI-NEXT: .LBB30_1: // %cleanup
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w8, [x8]
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: strh w8, [x8]
; CHECK-GI-NEXT: tbz wzr, #0, .LBB30_1
; CHECK-GI-NEXT: .LBB30_2: // %fail
  %out.6269.i = phi ptr [ undef, %cleanup ], [ undef, %entry ]
  %0 = load i16, ptr undef, align 2
  %1 = icmp eq i16 undef, -10240
  br i1 %1, label %fail, label %cleanup
  %or130.i = call i16 @llvm.bswap.i16(i16 %0)
  store i16 %or130.i, ptr %out.6269.i, align 2

declare i16 @llvm.bswap.i16(i16)

; Reduced regression from D120192
define void @test_bswap32_narrow(ptr %p0, ptr %p1) nounwind {
; CHECK-SD-LABEL: test_bswap32_narrow:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-SD-NEXT: ldrh w8, [x0, #2]
; CHECK-SD-NEXT: mov x19, x1
; CHECK-SD-NEXT: rev16 w0, w8
; CHECK-SD-NEXT: bl gid_tbl_len
; CHECK-SD-NEXT: strh wzr, [x19]
; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-GI-LABEL: test_bswap32_narrow:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: mov x19, x1
; CHECK-GI-NEXT: and w8, w8, #0xffff0000
; CHECK-GI-NEXT: rev w0, w8
; CHECK-GI-NEXT: bl gid_tbl_len
; CHECK-GI-NEXT: strh wzr, [x19]
; CHECK-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
  %ld = load i32, ptr %p0, align 4
  %and = and i32 %ld, -65536
  %bswap = tail call i32 @llvm.bswap.i32(i32 %and)
  %and16 = zext i32 %bswap to i64
  %call17 = tail call i32 @gid_tbl_len(i64 %and16)
  store i16 0, ptr %p1, align 4

declare i32 @gid_tbl_len(...)

; 64-bit REV16 is *not* a swap then a 16-bit rotation:
; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
; 01234567 ->(rev16) 10325476
; Optimize patterns where rev16 can be generated for a 64-bit input.
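; In the IR below, the and masks 71777214294589695 (0x00ff00ff00ff00ff) and
; -71777214294589696 (0xff00ff00ff00ff00) select alternating bytes of the
; shifted value, i.e. the per-halfword byte swap that a single rev16 x performs.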

define i64 @test_rev16_x_hwbyteswaps(i64 %a) nounwind {
; CHECK-LABEL: test_rev16_x_hwbyteswaps:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 x0, x0
  %1 = and i64 %0, 71777214294589695
  %3 = and i64 %2, -71777214294589696

; Optimize a pattern built from multiple and/or operations into a simpler form that enables generation of rev16.
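; Here the same per-halfword byte swap is spelled out byte by byte: each and
; below isolates one byte lane of the shifted value (0xff000000000000,
; 0xff00000000, 0xff0000 and 0xff on the lsr side; 0xff00000000000000,
; 0xff0000000000, 0xff000000 and 0xff00 on the lsl side) and the ors
; reassemble the result.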

define i64 @test_rev16_x_hwbyteswaps_complex1(i64 %a) nounwind {
; CHECK-SD-LABEL: test_rev16_x_hwbyteswaps_complex1:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #48
; CHECK-SD-NEXT: and x10, x8, #0xff000000000000
; CHECK-SD-NEXT: and x11, x8, #0xff00000000
; CHECK-SD-NEXT: and x8, x8, #0xff0000
; CHECK-SD-NEXT: bfi x10, x9, #56, #8
; CHECK-SD-NEXT: lsr x9, x0, #32
; CHECK-SD-NEXT: orr x10, x10, x11
; CHECK-SD-NEXT: bfi x10, x9, #40, #8
; CHECK-SD-NEXT: lsr x9, x0, #16
; CHECK-SD-NEXT: orr x8, x10, x8
; CHECK-SD-NEXT: bfi x8, x9, #24, #8
; CHECK-SD-NEXT: ubfiz x9, x0, #8, #8
; CHECK-SD-NEXT: bfxil x8, x0, #8, #8
; CHECK-SD-NEXT: orr x0, x8, x9
; CHECK-GI-LABEL: test_rev16_x_hwbyteswaps_complex1:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x11, x9, #0xff00000000000000
; CHECK-GI-NEXT: and x12, x8, #0xff00000000
; CHECK-GI-NEXT: and x13, x9, #0xff0000000000
; CHECK-GI-NEXT: and x14, x8, #0xff0000
; CHECK-GI-NEXT: orr x10, x10, x11
; CHECK-GI-NEXT: and x11, x9, #0xff000000
; CHECK-GI-NEXT: orr x12, x12, x13
; CHECK-GI-NEXT: and x8, x8, #0xff
; CHECK-GI-NEXT: orr x11, x14, x11
; CHECK-GI-NEXT: orr x10, x10, x12
; CHECK-GI-NEXT: and x9, x9, #0xff00
; CHECK-GI-NEXT: orr x8, x11, x8
; CHECK-GI-NEXT: orr x8, x10, x8
; CHECK-GI-NEXT: orr x0, x8, x9
  %1 = and i64 %0, 71776119061217280
  %3 = and i64 %2, -72057594037927936
  %5 = and i64 %0, 1095216660480
  %7 = and i64 %2, 280375465082880
  %9 = and i64 %0, 16711680
  %11 = and i64 %2, 4278190080
  %12 = or i64 %10, %11
  %13 = and i64 %0, 255
  %14 = or i64 %12, %13
  %15 = and i64 %2, 65280
  %16 = or i64 %14, %15

define i64 @test_rev16_x_hwbyteswaps_complex2(i64 %a) nounwind {
; CHECK-SD-LABEL: test_rev16_x_hwbyteswaps_complex2:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #48
; CHECK-SD-NEXT: lsr x10, x0, #32
; CHECK-SD-NEXT: and x8, x8, #0xff00ff00ff00ff
; CHECK-SD-NEXT: bfi x8, x9, #56, #8
; CHECK-SD-NEXT: lsr x9, x0, #16
; CHECK-SD-NEXT: bfi x8, x10, #40, #8
; CHECK-SD-NEXT: bfi x8, x9, #24, #8
; CHECK-SD-NEXT: bfi x8, x0, #8, #8
; CHECK-SD-NEXT: mov x0, x8
; CHECK-GI-LABEL: test_rev16_x_hwbyteswaps_complex2:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x11, x8, #0xff00000000
; CHECK-GI-NEXT: and x12, x8, #0xff0000
; CHECK-GI-NEXT: and x8, x8, #0xff
; CHECK-GI-NEXT: and x13, x9, #0xff00000000000000
; CHECK-GI-NEXT: orr x10, x10, x11
; CHECK-GI-NEXT: and x11, x9, #0xff0000000000
; CHECK-GI-NEXT: orr x8, x12, x8
; CHECK-GI-NEXT: and x12, x9, #0xff000000
; CHECK-GI-NEXT: orr x11, x13, x11
; CHECK-GI-NEXT: orr x8, x10, x8
; CHECK-GI-NEXT: and x9, x9, #0xff00
; CHECK-GI-NEXT: orr x10, x11, x12
; CHECK-GI-NEXT: orr x8, x8, x10
; CHECK-GI-NEXT: orr x0, x8, x9
  %1 = and i64 %0, 71776119061217280
  %3 = and i64 %0, 1095216660480
  %5 = and i64 %0, 16711680
  %9 = and i64 %2, -72057594037927936
  %11 = and i64 %2, 280375465082880
  %12 = or i64 %10, %11
  %13 = and i64 %2, 4278190080
  %14 = or i64 %12, %13
  %15 = and i64 %2, 65280
  %16 = or i64 %14, %15

; Optimize a pattern built from multiple and/or operations into a simpler form that enables generation of rev16.
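; complex3 repeats the complex1 pattern with the operands of each or commuted,
; exercising the matching code with the opposite operand order.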

define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind {
; CHECK-SD-LABEL: test_rev16_x_hwbyteswaps_complex3:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #48
; CHECK-SD-NEXT: and x10, x8, #0xff000000000000
; CHECK-SD-NEXT: and x11, x8, #0xff00000000
; CHECK-SD-NEXT: and x8, x8, #0xff0000
; CHECK-SD-NEXT: bfi x10, x9, #56, #8
; CHECK-SD-NEXT: lsr x9, x0, #32
; CHECK-SD-NEXT: orr x10, x11, x10
; CHECK-SD-NEXT: bfi x10, x9, #40, #8
; CHECK-SD-NEXT: lsr x9, x0, #16
; CHECK-SD-NEXT: orr x8, x8, x10
; CHECK-SD-NEXT: bfi x8, x9, #24, #8
; CHECK-SD-NEXT: ubfiz x9, x0, #8, #8
; CHECK-SD-NEXT: bfxil x8, x0, #8, #8
; CHECK-SD-NEXT: orr x0, x9, x8
; CHECK-GI-LABEL: test_rev16_x_hwbyteswaps_complex3:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x11, x9, #0xff00000000000000
; CHECK-GI-NEXT: and x12, x8, #0xff00000000
; CHECK-GI-NEXT: and x13, x9, #0xff0000000000
; CHECK-GI-NEXT: and x14, x8, #0xff0000
; CHECK-GI-NEXT: orr x10, x11, x10
; CHECK-GI-NEXT: and x11, x9, #0xff000000
; CHECK-GI-NEXT: orr x12, x13, x12
; CHECK-GI-NEXT: and x8, x8, #0xff
; CHECK-GI-NEXT: orr x11, x11, x14
; CHECK-GI-NEXT: orr x10, x12, x10
; CHECK-GI-NEXT: and x9, x9, #0xff00
; CHECK-GI-NEXT: orr x8, x8, x11
; CHECK-GI-NEXT: orr x8, x8, x10
; CHECK-GI-NEXT: orr x0, x9, x8
  %1 = and i64 %0, 71776119061217280
  %3 = and i64 %2, -72057594037927936
  %5 = and i64 %0, 1095216660480
  %7 = and i64 %2, 280375465082880
  %9 = and i64 %0, 16711680
  %11 = and i64 %2, 4278190080
  %12 = or i64 %11, %10
  %13 = and i64 %0, 255
  %14 = or i64 %13, %12
  %15 = and i64 %2, 65280
  %16 = or i64 %15, %14

define i64 @test_or_and_combine1(i64 %a) nounwind {
; CHECK-SD-LABEL: test_or_and_combine1:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: lsr x8, x0, #8
; CHECK-SD-NEXT: lsr x9, x0, #24
; CHECK-SD-NEXT: and x10, x8, #0xff000000000000
; CHECK-SD-NEXT: and x8, x8, #0xff0000
; CHECK-SD-NEXT: bfi x10, x9, #32, #8
; CHECK-SD-NEXT: orr x0, x10, x8
; CHECK-GI-LABEL: test_or_and_combine1:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x8, x0, #8
; CHECK-GI-NEXT: lsl x9, x0, #8
; CHECK-GI-NEXT: and x10, x8, #0xff000000000000
; CHECK-GI-NEXT: and x9, x9, #0xff00000000
; CHECK-GI-NEXT: and x8, x8, #0xff0000
; CHECK-GI-NEXT: orr x9, x10, x9
; CHECK-GI-NEXT: orr x0, x9, x8
  %1 = and i64 %0, 71776119061217280
  %3 = and i64 %2, 1095216660480
  %5 = and i64 %0, 16711680

define i64 @test_or_and_combine2(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_or_and_combine2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: lsr x8, x0, #8
; CHECK-NEXT: lsl x9, x0, #8
; CHECK-NEXT: and x10, x8, #0xff000000000000
; CHECK-NEXT: and x11, x9, #0xff00000000
; CHECK-NEXT: and x8, x8, #0xff0000
; CHECK-NEXT: orr x9, x10, x9
; CHECK-NEXT: orr x8, x11, x8
; CHECK-NEXT: orr x0, x9, x8
  %1 = and i64 %0, 71776119061217280
  %4 = and i64 %2, 1095216660480
  %6 = and i64 %0, 16711680

define i32 @pr55484(i32 %0) {
; CHECK-SD-LABEL: pr55484:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsr w8, w0, #8
; CHECK-SD-NEXT: orr w8, w8, w0, lsl #8
; CHECK-SD-NEXT: sxth w0, w8
; CHECK-GI-LABEL: pr55484:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: lsl w8, w0, #8
; CHECK-GI-NEXT: orr w8, w8, w0, lsr #8
; CHECK-GI-NEXT: sxth w0, w8
  %5 = trunc i32 %4 to i16
  %6 = sext i16 %5 to i32