1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECKLE
3 ; RUN: llc < %s -mtriple=aarch64_be--linux-gnu | FileCheck %s --check-prefix=CHECKBE
; test_shuf1: one 16-lane i32 shuffle of %x and %y (mask indices 0-15 pick
; from %x, 16-31 from %y). The mask is made of four 4-lane groups; each group
; is exercised on its own by test_shuf2 .. test_shuf5 below. The expectations
; cover both RUN lines (little-endian and big-endian AArch64).
5 define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
6 ; CHECKLE-LABEL: test_shuf1:
8 ; CHECKLE-NEXT: ext v3.16b, v6.16b, v1.16b, #4
9 ; CHECKLE-NEXT: uzp1 v5.4s, v1.4s, v0.4s
10 ; CHECKLE-NEXT: uzp2 v16.4s, v2.4s, v4.4s
11 ; CHECKLE-NEXT: dup v17.4s, v4.s[0]
12 ; CHECKLE-NEXT: trn2 v4.4s, v1.4s, v3.4s
13 ; CHECKLE-NEXT: mov v17.s[0], v6.s[3]
14 ; CHECKLE-NEXT: trn2 v1.4s, v5.4s, v1.4s
15 ; CHECKLE-NEXT: rev64 v3.4s, v7.4s
16 ; CHECKLE-NEXT: trn1 v2.4s, v16.4s, v2.4s
17 ; CHECKLE-NEXT: mov v4.s[0], v7.s[1]
18 ; CHECKLE-NEXT: ext v1.16b, v0.16b, v1.16b, #12
19 ; CHECKLE-NEXT: mov v3.d[0], v17.d[0]
20 ; CHECKLE-NEXT: mov v2.s[3], v7.s[0]
21 ; CHECKLE-NEXT: mov v0.16b, v4.16b
24 ; CHECKBE-LABEL: test_shuf1:
26 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
27 ; CHECKBE-NEXT: rev64 v3.4s, v6.4s
28 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
29 ; CHECKBE-NEXT: rev64 v2.4s, v2.4s
30 ; CHECKBE-NEXT: rev64 v4.4s, v4.4s
31 ; CHECKBE-NEXT: rev64 v5.4s, v7.4s
32 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
33 ; CHECKBE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
34 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
35 ; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
36 ; CHECKBE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
37 ; CHECKBE-NEXT: ext v5.16b, v5.16b, v5.16b, #8
38 ; CHECKBE-NEXT: ext v6.16b, v3.16b, v1.16b, #4
39 ; CHECKBE-NEXT: uzp1 v16.4s, v1.4s, v0.4s
40 ; CHECKBE-NEXT: uzp2 v7.4s, v2.4s, v4.4s
41 ; CHECKBE-NEXT: dup v4.4s, v4.s[0]
42 ; CHECKBE-NEXT: rev64 v17.4s, v5.4s
43 ; CHECKBE-NEXT: trn2 v6.4s, v1.4s, v6.4s
44 ; CHECKBE-NEXT: mov v4.s[0], v3.s[3]
45 ; CHECKBE-NEXT: trn2 v1.4s, v16.4s, v1.4s
46 ; CHECKBE-NEXT: trn1 v2.4s, v7.4s, v2.4s
47 ; CHECKBE-NEXT: rev64 v3.4s, v17.4s
48 ; CHECKBE-NEXT: mov v6.s[0], v5.s[1]
49 ; CHECKBE-NEXT: rev64 v4.4s, v4.4s
50 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
51 ; CHECKBE-NEXT: mov v2.s[3], v5.s[0]
52 ; CHECKBE-NEXT: rev64 v1.4s, v6.4s
53 ; CHECKBE-NEXT: mov v3.d[0], v4.d[0]
54 ; CHECKBE-NEXT: rev64 v4.4s, v0.4s
55 ; CHECKBE-NEXT: rev64 v2.4s, v2.4s
56 ; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
57 ; CHECKBE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
58 ; CHECKBE-NEXT: ext v1.16b, v4.16b, v4.16b, #8
59 ; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
61 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 29, i32 26, i32 7, i32 4, i32 3, i32 6, i32 5, i32 2, i32 9, i32 8, i32 17, i32 28, i32 27, i32 16, i32 31, i32 30>
; test_shuf2: extracts 4 lanes from the 16-lane inputs using mask
; <29, 26, 7, 4> (two lanes from %y, then two from %x). This is the first
; 4-lane group of the test_shuf1 mask.
65 define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
66 ; CHECKLE-LABEL: test_shuf2:
68 ; CHECKLE-NEXT: zip2 v0.4s, v7.4s, v6.4s
69 ; CHECKLE-NEXT: trn2 v2.4s, v7.4s, v0.4s
70 ; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #4
71 ; CHECKLE-NEXT: mov v0.d[0], v2.d[0]
74 ; CHECKBE-LABEL: test_shuf2:
76 ; CHECKBE-NEXT: rev64 v0.4s, v6.4s
77 ; CHECKBE-NEXT: rev64 v2.4s, v7.4s
78 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
79 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
80 ; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
81 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
82 ; CHECKBE-NEXT: zip2 v0.4s, v2.4s, v0.4s
83 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #4
84 ; CHECKBE-NEXT: trn2 v0.4s, v2.4s, v0.4s
85 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
86 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
87 ; CHECKBE-NEXT: mov v1.d[0], v0.d[0]
88 ; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
90 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 29, i32 26, i32 7, i32 4>
; test_shuf3: extracts 4 lanes with mask <3, 6, 5, 2>; every index is below
; 16, so only %x is read. This is the second 4-lane group of the test_shuf1
; mask.
94 define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) {
95 ; CHECKLE-LABEL: test_shuf3:
97 ; CHECKLE-NEXT: uzp1 v2.4s, v1.4s, v0.4s
98 ; CHECKLE-NEXT: trn2 v1.4s, v2.4s, v1.4s
99 ; CHECKLE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
102 ; CHECKBE-LABEL: test_shuf3:
104 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
105 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
106 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
107 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
108 ; CHECKBE-NEXT: uzp1 v2.4s, v1.4s, v0.4s
109 ; CHECKBE-NEXT: trn2 v1.4s, v2.4s, v1.4s
110 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
111 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
112 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
114 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 3, i32 6, i32 5, i32 2>
; test_shuf4: extracts 4 lanes with mask <9, 8, 17, 28> (two lanes from %x,
; then two from %y). This is the third 4-lane group of the test_shuf1 mask.
118 define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) {
119 ; CHECKLE-LABEL: test_shuf4:
121 ; CHECKLE-NEXT: uzp2 v0.4s, v2.4s, v4.4s
122 ; CHECKLE-NEXT: trn1 v0.4s, v0.4s, v2.4s
123 ; CHECKLE-NEXT: mov v0.s[3], v7.s[0]
126 ; CHECKBE-LABEL: test_shuf4:
128 ; CHECKBE-NEXT: rev64 v0.4s, v4.4s
129 ; CHECKBE-NEXT: rev64 v1.4s, v2.4s
130 ; CHECKBE-NEXT: rev64 v2.4s, v7.4s
131 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
132 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
133 ; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
134 ; CHECKBE-NEXT: uzp2 v0.4s, v1.4s, v0.4s
135 ; CHECKBE-NEXT: trn1 v0.4s, v0.4s, v1.4s
136 ; CHECKBE-NEXT: mov v0.s[3], v2.s[0]
137 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
138 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
140 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 9, i32 8, i32 17, i32 28>
; test_shuf5: extracts 4 lanes with mask <27, 16, 31, 30>; every index is 16
; or above, so only %y is read. This is the fourth 4-lane group of the
; test_shuf1 mask.
144 define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
145 ; CHECKLE-LABEL: test_shuf5:
147 ; CHECKLE-NEXT: ext v1.16b, v6.16b, v4.16b, #12
148 ; CHECKLE-NEXT: rev64 v0.4s, v7.4s
149 ; CHECKLE-NEXT: mov v0.d[0], v1.d[0]
152 ; CHECKBE-LABEL: test_shuf5:
154 ; CHECKBE-NEXT: rev64 v0.4s, v7.4s
155 ; CHECKBE-NEXT: rev64 v1.4s, v4.4s
156 ; CHECKBE-NEXT: rev64 v2.4s, v6.4s
157 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
158 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
159 ; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
160 ; CHECKBE-NEXT: ext v1.16b, v2.16b, v1.16b, #12
161 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
162 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
163 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
164 ; CHECKBE-NEXT: mov v0.d[0], v1.d[0]
165 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
167 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 27, i32 16, i32 31, i32 30>
; test1503: two-input <4 x i32> shuffle with mask <1, 5, 0, 3>, i.e. lanes
; a[1], b[1], a[0], a[3]. The function name spells out the mask digits.
171 define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b)
172 ; CHECKLE-LABEL: test1503:
174 ; CHECKLE-NEXT: zip1 v1.4s, v0.4s, v1.4s
175 ; CHECKLE-NEXT: ext v1.16b, v1.16b, v0.16b, #8
176 ; CHECKLE-NEXT: mov v1.s[3], v0.s[3]
177 ; CHECKLE-NEXT: mov v0.16b, v1.16b
180 ; CHECKBE-LABEL: test1503:
182 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
183 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
184 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
185 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
186 ; CHECKBE-NEXT: zip1 v1.4s, v0.4s, v1.4s
187 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v0.16b, #8
188 ; CHECKBE-NEXT: mov v1.s[3], v0.s[3]
189 ; CHECKBE-NEXT: rev64 v0.4s, v1.4s
190 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
193 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 3>
; test4366: two-input <4 x i32> shuffle with mask <4, 3, 6, 6>, i.e. lanes
; b[0], a[3], b[2], b[2]. The function name spells out the mask digits.
197 define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b)
198 ; CHECKLE-LABEL: test4366:
200 ; CHECKLE-NEXT: trn1 v1.4s, v1.4s, v1.4s
201 ; CHECKLE-NEXT: mov v1.s[1], v0.s[3]
202 ; CHECKLE-NEXT: mov v0.16b, v1.16b
205 ; CHECKBE-LABEL: test4366:
207 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
208 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
209 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
210 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
211 ; CHECKBE-NEXT: trn1 v1.4s, v1.4s, v1.4s
212 ; CHECKBE-NEXT: mov v1.s[1], v0.s[3]
213 ; CHECKBE-NEXT: rev64 v0.4s, v1.4s
214 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
217 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
; test7367: two-input <4 x i32> shuffle with mask <7, 3, 6, 7>, i.e. lanes
; b[3], a[3], b[2], b[3]. The function name spells out the mask digits.
221 define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b)
222 ; CHECKLE-LABEL: test7367:
224 ; CHECKLE-NEXT: mov v2.16b, v1.16b
225 ; CHECKLE-NEXT: mov v2.d[0], v0.d[1]
226 ; CHECKLE-NEXT: mov v2.s[0], v1.s[3]
227 ; CHECKLE-NEXT: mov v0.16b, v2.16b
230 ; CHECKBE-LABEL: test7367:
232 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
233 ; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
234 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
235 ; CHECKBE-NEXT: mov v2.d[0], v0.d[1]
236 ; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
237 ; CHECKBE-NEXT: rev64 v1.4s, v2.4s
238 ; CHECKBE-NEXT: mov v1.s[0], v0.s[3]
239 ; CHECKBE-NEXT: rev64 v0.4s, v1.4s
240 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
243 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 3, i32 6, i32 7>
; test4045: two-input <4 x i32> shuffle with mask <4, 0, 4, 5>, i.e. lanes
; b[0], a[0], b[0], b[1]. The function name spells out the mask digits.
247 define <4 x i32> @test4045(<4 x i32> %a, <4 x i32> %b)
248 ; CHECKLE-LABEL: test4045:
250 ; CHECKLE-NEXT: trn1 v0.4s, v1.4s, v0.4s
251 ; CHECKLE-NEXT: mov v0.d[1], v1.d[0]
254 ; CHECKBE-LABEL: test4045:
256 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
257 ; CHECKBE-NEXT: rev64 v2.4s, v1.4s
258 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
259 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
260 ; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
261 ; CHECKBE-NEXT: trn1 v0.4s, v2.4s, v0.4s
262 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
263 ; CHECKBE-NEXT: mov v0.d[1], v1.d[0]
264 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
267 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 4, i32 5>
; test0067: two-input <4 x i32> shuffle with mask <0, 0, 6, 7>, i.e. lanes
; a[0], a[0], b[2], b[3]. The function name spells out the mask digits.
271 define <4 x i32> @test0067(<4 x i32> %a, <4 x i32> %b)
272 ; CHECKLE-LABEL: test0067:
274 ; CHECKLE-NEXT: trn1 v0.4s, v0.4s, v0.4s
275 ; CHECKLE-NEXT: mov v0.d[1], v1.d[1]
278 ; CHECKBE-LABEL: test0067:
280 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
281 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
282 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
283 ; CHECKBE-NEXT: trn1 v0.4s, v0.4s, v0.4s
284 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
285 ; CHECKBE-NEXT: mov v0.d[1], v1.d[1]
286 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
289 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
; test_shuf6: <4 x i32> shuffle with mask <0, 0, 7, 7>, i.e. lanes
; a[0], a[0], b[3], b[3]. test_shuf7 uses the same mask on <4 x i16>.
293 define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
294 ; CHECKLE-LABEL: test_shuf6:
296 ; CHECKLE-NEXT: mov v0.s[2], v1.s[3]
297 ; CHECKLE-NEXT: trn1 v0.4s, v0.4s, v0.4s
300 ; CHECKBE-LABEL: test_shuf6:
302 ; CHECKBE-NEXT: rev64 v1.4s, v1.4s
303 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
304 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
305 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
306 ; CHECKBE-NEXT: mov v0.s[2], v1.s[3]
307 ; CHECKBE-NEXT: trn1 v0.4s, v0.4s, v0.4s
308 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
309 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
312 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
; test_shuf7: the <0, 0, 7, 7> mask of test_shuf6 applied to 64-bit
; <4 x i16> vectors; the expected code uses the .4h arrangement.
316 define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
317 ; CHECKLE-LABEL: test_shuf7:
319 ; CHECKLE-NEXT: // kill: def $d0 killed $d0 def $q0
320 ; CHECKLE-NEXT: // kill: def $d1 killed $d1 def $q1
321 ; CHECKLE-NEXT: mov v0.h[2], v1.h[3]
322 ; CHECKLE-NEXT: trn1 v0.4h, v0.4h, v0.4h
325 ; CHECKBE-LABEL: test_shuf7:
327 ; CHECKBE-NEXT: rev64 v1.4h, v1.4h
328 ; CHECKBE-NEXT: rev64 v0.4h, v0.4h
329 ; CHECKBE-NEXT: mov v0.h[2], v1.h[3]
330 ; CHECKBE-NEXT: trn1 v0.4h, v0.4h, v0.4h
331 ; CHECKBE-NEXT: rev64 v0.4h, v0.4h
334 %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
; test_shuf8: <8 x i8> shuffle with mask <0,0,0,0,8,8,8,8>, i.e. a[0]
; repeated four times followed by b[0] repeated four times. Both targets are
; expected to load a mask from the constant pool (.LCPI12_0) and use tbl.
338 define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
339 ; CHECKLE-LABEL: test_shuf8:
341 ; CHECKLE-NEXT: // kill: def $d0 killed $d0 def $q0
342 ; CHECKLE-NEXT: // kill: def $d1 killed $d1 def $q1
343 ; CHECKLE-NEXT: adrp x8, .LCPI12_0
344 ; CHECKLE-NEXT: mov v0.d[1], v1.d[0]
345 ; CHECKLE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
346 ; CHECKLE-NEXT: tbl v0.8b, { v0.16b }, v1.8b
349 ; CHECKBE-LABEL: test_shuf8:
351 ; CHECKBE-NEXT: rev64 v0.8b, v0.8b
352 ; CHECKBE-NEXT: rev64 v1.8b, v1.8b
353 ; CHECKBE-NEXT: adrp x8, .LCPI12_0
354 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI12_0
355 ; CHECKBE-NEXT: mov v0.d[1], v1.d[0]
356 ; CHECKBE-NEXT: ld1 { v1.8b }, [x8]
357 ; CHECKBE-NEXT: tbl v0.8b, { v0.16b }, v1.8b
358 ; CHECKBE-NEXT: rev64 v0.8b, v0.8b
361 %r = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
; test_shuf9: <8 x i16> shuffle with mask <0,0,0,0,8,8,8,8>, i.e. a[0]
; repeated four times followed by b[0] repeated four times. Both targets are
; expected to use a two-register tbl with a mask loaded from .LCPI13_0.
365 define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
366 ; CHECKLE-LABEL: test_shuf9:
368 ; CHECKLE-NEXT: adrp x8, .LCPI13_0
369 ; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
370 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
371 ; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
372 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
375 ; CHECKBE-LABEL: test_shuf9:
377 ; CHECKBE-NEXT: rev64 v1.16b, v1.16b
378 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
379 ; CHECKBE-NEXT: adrp x8, .LCPI13_0
380 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI13_0
381 ; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
382 ; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
383 ; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
384 ; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
385 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
386 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
389 %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
; test_shuf10: <16 x i8> shuffle whose mask is eight 0s followed by eight 8s.
; With 16-lane inputs both indices select from %a (a[0] x8, a[8] x8), so %b is
; not read; the expected code is a single-register tbl with a mask loaded
; from .LCPI14_0.
393 define <16 x i8> @test_shuf10(<16 x i8> %a, <16 x i8> %b)
394 ; CHECKLE-LABEL: test_shuf10:
396 ; CHECKLE-NEXT: adrp x8, .LCPI14_0
397 ; CHECKLE-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
398 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b }, v1.16b
401 ; CHECKBE-LABEL: test_shuf10:
403 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
404 ; CHECKBE-NEXT: adrp x8, .LCPI14_0
405 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI14_0
406 ; CHECKBE-NEXT: ld1 { v1.16b }, [x8]
407 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
408 ; CHECKBE-NEXT: tbl v0.16b, { v0.16b }, v1.16b
409 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
410 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
413 %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
; test_shuf11: the <0,0,0,0,8,8,8,8> mask of test_shuf9 applied to
; <8 x half> (a[0] x4, then b[0] x4). Both targets are expected to use a
; two-register tbl with a mask loaded from .LCPI15_0.
417 define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
418 ; CHECKLE-LABEL: test_shuf11:
420 ; CHECKLE-NEXT: adrp x8, .LCPI15_0
421 ; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
422 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
423 ; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
424 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
427 ; CHECKBE-LABEL: test_shuf11:
429 ; CHECKBE-NEXT: rev64 v1.16b, v1.16b
430 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
431 ; CHECKBE-NEXT: adrp x8, .LCPI15_0
432 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI15_0
433 ; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
434 ; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
435 ; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
436 ; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
437 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
438 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
441 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
; test_shuf12: <8 x half> shuffle with mask <0,0,0,0,0,8,1,15>, i.e. a[0]
; repeated five times, then b[0], a[1], b[7]. Both targets are expected to use
; a two-register tbl with a mask loaded from .LCPI16_0.
445 define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
446 ; CHECKLE-LABEL: test_shuf12:
448 ; CHECKLE-NEXT: adrp x8, .LCPI16_0
449 ; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
450 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
451 ; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
452 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
455 ; CHECKBE-LABEL: test_shuf12:
457 ; CHECKBE-NEXT: rev64 v1.16b, v1.16b
458 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
459 ; CHECKBE-NEXT: adrp x8, .LCPI16_0
460 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI16_0
461 ; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
462 ; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
463 ; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
464 ; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
465 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
466 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
469 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
; test_shuf13: <8 x half> shuffle with mask <1,2,0,0,0,8,1,15>, i.e. lanes
; a[1], a[2], a[0], a[0], a[0], b[0], a[1], b[7]. Both targets are expected to
; use a two-register tbl with a mask loaded from .LCPI17_0.
473 define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
474 ; CHECKLE-LABEL: test_shuf13:
476 ; CHECKLE-NEXT: adrp x8, .LCPI17_0
477 ; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
478 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
479 ; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
480 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
483 ; CHECKBE-LABEL: test_shuf13:
485 ; CHECKBE-NEXT: rev64 v1.16b, v1.16b
486 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
487 ; CHECKBE-NEXT: adrp x8, .LCPI17_0
488 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI17_0
489 ; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
490 ; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
491 ; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
492 ; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
493 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
494 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
497 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
; test_shuf14: <8 x half> shuffle with mask <1,2,1,1,0,8,1,15>, i.e. lanes
; a[1], a[2], a[1], a[1], a[0], b[0], a[1], b[7]. Both targets are expected to
; use a two-register tbl with a mask loaded from .LCPI18_0.
501 define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
502 ; CHECKLE-LABEL: test_shuf14:
504 ; CHECKLE-NEXT: adrp x8, .LCPI18_0
505 ; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
506 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
507 ; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
508 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
511 ; CHECKBE-LABEL: test_shuf14:
513 ; CHECKBE-NEXT: rev64 v1.16b, v1.16b
514 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
515 ; CHECKBE-NEXT: adrp x8, .LCPI18_0
516 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI18_0
517 ; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
518 ; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
519 ; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
520 ; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
521 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
522 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
525 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 1, i32 1, i32 0, i32 8, i32 1, i32 15>
; test_shuf15: <8 x half> shuffle with mask <1,2,7,2,0,3,2,15>; only the last
; lane (index 15, b[7]) comes from %b, every other lane comes from %a. Both
; targets are expected to use a two-register tbl with a mask loaded from
; .LCPI19_0.
529 define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
530 ; CHECKLE-LABEL: test_shuf15:
532 ; CHECKLE-NEXT: adrp x8, .LCPI19_0
533 ; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
534 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
535 ; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
536 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
539 ; CHECKBE-LABEL: test_shuf15:
541 ; CHECKBE-NEXT: rev64 v1.16b, v1.16b
542 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
543 ; CHECKBE-NEXT: adrp x8, .LCPI19_0
544 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI19_0
545 ; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
546 ; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
547 ; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
548 ; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
549 ; CHECKBE-NEXT: rev64 v0.16b, v0.16b
550 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
553 %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 7, i32 2, i32 0, i32 3, i32 2, i32 15>
; extract_shuffle: moves lanes 4-7 of %j into the low half (%a), takes those
; four lanes as a <4 x i16> (%b), zero-extends them to i32 (%c) and shifts
; each left by 3 (%d). The little-endian expectation is a single ushll2 #3.
; %k is not referenced by the visible instructions.
557 define <4 x i32> @extract_shuffle(<8 x i16> %j, <4 x i16> %k) {
558 ; CHECKLE-LABEL: extract_shuffle:
560 ; CHECKLE-NEXT: ushll2 v0.4s, v0.8h, #3
563 ; CHECKBE-LABEL: extract_shuffle:
565 ; CHECKBE-NEXT: rev64 v0.8h, v0.8h
566 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
567 ; CHECKBE-NEXT: ushll2 v0.4s, v0.8h, #3
568 ; CHECKBE-NEXT: rev64 v0.4s, v0.4s
569 ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
571 %a = shufflevector <8 x i16> %j, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
572 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
573 %c = zext <4 x i16> %b to <4 x i32>
574 %d = shl <4 x i32> %c, <i32 3, i32 3, i32 3, i32 3>