1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5 ; CHECK-GI: warning: Instruction selection used fallback path for shuffle_zip1
6 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shuffle_zip2
7 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shuffle_zip3
; Loads two <8 x i8> vectors, interleaves their low halves (mask 0,8,1,9,...)
; and their high halves (mask 4,12,5,13,...), and adds the two results.
; The assertions expect one zip1.8b and one zip2.8b feeding the add.
9 define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind {
10 ; CHECK-LABEL: vzipi8:
12 ; CHECK-NEXT: ldr d0, [x0]
13 ; CHECK-NEXT: ldr d1, [x1]
14 ; CHECK-NEXT: zip1.8b v2, v0, v1
15 ; CHECK-NEXT: zip2.8b v0, v0, v1
16 ; CHECK-NEXT: add.8b v0, v2, v0
18 %tmp1 = load <8 x i8>, ptr %A
19 %tmp2 = load <8 x i8>, ptr %B
20 %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21 %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
22 %tmp5 = add <8 x i8> %tmp3, %tmp4
; <4 x i16> variant: low-half interleave (mask 0,4,1,5) plus high-half
; interleave (mask 2,6,3,7) of two loaded vectors, summed.
; The assertions expect zip1.4h and zip2.4h.
26 define <4 x i16> @vzipi16(ptr %A, ptr %B) nounwind {
27 ; CHECK-LABEL: vzipi16:
29 ; CHECK-NEXT: ldr d0, [x0]
30 ; CHECK-NEXT: ldr d1, [x1]
31 ; CHECK-NEXT: zip1.4h v2, v0, v1
32 ; CHECK-NEXT: zip2.4h v0, v0, v1
33 ; CHECK-NEXT: add.4h v0, v2, v0
35 %tmp1 = load <4 x i16>, ptr %A
36 %tmp2 = load <4 x i16>, ptr %B
37 %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
38 %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
39 %tmp5 = add <4 x i16> %tmp3, %tmp4
; <16 x i8> variant: low-half interleave (mask 0,16,1,17,...) plus high-half
; interleave (mask 8,24,9,25,...) of two loaded vectors, summed.
; The assertions expect zip1.16b and zip2.16b.
43 define <16 x i8> @vzipQi8(ptr %A, ptr %B) nounwind {
44 ; CHECK-LABEL: vzipQi8:
46 ; CHECK-NEXT: ldr q0, [x0]
47 ; CHECK-NEXT: ldr q1, [x1]
48 ; CHECK-NEXT: zip1.16b v2, v0, v1
49 ; CHECK-NEXT: zip2.16b v0, v0, v1
50 ; CHECK-NEXT: add.16b v0, v2, v0
52 %tmp1 = load <16 x i8>, ptr %A
53 %tmp2 = load <16 x i8>, ptr %B
54 %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
55 %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
56 %tmp5 = add <16 x i8> %tmp3, %tmp4
; <8 x i16> variant: low-half interleave (mask 0,8,1,9,...) plus high-half
; interleave (mask 4,12,5,13,...) of two loaded vectors, summed.
; The assertions expect zip1.8h and zip2.8h.
60 define <8 x i16> @vzipQi16(ptr %A, ptr %B) nounwind {
61 ; CHECK-LABEL: vzipQi16:
63 ; CHECK-NEXT: ldr q0, [x0]
64 ; CHECK-NEXT: ldr q1, [x1]
65 ; CHECK-NEXT: zip1.8h v2, v0, v1
66 ; CHECK-NEXT: zip2.8h v0, v0, v1
67 ; CHECK-NEXT: add.8h v0, v2, v0
69 %tmp1 = load <8 x i16>, ptr %A
70 %tmp2 = load <8 x i16>, ptr %B
71 %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
72 %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
73 %tmp5 = add <8 x i16> %tmp3, %tmp4
; <4 x i32> variant: low-half interleave (mask 0,4,1,5) plus high-half
; interleave (mask 2,6,3,7) of two loaded vectors, summed.
; The assertions expect zip1.4s and zip2.4s.
77 define <4 x i32> @vzipQi32(ptr %A, ptr %B) nounwind {
78 ; CHECK-LABEL: vzipQi32:
80 ; CHECK-NEXT: ldr q0, [x0]
81 ; CHECK-NEXT: ldr q1, [x1]
82 ; CHECK-NEXT: zip1.4s v2, v0, v1
83 ; CHECK-NEXT: zip2.4s v0, v0, v1
84 ; CHECK-NEXT: add.4s v0, v2, v0
86 %tmp1 = load <4 x i32>, ptr %A
87 %tmp2 = load <4 x i32>, ptr %B
88 %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
89 %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
90 %tmp5 = add <4 x i32> %tmp3, %tmp4
; Floating-point variant of the <4 x i32> case: the same two interleave masks
; on <4 x float> inputs, combined with fadd instead of add.
; The assertions expect zip1.4s, zip2.4s and fadd.4s.
94 define <4 x float> @vzipQf(ptr %A, ptr %B) nounwind {
95 ; CHECK-LABEL: vzipQf:
97 ; CHECK-NEXT: ldr q0, [x0]
98 ; CHECK-NEXT: ldr q1, [x1]
99 ; CHECK-NEXT: zip1.4s v2, v0, v1
100 ; CHECK-NEXT: zip2.4s v0, v0, v1
101 ; CHECK-NEXT: fadd.4s v0, v2, v0
103 %tmp1 = load <4 x float>, ptr %A
104 %tmp2 = load <4 x float>, ptr %B
105 %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
106 %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
107 %tmp5 = fadd <4 x float> %tmp3, %tmp4
108 ret <4 x float> %tmp5
111 ; Undef shuffle indices should not prevent matching to ZIP1/ZIP2:
; Same shape as the <8 x i8> interleave-and-add test, but with some mask lanes
; left undef (lanes 1 and 4 of the first shuffle, lanes 5 and 6 of the second).
; The assertions still expect zip1.8b and zip2.8b.
113 define <8 x i8> @vzipi8_undef(ptr %A, ptr %B) nounwind {
114 ; CHECK-LABEL: vzipi8_undef:
116 ; CHECK-NEXT: ldr d0, [x0]
117 ; CHECK-NEXT: ldr d1, [x1]
118 ; CHECK-NEXT: zip1.8b v2, v0, v1
119 ; CHECK-NEXT: zip2.8b v0, v0, v1
120 ; CHECK-NEXT: add.8b v0, v2, v0
122 %tmp1 = load <8 x i8>, ptr %A
123 %tmp2 = load <8 x i8>, ptr %B
124 %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
125 %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
126 %tmp5 = add <8 x i8> %tmp3, %tmp4
; <16 x i8> interleave-and-add test with several undef mask lanes in both
; shuffles (lanes 3-5 of the first; lanes 3, 11 and 14 of the second).
; The assertions still expect zip1.16b and zip2.16b.
130 define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
131 ; CHECK-LABEL: vzipQi8_undef:
133 ; CHECK-NEXT: ldr q0, [x0]
134 ; CHECK-NEXT: ldr q1, [x1]
135 ; CHECK-NEXT: zip1.16b v2, v0, v1
136 ; CHECK-NEXT: zip2.16b v0, v0, v1
137 ; CHECK-NEXT: add.16b v0, v2, v0
139 %tmp1 = load <16 x i8>, ptr %A
140 %tmp2 = load <16 x i8>, ptr %B
141 %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
142 %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
143 %tmp5 = add <16 x i8> %tmp3, %tmp4
; Low-half interleave mask on <8 x i16> with lanes 0 and 1 undef; the
; assertions expect a single zip1.8h.
147 define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
148 ; CHECK-LABEL: vzip1_undef_01:
150 ; CHECK-NEXT: zip1.8h v0, v0, v1
152 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; Low-half interleave mask on <8 x i16> with only lane 0 undef; the
; assertions expect a single zip1.8h.
156 define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
157 ; CHECK-LABEL: vzip1_undef_0:
159 ; CHECK-NEXT: zip1.8h v0, v0, v1
161 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; Low-half interleave mask on <8 x i16> with only lane 1 undef; the
; assertions expect a single zip1.8h.
165 define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
166 ; CHECK-LABEL: vzip1_undef_1:
168 ; CHECK-NEXT: zip1.8h v0, v0, v1
170 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; Low-half interleave mask on <8 x i16> with lanes 0, 1 and 2 undef; the
; assertions expect a single zip1.8h.
174 define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
175 ; CHECK-LABEL: vzip1_undef_012:
177 ; CHECK-NEXT: zip1.8h v0, v0, v1
179 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
; High-half interleave mask on <8 x i16> with lanes 0 and 1 undef; the
; assertions expect a single zip2.8h.
183 define <8 x i16> @vzip2_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
184 ; CHECK-LABEL: vzip2_undef_01:
186 ; CHECK-NEXT: zip2.8h v0, v0, v1
188 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; High-half interleave mask on <8 x i16> with only lane 0 undef; the
; assertions expect a single zip2.8h.
192 define <8 x i16> @vzip2_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
193 ; CHECK-LABEL: vzip2_undef_0:
195 ; CHECK-NEXT: zip2.8h v0, v0, v1
197 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; High-half interleave mask on <8 x i16> with only lane 1 undef; the
; assertions expect a single zip2.8h.
201 define <8 x i16> @vzip2_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
202 ; CHECK-LABEL: vzip2_undef_1:
204 ; CHECK-NEXT: zip2.8h v0, v0, v1
206 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 4, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; High-half interleave mask on <8 x i16> with lanes 0, 1 and 2 undef; the
; assertions expect a single zip2.8h.
210 define <8 x i16> @vzip2_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
211 ; CHECK-LABEL: vzip2_undef_012:
213 ; CHECK-NEXT: zip2.8h v0, v0, v1
215 %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 6, i32 14, i32 7, i32 15>
; One widening shuffle that fully interleaves two <8 x i8> arguments into a
; <16 x i8> result (mask 0,8,1,9,...,7,15). The assertions expect a single
; zip1.16b for both run lines.
219 define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
220 ; CHECK-LABEL: combine_v16i8:
222 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
223 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
224 ; CHECK-NEXT: zip1.16b v0, v0, v1
226 %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; Builds the same <16 x i8> interleave in two steps: a low-half and a
; high-half <8 x i8> interleave, then a concatenating shuffle (mask 0..15).
; The default run expects this to fold to one zip1.16b; the -global-isel run
; expects zip1.8b + zip2.8b followed by a lane insert and a register move.
230 define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
231 ; CHECK-SD-LABEL: combine2_v16i8:
232 ; CHECK-SD: // %bb.0:
233 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
234 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
235 ; CHECK-SD-NEXT: zip1.16b v0, v0, v1
238 ; CHECK-GI-LABEL: combine2_v16i8:
239 ; CHECK-GI: // %bb.0:
240 ; CHECK-GI-NEXT: zip1.8b v2, v0, v1
241 ; CHECK-GI-NEXT: zip2.8b v0, v0, v1
242 ; CHECK-GI-NEXT: mov.d v2[1], v0[0]
243 ; CHECK-GI-NEXT: mov.16b v0, v2
245 %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
246 %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
247 %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; One widening shuffle that fully interleaves two <4 x i16> arguments into a
; <8 x i16> result (mask 0,4,1,5,2,6,3,7). The assertions expect a single
; zip1.8h for both run lines.
251 define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
252 ; CHECK-LABEL: combine_v8i16:
254 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
255 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
256 ; CHECK-NEXT: zip1.8h v0, v0, v1
258 %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; Two-step form of the <8 x i16> interleave: low-half and high-half
; <4 x i16> interleaves, then a concatenating shuffle (mask 0..7).
; The default run expects one zip1.8h; the -global-isel run expects
; zip1.4h + zip2.4h followed by a lane insert and a register move.
262 define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
263 ; CHECK-SD-LABEL: combine2_v8i16:
264 ; CHECK-SD: // %bb.0:
265 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
266 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
267 ; CHECK-SD-NEXT: zip1.8h v0, v0, v1
270 ; CHECK-GI-LABEL: combine2_v8i16:
271 ; CHECK-GI: // %bb.0:
272 ; CHECK-GI-NEXT: zip1.4h v2, v0, v1
273 ; CHECK-GI-NEXT: zip2.4h v0, v0, v1
274 ; CHECK-GI-NEXT: mov.d v2[1], v0[0]
275 ; CHECK-GI-NEXT: mov.16b v0, v2
277 %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
278 %4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
279 %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; One widening shuffle that fully interleaves two <2 x i32> arguments into a
; <4 x i32> result (mask 0,2,1,3). The assertions expect a single zip1.4s
; for both run lines.
283 define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
284 ; CHECK-LABEL: combine_v4i32:
286 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
287 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
288 ; CHECK-NEXT: zip1.4s v0, v0, v1
290 %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; Two-step form of the <4 x i32> interleave: <2 x i32> shuffles with masks
; 0,2 and 1,3, then a concatenating shuffle (mask 0..3).
; The default run expects one zip1.4s; the -global-isel run expects
; zip1.2s + zip2.2s followed by a lane insert and a register move.
294 define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
295 ; CHECK-SD-LABEL: combine2_v4i32:
296 ; CHECK-SD: // %bb.0:
297 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
298 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
299 ; CHECK-SD-NEXT: zip1.4s v0, v0, v1
302 ; CHECK-GI-LABEL: combine2_v4i32:
303 ; CHECK-GI: // %bb.0:
304 ; CHECK-GI-NEXT: zip1.2s v2, v0, v1
305 ; CHECK-GI-NEXT: zip2.2s v0, v0, v1
306 ; CHECK-GI-NEXT: mov.d v2[1], v0[0]
307 ; CHECK-GI-NEXT: mov.16b v0, v2
309 %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
310 %4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3>
311 %5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Widening <8 x i8> -> <16 x i8> interleave with mask lane 1 undef; the
; assertions still expect a single zip1.16b.
315 define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
316 ; CHECK-LABEL: combine_v16i8_undef:
318 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
319 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
320 ; CHECK-NEXT: zip1.16b v0, v0, v1
322 %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; Two-step <16 x i8> interleave where the low-half shuffle has lane 1 undef,
; followed by a concatenating shuffle (mask 0..15).
; The default run expects one zip1.16b; the -global-isel run expects
; zip1.8b + zip2.8b followed by a lane insert and a register move.
326 define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
327 ; CHECK-SD-LABEL: combine2_v16i8_undef:
328 ; CHECK-SD: // %bb.0:
329 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
330 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
331 ; CHECK-SD-NEXT: zip1.16b v0, v0, v1
334 ; CHECK-GI-LABEL: combine2_v16i8_undef:
335 ; CHECK-GI: // %bb.0:
336 ; CHECK-GI-NEXT: zip1.8b v2, v0, v1
337 ; CHECK-GI-NEXT: zip2.8b v0, v0, v1
338 ; CHECK-GI-NEXT: mov.d v2[1], v0[0]
339 ; CHECK-GI-NEXT: mov.16b v0, v2
341 %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
342 %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
343 %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Widening <4 x i16> -> <8 x i16> interleave with mask lane 1 undef; the
; assertions still expect a single zip1.8h.
347 define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
348 ; CHECK-LABEL: combine_v8i16_undef:
350 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
351 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
352 ; CHECK-NEXT: zip1.8h v0, v0, v1
354 %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
358 ; FIXME: This could be zip1 too: the 8,0,9,1... mask on swapped operands is the same interleave, but it is currently lowered to tbl.
; Widening interleave written with the operands swapped (%1 first, %0 second)
; and a mask that starts with the second operand (8,0,9,1,...). Both run
; lines currently expect a constant-pool mask load plus tbl.16b.
; NOTE(review): the name says v8i16, but the operands are <8 x i8> and the
; result is <16 x i8>.
359 define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
360 ; CHECK-SD-LABEL: combine_v8i16_8first:
361 ; CHECK-SD: // %bb.0:
362 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
363 ; CHECK-SD-NEXT: adrp x8, .LCPI25_0
364 ; CHECK-SD-NEXT: fmov d2, d0
365 ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0]
366 ; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
369 ; CHECK-GI-LABEL: combine_v8i16_8first:
370 ; CHECK-GI: // %bb.0:
371 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
372 ; CHECK-GI-NEXT: adrp x8, .LCPI25_0
373 ; CHECK-GI-NEXT: fmov d31, d1
374 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0]
375 ; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
377 %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
382 ; FIXME: This could be zip1 too: the 8,0,9,1... mask on swapped operands is the same interleave, but it is currently lowered to tbl.
; Swapped-operand widening interleave (mask 8,0,9,1,...) with the last mask
; lane undef. Both run lines currently expect a constant-pool mask load plus
; tbl.16b.
; NOTE(review): the name says v8i16, but the operands are <8 x i8> and the
; result is <16 x i8>.
383 define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
384 ; CHECK-SD-LABEL: combine_v8i16_8firstundef:
385 ; CHECK-SD: // %bb.0:
386 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
387 ; CHECK-SD-NEXT: adrp x8, .LCPI26_0
388 ; CHECK-SD-NEXT: fmov d2, d0
389 ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
390 ; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
393 ; CHECK-GI-LABEL: combine_v8i16_8firstundef:
394 ; CHECK-GI: // %bb.0:
395 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
396 ; CHECK-GI-NEXT: adrp x8, .LCPI26_0
397 ; CHECK-GI-NEXT: fmov d31, d1
398 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
399 ; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
401 %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
; Compares zero against %arg (fcmp olt), takes lanes 2 and 0 of the <4 x i1>
; result, duplicates each picked lane pairwise (mask 0,0,1,1), and uses that
; as the select condition between splat 1.0 and zero.
; Both run lines share one set of assertions; the file header records a
; GlobalISel fallback warning for this function.
405 define <4 x float> @shuffle_zip1(<4 x float> %arg) {
406 ; CHECK-LABEL: shuffle_zip1:
407 ; CHECK: // %bb.0: // %bb
408 ; CHECK-NEXT: movi.2d v1, #0000000000000000
409 ; CHECK-NEXT: fcmgt.4s v0, v0, v1
410 ; CHECK-NEXT: uzp1.8h v1, v0, v0
411 ; CHECK-NEXT: xtn.4h v0, v0
412 ; CHECK-NEXT: xtn.4h v1, v1
413 ; CHECK-NEXT: zip2.4h v0, v0, v1
414 ; CHECK-NEXT: fmov.4s v1, #1.00000000
415 ; CHECK-NEXT: zip1.4h v0, v0, v0
416 ; CHECK-NEXT: sshll.4s v0, v0, #0
417 ; CHECK-NEXT: and.16b v0, v1, v0
420 %inst = fcmp olt <4 x float> zeroinitializer, %arg
421 %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
422 %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
423 %inst3 = select <4 x i1> %inst2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> zeroinitializer
424 ret <4 x float> %inst3
; Integer variant using an unsigned compare (icmp ult 0, %arg): lanes 2 and 0
; of the <4 x i1> result are picked, each is duplicated pairwise (mask
; 0,0,1,1), and the result selects between splat 1 and zero.
; Both run lines share one set of assertions; the file header records a
; GlobalISel fallback warning for this function.
427 define <4 x i32> @shuffle_zip2(<4 x i32> %arg) {
428 ; CHECK-LABEL: shuffle_zip2:
429 ; CHECK: // %bb.0: // %bb
430 ; CHECK-NEXT: cmtst.4s v0, v0, v0
431 ; CHECK-NEXT: uzp1.8h v1, v0, v0
432 ; CHECK-NEXT: xtn.4h v0, v0
433 ; CHECK-NEXT: xtn.4h v1, v1
434 ; CHECK-NEXT: zip2.4h v0, v0, v1
435 ; CHECK-NEXT: movi.4s v1, #1
436 ; CHECK-NEXT: zip1.4h v0, v0, v0
437 ; CHECK-NEXT: ushll.4s v0, v0, #0
438 ; CHECK-NEXT: and.16b v0, v0, v1
441 %inst = icmp ult <4 x i32> zeroinitializer, %arg
442 %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
443 %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
444 %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
; Integer variant using a signed compare (icmp slt 0, %arg): lanes 2 and 0 of
; the <4 x i1> result are picked, each is duplicated pairwise (mask 0,0,1,1),
; and the result selects between splat 1 and zero.
; Both run lines share one set of assertions; the file header records a
; GlobalISel fallback warning for this function.
448 define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
449 ; CHECK-LABEL: shuffle_zip3:
450 ; CHECK: // %bb.0: // %bb
451 ; CHECK-NEXT: cmgt.4s v0, v0, #0
452 ; CHECK-NEXT: uzp1.8h v1, v0, v0
453 ; CHECK-NEXT: xtn.4h v0, v0
454 ; CHECK-NEXT: xtn.4h v1, v1
455 ; CHECK-NEXT: zip2.4h v0, v0, v1
456 ; CHECK-NEXT: movi.4s v1, #1
457 ; CHECK-NEXT: zip1.4h v0, v0, v0
458 ; CHECK-NEXT: sshll.4s v0, v0, #0
459 ; CHECK-NEXT: and.16b v0, v0, v1
462 %inst = icmp slt <4 x i32> zeroinitializer, %arg
463 %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
464 %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
465 %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer