1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5 ; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1
6 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes
8 ; ===== Legal Vector Types =====
; Irregular two-input shuffle of <8 x i8>: mask <1,3,5,7,8,10,12,15> takes the
; odd lanes of %a followed by lanes 0,2,4,7 of %b. Both selectors are expected
; to pack %a/%b into one q register and index it with a single-table TBL whose
; index vector is loaded from the constant pool.
10 define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) {
11 ; CHECK-SD-LABEL: shufflevector_v8i8:
13 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
14 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
15 ; CHECK-SD-NEXT: adrp x8, .LCPI0_0
16 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
17 ; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
18 ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b
21 ; CHECK-GI-LABEL: shufflevector_v8i8:
23 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
24 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
25 ; CHECK-GI-NEXT: adrp x8, .LCPI0_0
26 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
27 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
28 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
29 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
31 %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
; Irregular two-input shuffle of <16 x i8> (indices 0-15 pick from %a, 16-31
; from %b). Both selectors are expected to use a two-register TBL over
; { v0, v1 } with the index vector loaded from the constant pool.
35 define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
36 ; CHECK-SD-LABEL: shufflevector_v16i8:
38 ; CHECK-SD-NEXT: adrp x8, .LCPI1_0
39 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
40 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
41 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
42 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
45 ; CHECK-GI-LABEL: shufflevector_v16i8:
47 ; CHECK-GI-NEXT: adrp x8, .LCPI1_0
48 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
49 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
50 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
51 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
53 %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
; Mask <1,3,5,7> selects the odd lanes of the %a:%b concatenation; both
; selectors share the expectation of a single UZP2 on .4h.
57 define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) {
58 ; CHECK-LABEL: shufflevector_v4i16:
60 ; CHECK-NEXT: uzp2 v0.4h, v0.4h, v1.4h
62 %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; Irregular two-input shuffle of <8 x i16>: mask <1,3,5,7,8,10,12,15> takes the
; odd lanes of %a followed by lanes 0,2,4,7 of %b. Both selectors are expected
; to use a two-register byte TBL with a constant-pool index vector.
66 define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
67 ; CHECK-SD-LABEL: shufflevector_v8i16:
69 ; CHECK-SD-NEXT: adrp x8, .LCPI3_0
70 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
71 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
72 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
73 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
76 ; CHECK-GI-LABEL: shufflevector_v8i16:
78 ; CHECK-GI-NEXT: adrp x8, .LCPI3_0
79 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
80 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
81 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
82 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
84 %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
; Mask <1,3> selects lane 1 of %a and lane 1 of %b; both selectors share the
; expectation of a single ZIP2 on .2s.
88 define <2 x i32> @shufflevector_v2i32(<2 x i32> %a, <2 x i32> %b) {
89 ; CHECK-LABEL: shufflevector_v2i32:
91 ; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
93 %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
; Mask <1,3,5,7> selects the odd lanes of the %a:%b concatenation; both
; selectors share the expectation of a single UZP2 on .4s.
97 define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
98 ; CHECK-LABEL: shufflevector_v4i32:
100 ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
102 %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; Mask <1,3> selects lane 1 of %a and lane 1 of %b; both selectors share the
; expectation of a single ZIP2 on .2d.
106 define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
107 ; CHECK-LABEL: shufflevector_v2i64:
109 ; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
111 %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
115 ; ===== Legal Vector Types with Zero Masks =====
; All-zero mask: every result lane is lane 0 of %a (%b is unused by the mask).
; Both selectors share the expectation of a lane-0 DUP on .8b.
117 define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) {
118 ; CHECK-LABEL: shufflevector_v8i8_zeroes:
120 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
121 ; CHECK-NEXT: dup v0.8b, v0.b[0]
123 %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; All-zero mask: every result lane is lane 0 of %a (%b is unused by the mask).
; Both selectors share the expectation of a lane-0 DUP on .16b.
127 define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) {
128 ; CHECK-LABEL: shufflevector_v16i8_zeroes:
130 ; CHECK-NEXT: dup v0.16b, v0.b[0]
132 %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; All-zero mask: every result lane is lane 0 of %a (%b is unused by the mask).
; Both selectors share the expectation of a lane-0 DUP on .4h.
136 define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) {
137 ; CHECK-LABEL: shufflevector_v4i16_zeroes:
139 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
140 ; CHECK-NEXT: dup v0.4h, v0.h[0]
142 %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; All-zero mask: every result lane is lane 0 of %a (%b is unused by the mask).
; Both selectors share the expectation of a lane-0 DUP on .8h.
146 define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) {
147 ; CHECK-LABEL: shufflevector_v8i16_zeroes:
149 ; CHECK-NEXT: dup v0.8h, v0.h[0]
151 %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; All-zero mask: every result lane is lane 0 of %a (%b is unused by the mask).
; Both selectors share the expectation of a lane-0 DUP on .2s.
155 define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) {
156 ; CHECK-LABEL: shufflevector_v2i32_zeroes:
158 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
159 ; CHECK-NEXT: dup v0.2s, v0.s[0]
161 %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 0>
; All-zero mask: every result lane is lane 0 of %a (%b is unused by the mask).
; Both selectors share the expectation of a lane-0 DUP on .4s.
165 define <4 x i32> @shufflevector_v4i32_zeroes(<4 x i32> %a, <4 x i32> %b) {
166 ; CHECK-LABEL: shufflevector_v4i32_zeroes:
168 ; CHECK-NEXT: dup v0.4s, v0.s[0]
170 %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; All-zero mask: every result lane is lane 0 of %a (%b is unused by the mask).
; Both selectors share the expectation of a lane-0 DUP on .2d.
174 define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) {
175 ; CHECK-LABEL: shufflevector_v2i64_zeroes:
177 ; CHECK-NEXT: dup v0.2d, v0.d[0]
179 %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
183 ; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
; <2 x i1> shuffle with mask <0,3>: lane 0 of %a and lane 1 of %b. The expected
; code inserts %b's .s[1] into %a's .s[1]. The GlobalISel run line is expected
; to emit a fallback warning for this function (see the file header), so a
; single shared set of assertions covers both run lines.
185 define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
186 ; CHECK-LABEL: shufflevector_v2i1:
188 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
189 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
190 ; CHECK-NEXT: mov v0.s[1], v1.s[1]
191 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
193 %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3>
; <4 x i8> shuffle with mask <1,2,4,7> (lanes 1,2 of %a, lanes 0,3 of %b) whose
; result is bitcast to i32 and returned in w0. The two selectors diverge:
; SelectionDAG is expected to use EXT/ZIP1/EXT/UZP1 on the promoted .4h form,
; while GlobalISel is expected to repack both inputs into bytes and use a
; single-table TBL with a constant-pool index vector.
197 define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
198 ; CHECK-SD-LABEL: shufflevector_v4i8:
199 ; CHECK-SD: // %bb.0:
200 ; CHECK-SD-NEXT: sub sp, sp, #16
201 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
202 ; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6
203 ; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h
204 ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
205 ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
206 ; CHECK-SD-NEXT: fmov w0, s0
207 ; CHECK-SD-NEXT: add sp, sp, #16
210 ; CHECK-GI-LABEL: shufflevector_v4i8:
211 ; CHECK-GI: // %bb.0:
212 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
213 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
214 ; CHECK-GI-NEXT: mov h2, v0.h[1]
215 ; CHECK-GI-NEXT: mov h3, v1.h[1]
216 ; CHECK-GI-NEXT: adrp x8, .LCPI15_0
217 ; CHECK-GI-NEXT: mov h4, v0.h[2]
218 ; CHECK-GI-NEXT: mov h5, v0.h[3]
219 ; CHECK-GI-NEXT: mov h6, v1.h[3]
220 ; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
221 ; CHECK-GI-NEXT: mov h2, v1.h[2]
222 ; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
223 ; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
224 ; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
225 ; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
226 ; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
227 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
228 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
229 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
230 ; CHECK-GI-NEXT: fmov w0, s0
232 %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 7>
233 %d = bitcast <4 x i8> %c to i32
; <32 x i8> shuffle (two q registers per operand): the mask places lanes 0..7
; of %a at every fourth position and fills the rest with index 32 (lane 0 of
; %b). Both selectors are expected to emit two two-register TBLs, one per
; result half, each with its own constant-pool index vector.
237 define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
238 ; CHECK-SD-LABEL: shufflevector_v32i8:
239 ; CHECK-SD: // %bb.0:
240 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
241 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0
242 ; CHECK-SD-NEXT: adrp x9, .LCPI16_1
243 ; CHECK-SD-NEXT: mov v1.16b, v0.16b
244 ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
245 ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
246 ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
247 ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
250 ; CHECK-GI-LABEL: shufflevector_v32i8:
251 ; CHECK-GI: // %bb.0:
252 ; CHECK-GI-NEXT: mov v3.16b, v0.16b
253 ; CHECK-GI-NEXT: adrp x8, .LCPI16_1
254 ; CHECK-GI-NEXT: adrp x9, .LCPI16_0
255 ; CHECK-GI-NEXT: mov v4.16b, v2.16b
256 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
257 ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0]
258 ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
259 ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
261 %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
; <2 x i16> shuffle with mask <1,2> (lane 1 of %a, lane 0 of %b) whose result
; is bitcast to i32 and returned in w0. SelectionDAG is expected to use EXT and
; then assemble the i32 through two halfword stores and a word load on the
; stack; GlobalISel is expected to repack to halfwords and use a single-table
; TBL with a constant-pool index vector.
265 define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
266 ; CHECK-SD-LABEL: shufflevector_v2i16:
267 ; CHECK-SD: // %bb.0:
268 ; CHECK-SD-NEXT: sub sp, sp, #16
269 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
270 ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
271 ; CHECK-SD-NEXT: mov w8, v0.s[1]
272 ; CHECK-SD-NEXT: fmov w9, s0
273 ; CHECK-SD-NEXT: strh w9, [sp, #12]
274 ; CHECK-SD-NEXT: strh w8, [sp, #14]
275 ; CHECK-SD-NEXT: ldr w0, [sp, #12]
276 ; CHECK-SD-NEXT: add sp, sp, #16
279 ; CHECK-GI-LABEL: shufflevector_v2i16:
280 ; CHECK-GI: // %bb.0:
281 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
282 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
283 ; CHECK-GI-NEXT: mov s2, v0.s[1]
284 ; CHECK-GI-NEXT: mov s3, v1.s[1]
285 ; CHECK-GI-NEXT: adrp x8, .LCPI17_0
286 ; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
287 ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
288 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
289 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0]
290 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
291 ; CHECK-GI-NEXT: fmov w0, s0
293 %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 1, i32 2>
294 %d = bitcast <2 x i16> %c to i32
; <16 x i16> shuffle (two q registers per operand): the mask places lanes of
; %a at every fourth position and fills the rest with index 16 (lane 0 of %b).
; Both selectors are expected to emit two two-register TBLs, one per result
; half, each with its own constant-pool index vector.
; NOTE(review): the %a lanes used are 0, 1, 1, 3 -- the third group repeats
; lane 1 where the v32i8 variant counts 0, 1, 2, 3. This may be a typo for
; lane 2; confirm intent before changing, since the constant-pool contents
; (not asserted here) would change.
298 define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
299 ; CHECK-SD-LABEL: shufflevector_v16i16:
300 ; CHECK-SD: // %bb.0:
301 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
302 ; CHECK-SD-NEXT: adrp x8, .LCPI18_0
303 ; CHECK-SD-NEXT: adrp x9, .LCPI18_1
304 ; CHECK-SD-NEXT: mov v1.16b, v0.16b
305 ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
306 ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
307 ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
308 ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
311 ; CHECK-GI-LABEL: shufflevector_v16i16:
312 ; CHECK-GI: // %bb.0:
313 ; CHECK-GI-NEXT: mov v3.16b, v0.16b
314 ; CHECK-GI-NEXT: adrp x8, .LCPI18_1
315 ; CHECK-GI-NEXT: adrp x9, .LCPI18_0
316 ; CHECK-GI-NEXT: mov v4.16b, v2.16b
317 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
318 ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0]
319 ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
320 ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
322 %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
; Single-element shuffle: mask <1> selects the only lane of %b, so the expected
; code for both selectors is a plain register copy of d1 into d0.
326 define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
327 ; CHECK-LABEL: shufflevector_v1i32:
329 ; CHECK-NEXT: fmov d0, d1
331 %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
; <8 x i32> shuffle (two q registers per operand) with mask
; <1,3,5,7,8,10,12,15>: the low result half is the odd lanes of %a, the high
; half is lanes 0,2,4,7 of %b. Both selectors are expected to use UZP2 for the
; low half; for the high half SelectionDAG is expected to use UZP1 plus a lane
; insert, while GlobalISel is expected to use a two-register TBL.
335 define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
336 ; CHECK-SD-LABEL: shufflevector_v8i32:
337 ; CHECK-SD: // %bb.0:
338 ; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s
339 ; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s
340 ; CHECK-SD-NEXT: mov v2.s[3], v3.s[3]
341 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
344 ; CHECK-GI-LABEL: shufflevector_v8i32:
345 ; CHECK-GI: // %bb.0:
346 ; CHECK-GI-NEXT: adrp x8, .LCPI20_0
347 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
348 ; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s
349 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0]
350 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
351 ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b
353 %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
; <4 x i64> shuffle (two q registers per operand) with mask <1,3,5,7>: odd
; lanes of %a then odd lanes of %b. Both selectors are expected to use one ZIP2
; per result half; SelectionDAG additionally copies the second result into v1.
357 define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
358 ; CHECK-SD-LABEL: shufflevector_v4i64:
359 ; CHECK-SD: // %bb.0:
360 ; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d
361 ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
362 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
365 ; CHECK-GI-LABEL: shufflevector_v4i64:
366 ; CHECK-GI: // %bb.0:
367 ; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d
368 ; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d
370 %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
374 ; ===== Smaller/Larger Width Vectors with Zero Masks =====
; <2 x i1> splat of lane 0 of %a (all-zero mask); expected code is a lane-0 DUP
; on .2s. The GlobalISel run line is expected to emit a fallback warning for
; this function (see the file header), so one shared set of assertions covers
; both run lines.
376 define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
377 ; CHECK-LABEL: shufflevector_v2i1_zeroes:
379 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
380 ; CHECK-NEXT: dup v0.2s, v0.s[0]
382 %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0>
; <4 x i8> splat of lane 0 of %a (all-zero mask), bitcast to i32 and returned
; in w0. SelectionDAG is expected to DUP on .4h and narrow with UZP1 (with a
; stack adjustment around it); GlobalISel is expected to DUP the scalar from a
; GPR on .8b.
386 define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
387 ; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
388 ; CHECK-SD: // %bb.0:
389 ; CHECK-SD-NEXT: sub sp, sp, #16
390 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
391 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
392 ; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
393 ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
394 ; CHECK-SD-NEXT: fmov w0, s0
395 ; CHECK-SD-NEXT: add sp, sp, #16
398 ; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
399 ; CHECK-GI: // %bb.0:
400 ; CHECK-GI-NEXT: fmov w8, s0
401 ; CHECK-GI-NEXT: dup v0.8b, w8
402 ; CHECK-GI-NEXT: fmov w0, s0
404 %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
405 %d = bitcast <4 x i8> %c to i32
; <32 x i8> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of one lane-0 DUP on .16b, copied into v1 for the second half of
; the result.
409 define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
410 ; CHECK-LABEL: shufflevector_v32i8_zeroes:
412 ; CHECK-NEXT: dup v0.16b, v0.b[0]
413 ; CHECK-NEXT: mov v1.16b, v0.16b
415 %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; <2 x i16> splat of lane 0 of %a (all-zero mask), bitcast to i32 and returned
; in w0. SelectionDAG is expected to DUP on .2s and assemble the i32 through
; two halfword stores and a word load on the stack; GlobalISel is expected to
; DUP the scalar from a GPR on .4h.
419 define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
420 ; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
421 ; CHECK-SD: // %bb.0:
422 ; CHECK-SD-NEXT: sub sp, sp, #16
423 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
424 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
425 ; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
426 ; CHECK-SD-NEXT: fmov w9, s0
427 ; CHECK-SD-NEXT: strh w9, [sp, #12]
428 ; CHECK-SD-NEXT: mov w8, v1.s[1]
429 ; CHECK-SD-NEXT: strh w8, [sp, #14]
430 ; CHECK-SD-NEXT: ldr w0, [sp, #12]
431 ; CHECK-SD-NEXT: add sp, sp, #16
434 ; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
435 ; CHECK-GI: // %bb.0:
436 ; CHECK-GI-NEXT: fmov w8, s0
437 ; CHECK-GI-NEXT: dup v0.4h, w8
438 ; CHECK-GI-NEXT: fmov w0, s0
440 %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 0, i32 0>
441 %d = bitcast <2 x i16> %c to i32
; <16 x i16> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of one lane-0 DUP on .8h, copied into v1 for the second half of
; the result.
445 define <16 x i16> @shufflevector_v16i16_zeroes(<16 x i16> %a, <16 x i16> %b){
446 ; CHECK-LABEL: shufflevector_v16i16_zeroes:
448 ; CHECK-NEXT: dup v0.8h, v0.h[0]
449 ; CHECK-NEXT: mov v1.16b, v0.16b
451 %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Single-element shuffle: mask <0> selects the only lane of %a, i.e. the result
; equals the first argument. Both selectors share one set of assertions.
455 define <1 x i32> @shufflevector_v1i32_zeroes(<1 x i32> %a, <1 x i32> %b) {
456 ; CHECK-LABEL: shufflevector_v1i32_zeroes:
459 %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 0>
; <8 x i32> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of one lane-0 DUP on .4s, copied into v1 for the second half of
; the result.
463 define <8 x i32> @shufflevector_v8i32_zeroes(<8 x i32> %a, <8 x i32> %b) {
464 ; CHECK-LABEL: shufflevector_v8i32_zeroes:
466 ; CHECK-NEXT: dup v0.4s, v0.s[0]
467 ; CHECK-NEXT: mov v1.16b, v0.16b
469 %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; <4 x i64> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of one lane-0 DUP on .2d, copied into v1 for the second half of
; the result.
473 define <4 x i64> @shufflevector_v4i64_zeroes(<4 x i64> %a, <4 x i64> %b) {
474 ; CHECK-LABEL: shufflevector_v4i64_zeroes:
476 ; CHECK-NEXT: dup v0.2d, v0.d[0]
477 ; CHECK-NEXT: mov v1.16b, v0.16b
479 %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
483 ; ===== Vectors with Non-Pow 2 Widths =====
; <3 x i8> shuffle with mask <1,2,4> (lanes 1,2 of %a, lane 1 of %b). In the
; expected code the elements arrive and leave in w registers. SelectionDAG is
; expected to reduce the shuffle to three GPR moves; GlobalISel is expected to
; build byte vectors from the GPRs, use a single-table TBL with a constant-pool
; index vector, and extract the three result lanes back into w0-w2.
485 define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
486 ; CHECK-SD-LABEL: shufflevector_v3i8:
487 ; CHECK-SD: // %bb.0:
488 ; CHECK-SD-NEXT: mov w0, w1
489 ; CHECK-SD-NEXT: mov w1, w2
490 ; CHECK-SD-NEXT: mov w2, w4
493 ; CHECK-GI-LABEL: shufflevector_v3i8:
494 ; CHECK-GI: // %bb.0:
495 ; CHECK-GI-NEXT: fmov s0, w0
496 ; CHECK-GI-NEXT: fmov s1, w1
497 ; CHECK-GI-NEXT: adrp x8, .LCPI30_0
498 ; CHECK-GI-NEXT: fmov s2, w3
499 ; CHECK-GI-NEXT: fmov s3, w4
500 ; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
501 ; CHECK-GI-NEXT: fmov s1, w2
502 ; CHECK-GI-NEXT: mov v2.b[1], v3.b[0]
503 ; CHECK-GI-NEXT: fmov s3, w5
504 ; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
505 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI30_0]
506 ; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
507 ; CHECK-GI-NEXT: mov v0.d[1], v2.d[0]
508 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
509 ; CHECK-GI-NEXT: mov b1, v0.b[1]
510 ; CHECK-GI-NEXT: mov b2, v0.b[2]
511 ; CHECK-GI-NEXT: fmov w0, s0
512 ; CHECK-GI-NEXT: fmov w1, s1
513 ; CHECK-GI-NEXT: fmov w2, s2
515 %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 1, i32 2, i32 4>
; <7 x i8> shuffle with mask <1,3,5,7,8,10,12>. With 7-lane inputs, indices
; 0-6 pick from %a and 7-13 from %b, so this is lanes 1,3,5 of %a followed by
; lanes 0,1,3,5 of %b. Both selectors are expected to pack the inputs into one
; q register and use a single-table TBL with a constant-pool index vector.
519 define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
520 ; CHECK-SD-LABEL: shufflevector_v7i8:
521 ; CHECK-SD: // %bb.0:
522 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
523 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
524 ; CHECK-SD-NEXT: adrp x8, .LCPI31_0
525 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
526 ; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI31_0]
527 ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b
530 ; CHECK-GI-LABEL: shufflevector_v7i8:
531 ; CHECK-GI: // %bb.0:
532 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
533 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
534 ; CHECK-GI-NEXT: adrp x8, .LCPI31_0
535 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
536 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI31_0]
537 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
538 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
540 %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
; <3 x i16> shuffle with mask <1,2,4> (lanes 1,2 of %a, lane 1 of %b).
; SelectionDAG is expected to use a ZIP1/ZIP2 pair on .4h; GlobalISel is
; expected to pack the inputs into one q register and use a single-table TBL
; with a constant-pool index vector.
544 define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
545 ; CHECK-SD-LABEL: shufflevector_v3i16:
546 ; CHECK-SD: // %bb.0:
547 ; CHECK-SD-NEXT: zip1 v1.4h, v0.4h, v1.4h
548 ; CHECK-SD-NEXT: zip2 v0.4h, v1.4h, v0.4h
551 ; CHECK-GI-LABEL: shufflevector_v3i16:
552 ; CHECK-GI: // %bb.0:
553 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
554 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
555 ; CHECK-GI-NEXT: adrp x8, .LCPI32_0
556 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
557 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI32_0]
558 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
559 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
561 %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 2, i32 4>
; <7 x i16> shuffle with mask <1,3,5,7,8,10,12>. With 7-lane inputs, indices
; 0-6 pick from %a and 7-13 from %b, so this is lanes 1,3,5 of %a followed by
; lanes 0,1,3,5 of %b. Both selectors are expected to use a two-register byte
; TBL with a constant-pool index vector.
565 define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
566 ; CHECK-SD-LABEL: shufflevector_v7i16:
567 ; CHECK-SD: // %bb.0:
568 ; CHECK-SD-NEXT: adrp x8, .LCPI33_0
569 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
570 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
571 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
572 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
575 ; CHECK-GI-LABEL: shufflevector_v7i16:
576 ; CHECK-GI: // %bb.0:
577 ; CHECK-GI-NEXT: adrp x8, .LCPI33_0
578 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
579 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
580 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
581 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
583 %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
; <3 x i32> shuffle with mask <1,2,4> (lanes 1,2 of %a, lane 1 of %b).
; SelectionDAG is expected to use a ZIP1/ZIP2 pair on .4s; GlobalISel is
; expected to use a two-register byte TBL with a constant-pool index vector.
587 define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
588 ; CHECK-SD-LABEL: shufflevector_v3i32:
589 ; CHECK-SD: // %bb.0:
590 ; CHECK-SD-NEXT: zip1 v1.4s, v0.4s, v1.4s
591 ; CHECK-SD-NEXT: zip2 v0.4s, v1.4s, v0.4s
594 ; CHECK-GI-LABEL: shufflevector_v3i32:
595 ; CHECK-GI: // %bb.0:
596 ; CHECK-GI-NEXT: adrp x8, .LCPI34_0
597 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
598 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
599 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
600 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
602 %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4>
606 ; ===== Vectors with Non-Pow 2 Widths with Zero Masks =====
; <3 x i8> splat of lane 0 of %a (all-zero mask). In the expected code the
; elements arrive and leave in w registers. SelectionDAG is expected to copy
; w0 into w1 and w2; GlobalISel is expected to DUP w0 on .8b and extract
; lanes 0-2 back into w0-w2.
608 define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) {
609 ; CHECK-SD-LABEL: shufflevector_v3i8_zeroes:
610 ; CHECK-SD: // %bb.0:
611 ; CHECK-SD-NEXT: mov w1, w0
612 ; CHECK-SD-NEXT: mov w2, w0
615 ; CHECK-GI-LABEL: shufflevector_v3i8_zeroes:
616 ; CHECK-GI: // %bb.0:
617 ; CHECK-GI-NEXT: dup v0.8b, w0
618 ; CHECK-GI-NEXT: mov b1, v0.b[1]
619 ; CHECK-GI-NEXT: mov b2, v0.b[2]
620 ; CHECK-GI-NEXT: fmov w0, s0
621 ; CHECK-GI-NEXT: fmov w1, s1
622 ; CHECK-GI-NEXT: fmov w2, s2
624 %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 0, i32 0, i32 0>
; <7 x i8> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of a lane-0 DUP on .8b.
628 define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
629 ; CHECK-LABEL: shufflevector_v7i8_zeroes:
631 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
632 ; CHECK-NEXT: dup v0.8b, v0.b[0]
634 %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; <3 x i16> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of a lane-0 DUP on .4h.
638 define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) {
639 ; CHECK-LABEL: shufflevector_v3i16_zeroes:
641 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
642 ; CHECK-NEXT: dup v0.4h, v0.h[0]
644 %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 0, i32 0>
; <7 x i16> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of a lane-0 DUP on .8h.
648 define <7 x i16> @shufflevector_v7i16_zeroes(<7 x i16> %a, <7 x i16> %b) {
649 ; CHECK-LABEL: shufflevector_v7i16_zeroes:
651 ; CHECK-NEXT: dup v0.8h, v0.h[0]
653 %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; <3 x i32> splat of lane 0 of %a (all-zero mask). Both selectors share the
; expectation of a lane-0 DUP on .4s.
657 define <3 x i32> @shufflevector_v3i32_zeroes(<3 x i32> %a, <3 x i32> %b) {
658 ; CHECK-LABEL: shufflevector_v3i32_zeroes:
660 ; CHECK-NEXT: dup v0.4s, v0.s[0]
662 %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 0, i32 0, i32 0>