1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Inserts the i8 scalar %tmp2 into lane 15 of <16 x i8> %tmp1. Expected: mov v0.b[15], w0
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
7 ; CHECK-NEXT: mov v0.b[15], w0
9 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; Inserts the i16 scalar %tmp2 into lane 6 of <8 x i16> %tmp1. Expected: mov v0.h[6], w0
13 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
14 ; CHECK-LABEL: ins8hw:
16 ; CHECK-NEXT: mov v0.h[6], w0
18 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; Inserts the i32 scalar %tmp2 into lane 2 of <4 x i32> %tmp1. Expected: mov v0.s[2], w0
22 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
23 ; CHECK-LABEL: ins4sw:
25 ; CHECK-NEXT: mov v0.s[2], w0
27 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; Inserts the i64 scalar %tmp2 into lane 1 of <2 x i64> %tmp1. Expected: mov v0.d[1], x0
31 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
32 ; CHECK-LABEL: ins2dw:
34 ; CHECK-NEXT: mov v0.d[1], x0
36 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; Inserts the i8 scalar %tmp2 into lane 5 of the 64-bit vector <8 x i8> %tmp1. Expected: mov v0.b[5], w0
40 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
41 ; CHECK-LABEL: ins8bw:
43 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
44 ; CHECK-NEXT: mov v0.b[5], w0
45 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
47 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; Inserts the i16 scalar %tmp2 into lane 3 of the 64-bit vector <4 x i16> %tmp1. Expected: mov v0.h[3], w0
51 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
52 ; CHECK-LABEL: ins4hw:
54 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
55 ; CHECK-NEXT: mov v0.h[3], w0
56 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
58 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; Inserts the i32 scalar %tmp2 into lane 1 of the 64-bit vector <2 x i32> %tmp1. Expected: mov v0.s[1], w0
62 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
63 ; CHECK-LABEL: ins2sw:
65 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
66 ; CHECK-NEXT: mov v0.s[1], w0
67 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
69 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Copies lane 2 of <16 x i8> %tmp1 into lane 15 of <16 x i8> %tmp2.
; Expected: mov v1.b[15], v0.b[2], then mov v0.16b, v1.16b
73 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
74 ; CHECK-LABEL: ins16b16:
76 ; CHECK-NEXT: mov v1.b[15], v0.b[2]
77 ; CHECK-NEXT: mov v0.16b, v1.16b
79 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
80 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of <8 x i16> %tmp1 into lane 7 of <8 x i16> %tmp2.
; Expected: mov v1.h[7], v0.h[2], then mov v0.16b, v1.16b
84 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
85 ; CHECK-LABEL: ins8h8:
87 ; CHECK-NEXT: mov v1.h[7], v0.h[2]
88 ; CHECK-NEXT: mov v0.16b, v1.16b
90 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
91 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 2 of <4 x i32> %tmp1 into lane 1 of <4 x i32> %tmp2.
; Expected: mov v1.s[1], v0.s[2], then mov v0.16b, v1.16b
95 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
96 ; CHECK-LABEL: ins4s4:
98 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
99 ; CHECK-NEXT: mov v0.16b, v1.16b
101 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
102 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of <2 x i64> %tmp1 into lane 1 of <2 x i64> %tmp2.
; Expected: mov v1.d[1], v0.d[0], then mov v0.16b, v1.16b
106 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
107 ; CHECK-LABEL: ins2d2:
109 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
110 ; CHECK-NEXT: mov v0.16b, v1.16b
112 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
113 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies float lane 2 of <4 x float> %tmp1 into lane 1 of <4 x float> %tmp2.
; Expected: mov v1.s[1], v0.s[2], then mov v0.16b, v1.16b
117 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
118 ; CHECK-LABEL: ins4f4:
120 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
121 ; CHECK-NEXT: mov v0.16b, v1.16b
123 %tmp3 = extractelement <4 x float> %tmp1, i32 2
124 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
125 ret <4 x float> %tmp4
; Copies double lane 0 of <2 x double> %tmp1 into lane 1 of <2 x double> %tmp2.
; Expected: mov v1.d[1], v0.d[0], then mov v0.16b, v1.16b
128 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
129 ; CHECK-LABEL: ins2df2:
131 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
132 ; CHECK-NEXT: mov v0.16b, v1.16b
134 %tmp3 = extractelement <2 x double> %tmp1, i32 0
135 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
136 ret <2 x double> %tmp4
; Copies lane 2 of the 64-bit vector <8 x i8> %tmp1 into lane 15 of <16 x i8> %tmp2.
; Expected: mov v1.b[15], v0.b[2], then mov v0.16b, v1.16b
139 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
140 ; CHECK-LABEL: ins8b16:
142 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
143 ; CHECK-NEXT: mov v1.b[15], v0.b[2]
144 ; CHECK-NEXT: mov v0.16b, v1.16b
146 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
147 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of the 64-bit vector <4 x i16> %tmp1 into lane 7 of <8 x i16> %tmp2.
; Expected: mov v1.h[7], v0.h[2], then mov v0.16b, v1.16b
151 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
152 ; CHECK-LABEL: ins4h8:
154 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
155 ; CHECK-NEXT: mov v1.h[7], v0.h[2]
156 ; CHECK-NEXT: mov v0.16b, v1.16b
158 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
159 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 1 of the 64-bit vector <2 x i32> %tmp1 into lane 1 of <4 x i32> %tmp2.
; Expected: mov v1.s[1], v0.s[1], then mov v0.16b, v1.16b
163 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
164 ; CHECK-LABEL: ins2s4:
166 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
167 ; CHECK-NEXT: mov v1.s[1], v0.s[1]
168 ; CHECK-NEXT: mov v0.16b, v1.16b
170 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
171 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the single lane of <1 x i64> %tmp1 into lane 1 of <2 x i64> %tmp2.
; Expected: mov v1.d[1], v0.d[0], then mov v0.16b, v1.16b
175 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
176 ; CHECK-LABEL: ins1d2:
178 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
179 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
180 ; CHECK-NEXT: mov v0.16b, v1.16b
182 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
183 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies float lane 1 of the 64-bit vector <2 x float> %tmp1 into lane 1 of <4 x float> %tmp2.
; Expected: mov v1.s[1], v0.s[1], then mov v0.16b, v1.16b
187 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
188 ; CHECK-LABEL: ins2f4:
190 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
191 ; CHECK-NEXT: mov v1.s[1], v0.s[1]
192 ; CHECK-NEXT: mov v0.16b, v1.16b
194 %tmp3 = extractelement <2 x float> %tmp1, i32 1
195 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
196 ret <4 x float> %tmp4
; Copies the single lane of <1 x double> %tmp1 into lane 1 of <2 x double> %tmp2.
; Expected: mov v1.d[1], v0.d[0], then mov v0.16b, v1.16b
199 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
200 ; CHECK-LABEL: ins1f2:
202 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
203 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
204 ; CHECK-NEXT: mov v0.16b, v1.16b
206 %tmp3 = extractelement <1 x double> %tmp1, i32 0
207 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
208 ret <2 x double> %tmp4
; Copies the single lane of <1 x double> %tmp1 into lane 1 of <2 x double> %tmp2, with the
; destination vector passed as the first argument. Expected: mov v0.d[1], v1.d[0]
211 define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) {
212 ; CHECK-LABEL: ins1f2_args_flipped:
214 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
215 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
217 %tmp3 = extractelement <1 x double> %tmp1, i32 0
218 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
219 ret <2 x double> %tmp4
; Copies lane 2 of <16 x i8> %tmp1 into lane 7 of the 64-bit vector <8 x i8> %tmp2.
; Expected: mov v1.b[7], v0.b[2], then fmov d0, d1
222 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
223 ; CHECK-LABEL: ins16b8:
225 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
226 ; CHECK-NEXT: mov v1.b[7], v0.b[2]
227 ; CHECK-NEXT: fmov d0, d1
229 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
230 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
; Copies lane 2 of <8 x i16> %tmp1 into lane 3 of the 64-bit vector <4 x i16> %tmp2.
; Expected: mov v1.h[3], v0.h[2], then fmov d0, d1
234 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
235 ; CHECK-LABEL: ins8h4:
237 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
238 ; CHECK-NEXT: mov v1.h[3], v0.h[2]
239 ; CHECK-NEXT: fmov d0, d1
241 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
242 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 2 of <4 x i32> %tmp1 into lane 1 of the 64-bit vector <2 x i32> %tmp2.
; Expected: mov v1.s[1], v0.s[2], then fmov d0, d1
246 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
247 ; CHECK-LABEL: ins4s2:
249 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
250 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
251 ; CHECK-NEXT: fmov d0, d1
253 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
254 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of <2 x i64> %tmp1 into the single lane of <1 x i64> %tmp2.
; The only assertion visible after the label is a register kill annotation on $d0/$q0.
258 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
259 ; CHECK-LABEL: ins2d1:
261 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
263 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
264 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies float lane 2 of <4 x float> %tmp1 into lane 1 of the 64-bit vector <2 x float> %tmp2.
; Expected: mov v1.s[1], v0.s[2], then fmov d0, d1
268 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
269 ; CHECK-LABEL: ins4f2:
271 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
272 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
273 ; CHECK-NEXT: fmov d0, d1
275 %tmp3 = extractelement <4 x float> %tmp1, i32 2
276 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
277 ret <2 x float> %tmp4
; Copies double lane 1 of <2 x double> %tmp1 into the single lane of <1 x double> %tmp2.
; Expected: ext v0.16b, v0.16b, v0.16b, #8
280 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
281 ; CHECK-LABEL: ins2f1:
283 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
284 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
286 %tmp3 = extractelement <2 x double> %tmp1, i32 1
287 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
288 ret <1 x double> %tmp4
; Copies lane 2 of <8 x i8> %tmp1 into lane 4 of <8 x i8> %tmp2 (both 64-bit vectors).
; Expected: mov v1.b[4], v0.b[2], then fmov d0, d1
291 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
292 ; CHECK-LABEL: ins8b8:
294 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
295 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
296 ; CHECK-NEXT: mov v1.b[4], v0.b[2]
297 ; CHECK-NEXT: fmov d0, d1
299 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
300 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; Copies lane 2 of <4 x i16> %tmp1 into lane 3 of <4 x i16> %tmp2 (both 64-bit vectors).
; Expected: mov v1.h[3], v0.h[2], then fmov d0, d1
304 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
305 ; CHECK-LABEL: ins4h4:
307 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
308 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
309 ; CHECK-NEXT: mov v1.h[3], v0.h[2]
310 ; CHECK-NEXT: fmov d0, d1
312 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
313 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 0 of <2 x i32> %tmp1 into lane 1 of <2 x i32> %tmp2 (both 64-bit vectors).
; Expected: mov v1.s[1], v0.s[0], then fmov d0, d1
317 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
318 ; CHECK-LABEL: ins2s2:
320 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
321 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
322 ; CHECK-NEXT: mov v1.s[1], v0.s[0]
323 ; CHECK-NEXT: fmov d0, d1
325 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
326 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 0
; Copies the single lane of <1 x i64> %tmp1 into the single lane of <1 x i64> %tmp2.
; No instruction assertions are visible after the label in this view.
330 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
331 ; CHECK-LABEL: ins1d1:
334 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
335 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies float lane 0 of <2 x float> %tmp1 into lane 1 of <2 x float> %tmp2 (both 64-bit vectors).
; Expected: mov v1.s[1], v0.s[0], then fmov d0, d1
339 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
340 ; CHECK-LABEL: ins2f2:
342 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
343 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
344 ; CHECK-NEXT: mov v1.s[1], v0.s[0]
345 ; CHECK-NEXT: fmov d0, d1
347 %tmp3 = extractelement <2 x float> %tmp1, i32 0
348 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
349 ret <2 x float> %tmp4
; Copies the single lane of <1 x double> %tmp1 into the single lane of <1 x double> %tmp2.
; No instruction assertions are visible after the label in this view.
352 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
353 ; CHECK-LABEL: ins1df1:
356 %tmp3 = extractelement <1 x double> %tmp1, i32 0
357 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
358 ret <1 x double> %tmp4
; Extracts lane 8 of <16 x i8> %tmp1 and zero-extends it to i32. Expected: umov w0, v0.b[8]
361 define i32 @umovw16b(<16 x i8> %tmp1) {
362 ; CHECK-LABEL: umovw16b:
364 ; CHECK-NEXT: umov w0, v0.b[8]
366 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
367 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of <8 x i16> %tmp1 and zero-extends it to i32. Expected: umov w0, v0.h[2]
371 define i32 @umovw8h(<8 x i16> %tmp1) {
372 ; CHECK-LABEL: umovw8h:
374 ; CHECK-NEXT: umov w0, v0.h[2]
376 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
377 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 2 of <4 x i32> %tmp1 (no extension needed). Expected: mov w0, v0.s[2]
381 define i32 @umovw4s(<4 x i32> %tmp1) {
382 ; CHECK-LABEL: umovw4s:
384 ; CHECK-NEXT: mov w0, v0.s[2]
386 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Extracts lane 1 of <2 x i64> %tmp1. Expected: mov x0, v0.d[1]
390 define i64 @umovx2d(<2 x i64> %tmp1) {
391 ; CHECK-LABEL: umovx2d:
393 ; CHECK-NEXT: mov x0, v0.d[1]
395 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; Extracts lane 7 of the 64-bit vector <8 x i8> %tmp1 and zero-extends it to i32. Expected: umov w0, v0.b[7]
399 define i32 @umovw8b(<8 x i8> %tmp1) {
400 ; CHECK-LABEL: umovw8b:
402 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
403 ; CHECK-NEXT: umov w0, v0.b[7]
405 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
406 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of the 64-bit vector <4 x i16> %tmp1 and zero-extends it to i32. Expected: umov w0, v0.h[2]
410 define i32 @umovw4h(<4 x i16> %tmp1) {
411 ; CHECK-LABEL: umovw4h:
413 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
414 ; CHECK-NEXT: umov w0, v0.h[2]
416 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
417 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 1 of the 64-bit vector <2 x i32> %tmp1. Expected: mov w0, v0.s[1]
421 define i32 @umovw2s(<2 x i32> %tmp1) {
422 ; CHECK-LABEL: umovw2s:
424 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
425 ; CHECK-NEXT: mov w0, v0.s[1]
427 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; Extracts the single lane of <1 x i64> %tmp1. Expected: fmov x0, d0
431 define i64 @umovx1d(<1 x i64> %tmp1) {
432 ; CHECK-LABEL: umovx1d:
434 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
435 ; CHECK-NEXT: fmov x0, d0
437 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Extracts lane 8 of <16 x i8> %tmp1, sign-extends it to i32 and adds the result to itself.
; Expected: smov w8, v0.b[8], then add w0, w8, w8
441 define i32 @smovw16b(<16 x i8> %tmp1) {
442 ; CHECK-LABEL: smovw16b:
444 ; CHECK-NEXT: smov w8, v0.b[8]
445 ; CHECK-NEXT: add w0, w8, w8
447 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
448 %tmp4 = sext i8 %tmp3 to i32
449 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of <8 x i16> %tmp1, sign-extends it to i32 and adds the result to itself.
; Expected: smov w8, v0.h[2], then add w0, w8, w8
453 define i32 @smovw8h(<8 x i16> %tmp1) {
454 ; CHECK-LABEL: smovw8h:
456 ; CHECK-NEXT: smov w8, v0.h[2]
457 ; CHECK-NEXT: add w0, w8, w8
459 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
460 %tmp4 = sext i16 %tmp3 to i32
461 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 8 of <16 x i8> %tmp1 and sign-extends it to i64. Expected: smov x0, v0.b[8]
465 define i64 @smovx16b(<16 x i8> %tmp1) {
466 ; CHECK-LABEL: smovx16b:
468 ; CHECK-NEXT: smov x0, v0.b[8]
470 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
471 %tmp4 = sext i8 %tmp3 to i64
; Extracts lane 2 of <8 x i16> %tmp1 and sign-extends it to i64. Expected: smov x0, v0.h[2]
475 define i64 @smovx8h(<8 x i16> %tmp1) {
476 ; CHECK-LABEL: smovx8h:
478 ; CHECK-NEXT: smov x0, v0.h[2]
480 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
481 %tmp4 = sext i16 %tmp3 to i64
; Extracts lane 2 of <4 x i32> %tmp1 and sign-extends it to i64. Expected: smov x0, v0.s[2]
485 define i64 @smovx4s(<4 x i32> %tmp1) {
486 ; CHECK-LABEL: smovx4s:
488 ; CHECK-NEXT: smov x0, v0.s[2]
490 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
491 %tmp4 = sext i32 %tmp3 to i64
; Extracts lane 4 of the 64-bit vector <8 x i8> %tmp1, sign-extends it to i32 and adds the result to itself.
; Expected: smov w8, v0.b[4], then add w0, w8, w8
495 define i32 @smovw8b(<8 x i8> %tmp1) {
496 ; CHECK-LABEL: smovw8b:
498 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
499 ; CHECK-NEXT: smov w8, v0.b[4]
500 ; CHECK-NEXT: add w0, w8, w8
502 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
503 %tmp4 = sext i8 %tmp3 to i32
504 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of the 64-bit vector <4 x i16> %tmp1, sign-extends it to i32 and adds the result to itself.
; Expected: smov w8, v0.h[2], then add w0, w8, w8
508 define i32 @smovw4h(<4 x i16> %tmp1) {
509 ; CHECK-LABEL: smovw4h:
511 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
512 ; CHECK-NEXT: smov w8, v0.h[2]
513 ; CHECK-NEXT: add w0, w8, w8
515 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
516 %tmp4 = sext i16 %tmp3 to i32
517 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 6 of the 64-bit vector <8 x i8> %tmp1 and sign-extends it to i32. Expected: smov w0, v0.b[6]
; NOTE(review): the name says "x" but the return type is i32 and the expected register is w0 - confirm intent.
521 define i32 @smovx8b(<8 x i8> %tmp1) {
522 ; CHECK-LABEL: smovx8b:
524 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
525 ; CHECK-NEXT: smov w0, v0.b[6]
527 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
528 %tmp4 = sext i8 %tmp3 to i32
; Extracts lane 2 of the 64-bit vector <4 x i16> %tmp1 and sign-extends it to i32. Expected: smov w0, v0.h[2]
; NOTE(review): the name says "x" but the return type is i32 and the expected register is w0 - confirm intent.
532 define i32 @smovx4h(<4 x i16> %tmp1) {
533 ; CHECK-LABEL: smovx4h:
535 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
536 ; CHECK-NEXT: smov w0, v0.h[2]
538 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
539 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 1 of the 64-bit vector <2 x i32> %tmp1 and sign-extends it to i64. Expected: smov x0, v0.s[1]
543 define i64 @smovx2s(<2 x i32> %tmp1) {
544 ; CHECK-LABEL: smovx2s:
546 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
547 ; CHECK-NEXT: smov x0, v0.s[1]
549 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
550 %tmp4 = sext i32 %tmp3 to i64
; Shuffle that yields %v1 with lane 5 replaced by lane 3 of %v2 (mask index 11 = 8 + 3).
; Expected: mov v0.b[5], v1.b[3]
554 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
555 ; CHECK-LABEL: test_vcopy_lane_s8:
557 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
558 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
559 ; CHECK-NEXT: mov v0.b[5], v1.b[3]
560 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
562 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
563 ret <8 x i8> %vset_lane
; Shuffle that yields %v1 with lane 14 replaced by lane 6 of %v2 (mask index 22 = 16 + 6).
; Expected: mov v0.b[14], v1.b[6]
566 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
567 ; CHECK-LABEL: test_vcopyq_laneq_s8:
569 ; CHECK-NEXT: mov v0.b[14], v1.b[6]
571 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
572 ret <16 x i8> %vset_lane
; Shuffle that yields %v2 (mask indices 8-14) with lane 7 replaced by lane 0 of %v1.
; Expected: mov v1.b[7], v0.b[0], then fmov d0, d1
575 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
576 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
578 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
579 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
580 ; CHECK-NEXT: mov v1.b[7], v0.b[0]
581 ; CHECK-NEXT: fmov d0, d1
583 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
584 ret <8 x i8> %vset_lane
; Shuffle that yields %v2 (mask indices 17-31) with lane 0 replaced by lane 15 of %v1.
; Expected: mov v1.b[0], v0.b[15], then mov v0.16b, v1.16b
587 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
588 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
590 ; CHECK-NEXT: mov v1.b[0], v0.b[15]
591 ; CHECK-NEXT: mov v0.16b, v1.16b
593 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
594 ret <16 x i8> %vset_lane
; Splats i8 %v1 into all 8 lanes of an <8 x i8> via a chain of insertelement. Expected: dup v0.8b, w0
597 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
598 ; CHECK-LABEL: test_vdup_n_u8:
600 ; CHECK-NEXT: dup v0.8b, w0
602 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
603 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
604 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
605 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
606 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
607 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
608 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
609 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
610 ret <8 x i8> %vecinit7.i
; Splats i16 %v1 into all 4 lanes of a <4 x i16> via a chain of insertelement. Expected: dup v0.4h, w0
613 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
614 ; CHECK-LABEL: test_vdup_n_u16:
616 ; CHECK-NEXT: dup v0.4h, w0
618 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
619 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
620 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
621 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
622 ret <4 x i16> %vecinit3.i
; Splats i32 %v1 into both lanes of a <2 x i32> via insertelement. Expected: dup v0.2s, w0
625 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
626 ; CHECK-LABEL: test_vdup_n_u32:
628 ; CHECK-NEXT: dup v0.2s, w0
630 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
631 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
632 ret <2 x i32> %vecinit1.i
; Places i64 %v1 into the single lane of a <1 x i64>. Expected: fmov d0, x0
635 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
636 ; CHECK-LABEL: test_vdup_n_u64:
638 ; CHECK-NEXT: fmov d0, x0
640 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
641 ret <1 x i64> %vecinit.i
; Splats i8 %v1 into all 16 lanes of a <16 x i8> via a chain of insertelement. Expected: dup v0.16b, w0
644 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
645 ; CHECK-LABEL: test_vdupq_n_u8:
647 ; CHECK-NEXT: dup v0.16b, w0
649 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
650 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
651 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
652 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
653 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
654 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
655 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
656 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
657 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
658 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
659 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
660 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
661 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
662 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
663 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
664 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
665 ret <16 x i8> %vecinit15.i
; Splats i16 %v1 into all 8 lanes of an <8 x i16> via a chain of insertelement. Expected: dup v0.8h, w0
668 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
669 ; CHECK-LABEL: test_vdupq_n_u16:
671 ; CHECK-NEXT: dup v0.8h, w0
673 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
674 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
675 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
676 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
677 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
678 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
679 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
680 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
681 ret <8 x i16> %vecinit7.i
; Splats i32 %v1 into all 4 lanes of a <4 x i32> via a chain of insertelement. Expected: dup v0.4s, w0
684 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
685 ; CHECK-LABEL: test_vdupq_n_u32:
687 ; CHECK-NEXT: dup v0.4s, w0
689 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
690 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
691 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
692 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
693 ret <4 x i32> %vecinit3.i
; Splats i64 %v1 into both lanes of a <2 x i64> via insertelement. Expected: dup v0.2d, x0
696 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
697 ; CHECK-LABEL: test_vdupq_n_u64:
699 ; CHECK-NEXT: dup v0.2d, x0
701 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
702 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
703 ret <2 x i64> %vecinit1.i
; Splats lane 5 of <8 x i8> %v1 across an <8 x i8> result. Expected: dup v0.8b, v0.b[5]
706 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
707 ; CHECK-LABEL: test_vdup_lane_s8:
709 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
710 ; CHECK-NEXT: dup v0.8b, v0.b[5]
712 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
713 ret <8 x i8> %shuffle
; Splats lane 2 of <4 x i16> %v1 across a <4 x i16> result. Expected: dup v0.4h, v0.h[2]
716 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
717 ; CHECK-LABEL: test_vdup_lane_s16:
719 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
720 ; CHECK-NEXT: dup v0.4h, v0.h[2]
722 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
723 ret <4 x i16> %shuffle
; Splats lane 1 of <2 x i32> %v1 across a <2 x i32> result. Expected: dup v0.2s, v0.s[1]
726 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
727 ; CHECK-LABEL: test_vdup_lane_s32:
729 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
730 ; CHECK-NEXT: dup v0.2s, v0.s[1]
732 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
733 ret <2 x i32> %shuffle
; Splats lane 5 of the 64-bit vector <8 x i8> %v1 across a <16 x i8> result. Expected: dup v0.16b, v0.b[5]
736 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
737 ; CHECK-LABEL: test_vdupq_lane_s8:
739 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
740 ; CHECK-NEXT: dup v0.16b, v0.b[5]
742 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
743 ret <16 x i8> %shuffle
; Splats lane 2 of the 64-bit vector <4 x i16> %v1 across an <8 x i16> result. Expected: dup v0.8h, v0.h[2]
746 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
747 ; CHECK-LABEL: test_vdupq_lane_s16:
749 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
750 ; CHECK-NEXT: dup v0.8h, v0.h[2]
752 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
753 ret <8 x i16> %shuffle
; Splats lane 1 of the 64-bit vector <2 x i32> %v1 across a <4 x i32> result. Expected: dup v0.4s, v0.s[1]
756 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
757 ; CHECK-LABEL: test_vdupq_lane_s32:
759 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
760 ; CHECK-NEXT: dup v0.4s, v0.s[1]
762 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
763 ret <4 x i32> %shuffle
; Splats the single lane of <1 x i64> %v1 across a <2 x i64> result (all-zero mask). Expected: dup v0.2d, v0.d[0]
766 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
767 ; CHECK-LABEL: test_vdupq_lane_s64:
769 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
770 ; CHECK-NEXT: dup v0.2d, v0.d[0]
772 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
773 ret <2 x i64> %shuffle
; Splats lane 5 of <16 x i8> %v1 across an <8 x i8> result. Expected: dup v0.8b, v0.b[5]
776 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
777 ; CHECK-LABEL: test_vdup_laneq_s8:
779 ; CHECK-NEXT: dup v0.8b, v0.b[5]
781 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
782 ret <8 x i8> %shuffle
; Splats lane 2 of <8 x i16> %v1 across a <4 x i16> result. Expected: dup v0.4h, v0.h[2]
785 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
786 ; CHECK-LABEL: test_vdup_laneq_s16:
788 ; CHECK-NEXT: dup v0.4h, v0.h[2]
790 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
791 ret <4 x i16> %shuffle
; Splats lane 1 of <4 x i32> %v1 across a <2 x i32> result. Expected: dup v0.2s, v0.s[1]
794 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
795 ; CHECK-LABEL: test_vdup_laneq_s32:
797 ; CHECK-NEXT: dup v0.2s, v0.s[1]
799 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
800 ret <2 x i32> %shuffle
; Splats lane 5 of <16 x i8> %v1 across a <16 x i8> result. Expected: dup v0.16b, v0.b[5]
803 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
804 ; CHECK-LABEL: test_vdupq_laneq_s8:
806 ; CHECK-NEXT: dup v0.16b, v0.b[5]
808 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
809 ret <16 x i8> %shuffle
; Splats lane 2 of <8 x i16> %v1 across an <8 x i16> result. Expected: dup v0.8h, v0.h[2]
812 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
813 ; CHECK-LABEL: test_vdupq_laneq_s16:
815 ; CHECK-NEXT: dup v0.8h, v0.h[2]
817 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
818 ret <8 x i16> %shuffle
; Splats lane 1 of <4 x i32> %v1 across a <4 x i32> result. Expected: dup v0.4s, v0.s[1]
821 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
822 ; CHECK-LABEL: test_vdupq_laneq_s32:
824 ; CHECK-NEXT: dup v0.4s, v0.s[1]
826 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
827 ret <4 x i32> %shuffle
; Splats lane 0 of <2 x i64> %v1 across a <2 x i64> result (all-zero mask). Expected: dup v0.2d, v0.d[0]
830 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
831 ; CHECK-LABEL: test_vdupq_laneq_s64:
833 ; CHECK-NEXT: dup v0.2d, v0.d[0]
835 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
836 ret <2 x i64> %shuffle
; Bitcasts <8 x i8> %in to a scalar i64. Expected: fmov x0, d0
839 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
840 ; CHECK-LABEL: test_bitcastv8i8toi64:
842 ; CHECK-NEXT: fmov x0, d0
844 %res = bitcast <8 x i8> %in to i64
; Bitcasts <4 x i16> %in to a scalar i64. Expected: fmov x0, d0
848 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
849 ; CHECK-LABEL: test_bitcastv4i16toi64:
851 ; CHECK-NEXT: fmov x0, d0
853 %res = bitcast <4 x i16> %in to i64
; Bitcasts <2 x i32> %in to a scalar i64. Expected: fmov x0, d0
857 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
858 ; CHECK-LABEL: test_bitcastv2i32toi64:
860 ; CHECK-NEXT: fmov x0, d0
862 %res = bitcast <2 x i32> %in to i64
; Bitcasts <2 x float> %in to a scalar i64. Expected: fmov x0, d0
866 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
867 ; CHECK-LABEL: test_bitcastv2f32toi64:
869 ; CHECK-NEXT: fmov x0, d0
871 %res = bitcast <2 x float> %in to i64
; Bitcasts <1 x i64> %in to a scalar i64. Expected: fmov x0, d0
875 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
876 ; CHECK-LABEL: test_bitcastv1i64toi64:
878 ; CHECK-NEXT: fmov x0, d0
880 %res = bitcast <1 x i64> %in to i64
; Bitcasts <1 x double> %in to a scalar i64. Expected: fmov x0, d0
884 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
885 ; CHECK-LABEL: test_bitcastv1f64toi64:
887 ; CHECK-NEXT: fmov x0, d0
889 %res = bitcast <1 x double> %in to i64
; Bitcasts scalar i64 %in to <8 x i8>. Expected: fmov d0, x0
893 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
894 ; CHECK-LABEL: test_bitcasti64tov8i8:
896 ; CHECK-NEXT: fmov d0, x0
898 %res = bitcast i64 %in to <8 x i8>
; Bitcasts scalar i64 %in to <4 x i16>. Expected: fmov d0, x0
902 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
903 ; CHECK-LABEL: test_bitcasti64tov4i16:
905 ; CHECK-NEXT: fmov d0, x0
907 %res = bitcast i64 %in to <4 x i16>
; Bitcasts scalar i64 %in to <2 x i32>. Expected: fmov d0, x0
911 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
912 ; CHECK-LABEL: test_bitcasti64tov2i32:
914 ; CHECK-NEXT: fmov d0, x0
916 %res = bitcast i64 %in to <2 x i32>
; Bitcasts scalar i64 %in to <2 x float>. Expected: fmov d0, x0
920 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
921 ; CHECK-LABEL: test_bitcasti64tov2f32:
923 ; CHECK-NEXT: fmov d0, x0
925 %res = bitcast i64 %in to <2 x float>
; Bitcasts scalar i64 %in to <1 x i64>. Expected: fmov d0, x0
929 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
930 ; CHECK-LABEL: test_bitcasti64tov1i64:
932 ; CHECK-NEXT: fmov d0, x0
934 %res = bitcast i64 %in to <1 x i64>
; Bitcasts scalar i64 %in to <1 x double>. Expected: fmov d0, x0
938 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
939 ; CHECK-LABEL: test_bitcasti64tov1f64:
941 ; CHECK-NEXT: fmov d0, x0
943 %res = bitcast i64 %in to <1 x double>
944 ret <1 x double> %res
; Negates <8 x i8> %a (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; Expected: neg v0.8b, v0.8b; fcvtzs x8, d0; fmov d0, x8
947 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
948 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
950 ; CHECK-NEXT: neg v0.8b, v0.8b
951 ; CHECK-NEXT: fcvtzs x8, d0
952 ; CHECK-NEXT: fmov d0, x8
954 %sub.i = sub <8 x i8> zeroinitializer, %a
955 %1 = bitcast <8 x i8> %sub.i to <1 x double>
956 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
957 ret <1 x i64> %vcvt.i
; Negates <4 x i16> %a (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; Expected: neg v0.4h, v0.4h; fcvtzs x8, d0; fmov d0, x8
960 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
961 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
963 ; CHECK-NEXT: neg v0.4h, v0.4h
964 ; CHECK-NEXT: fcvtzs x8, d0
965 ; CHECK-NEXT: fmov d0, x8
967 %sub.i = sub <4 x i16> zeroinitializer, %a
968 %1 = bitcast <4 x i16> %sub.i to <1 x double>
969 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
970 ret <1 x i64> %vcvt.i
; Negates <2 x i32> %a (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; Expected: neg v0.2s, v0.2s; fcvtzs x8, d0; fmov d0, x8
973 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
974 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
976 ; CHECK-NEXT: neg v0.2s, v0.2s
977 ; CHECK-NEXT: fcvtzs x8, d0
978 ; CHECK-NEXT: fmov d0, x8
980 %sub.i = sub <2 x i32> zeroinitializer, %a
981 %1 = bitcast <2 x i32> %sub.i to <1 x double>
982 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
983 ret <1 x i64> %vcvt.i
; Negates <1 x i64> %a (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; Expected: neg d0, d0; fcvtzs x8, d0; fmov d0, x8
986 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
987 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
989 ; CHECK-NEXT: neg d0, d0
990 ; CHECK-NEXT: fcvtzs x8, d0
991 ; CHECK-NEXT: fmov d0, x8
993 %sub.i = sub <1 x i64> zeroinitializer, %a
994 %1 = bitcast <1 x i64> %sub.i to <1 x double>
995 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
996 ret <1 x i64> %vcvt.i
; Negates <2 x float> %a (-0.0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; Expected: fneg v0.2s, v0.2s; fcvtzs x8, d0; fmov d0, x8
999 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
1000 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
1002 ; CHECK-NEXT: fneg v0.2s, v0.2s
1003 ; CHECK-NEXT: fcvtzs x8, d0
1004 ; CHECK-NEXT: fmov d0, x8
1006 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
1007 %1 = bitcast <2 x float> %sub.i to <1 x double>
1008 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1009 ret <1 x i64> %vcvt.i
; Converts <1 x i64> %a to <1 x double> with sitofp, bitcasts it to <8 x i8>, then negates (0 - x).
; Expected: fmov x8, d0; scvtf d0, x8; neg v0.8b, v0.8b
1012 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
1013 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
1015 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1016 ; CHECK-NEXT: fmov x8, d0
1017 ; CHECK-NEXT: scvtf d0, x8
1018 ; CHECK-NEXT: neg v0.8b, v0.8b
1020 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1021 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
1022 %sub.i = sub <8 x i8> zeroinitializer, %1
; Converts <1 x i64> %a to <1 x double> with sitofp, bitcasts it to <4 x i16>, then negates (0 - x).
; Expected: fmov x8, d0; scvtf d0, x8; neg v0.4h, v0.4h
1026 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
1027 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
1029 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1030 ; CHECK-NEXT: fmov x8, d0
1031 ; CHECK-NEXT: scvtf d0, x8
1032 ; CHECK-NEXT: neg v0.4h, v0.4h
1034 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1035 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
1036 %sub.i = sub <4 x i16> zeroinitializer, %1
1037 ret <4 x i16> %sub.i
; Converts <1 x i64> %a to <1 x double> with sitofp, bitcasts it to <2 x i32>, then negates (0 - x).
; Expected: fmov x8, d0; scvtf d0, x8; neg v0.2s, v0.2s
1040 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
1041 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
1043 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1044 ; CHECK-NEXT: fmov x8, d0
1045 ; CHECK-NEXT: scvtf d0, x8
1046 ; CHECK-NEXT: neg v0.2s, v0.2s
1048 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1049 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
1050 %sub.i = sub <2 x i32> zeroinitializer, %1
1051 ret <2 x i32> %sub.i
; Converts <1 x i64> %a to <1 x double> with sitofp, bitcasts it back to <1 x i64>, then negates (0 - x).
; Expected: fmov x8, d0; scvtf d0, x8; neg d0, d0
1054 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
1055 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
1057 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1058 ; CHECK-NEXT: fmov x8, d0
1059 ; CHECK-NEXT: scvtf d0, x8
1060 ; CHECK-NEXT: neg d0, d0
1062 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1063 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
1064 %sub.i = sub <1 x i64> zeroinitializer, %1
1065 ret <1 x i64> %sub.i
; Converts <1 x i64> %a to <1 x double> with sitofp, bitcasts it to <2 x float>, then negates (-0.0 - x).
; Expected: fmov x8, d0; scvtf d0, x8; fneg v0.2s, v0.2s
1068 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
1069 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
1071 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1072 ; CHECK-NEXT: fmov x8, d0
1073 ; CHECK-NEXT: scvtf d0, x8
1074 ; CHECK-NEXT: fneg v0.2s, v0.2s
1076 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1077 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
1078 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
1079 ret <2 x float> %sub.i
1082 ; Test inserting an element into an undef vector.
; Inserts i8 %a into lane 0 of an undef <8 x i8>. Expected: fmov s0, w0
1083 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
1084 ; CHECK-LABEL: scalar_to_vector.v8i8:
1086 ; CHECK-NEXT: fmov s0, w0
1088 %b = insertelement <8 x i8> undef, i8 %a, i32 0
; Inserts i8 %a into lane 0 of an undef <16 x i8>. Expected: fmov s0, w0
1092 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
1093 ; CHECK-LABEL: scalar_to_vector.v16i8:
1095 ; CHECK-NEXT: fmov s0, w0
1097 %b = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts i16 %a into lane 0 of an undef <4 x i16>. Expected: fmov s0, w0
1101 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
1102 ; CHECK-LABEL: scalar_to_vector.v4i16:
1104 ; CHECK-NEXT: fmov s0, w0
1106 %b = insertelement <4 x i16> undef, i16 %a, i32 0
; Inserts i16 %a into lane 0 of an undef <8 x i16>. Expected: fmov s0, w0
1110 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
1111 ; CHECK-LABEL: scalar_to_vector.v8i16:
1113 ; CHECK-NEXT: fmov s0, w0
1115 %b = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts i32 %a into lane 0 of an undef <2 x i32>. Expected: fmov s0, w0
1119 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
1120 ; CHECK-LABEL: scalar_to_vector.v2i32:
1122 ; CHECK-NEXT: fmov s0, w0
1124 %b = insertelement <2 x i32> undef, i32 %a, i32 0
; Inserts i32 %a into lane 0 of an undef <4 x i32>. Expected: fmov s0, w0
1128 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
1129 ; CHECK-LABEL: scalar_to_vector.v4i32:
1131 ; CHECK-NEXT: fmov s0, w0
1133 %b = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts i64 %a into lane 0 of an undef <2 x i64>. Expected: fmov d0, x0
1137 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
1138 ; CHECK-LABEL: scalar_to_vector.v2i64:
1140 ; CHECK-NEXT: fmov d0, x0
1142 %b = insertelement <2 x i64> undef, i64 %a, i32 0
; Extracts the single lane of <1 x i8> %a and inserts it into lanes 0-7 of an <8 x i8>.
; Expected: dup v0.8b, v0.b[0]
1146 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
1147 ; CHECK-LABEL: testDUP.v1i8:
1149 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1150 ; CHECK-NEXT: dup v0.8b, v0.b[0]
1152 %b = extractelement <1 x i8> %a, i32 0
1153 %c = insertelement <8 x i8> undef, i8 %b, i32 0
1154 %d = insertelement <8 x i8> %c, i8 %b, i32 1
1155 %e = insertelement <8 x i8> %d, i8 %b, i32 2
1156 %f = insertelement <8 x i8> %e, i8 %b, i32 3
1157 %g = insertelement <8 x i8> %f, i8 %b, i32 4
1158 %h = insertelement <8 x i8> %g, i8 %b, i32 5
1159 %i = insertelement <8 x i8> %h, i8 %b, i32 6
1160 %j = insertelement <8 x i8> %i, i8 %b, i32 7
; Extracts the only element of a <1 x i16> and inserts it into all eight lanes
; of an <8 x i16>. The insert chain is expected to collapse into one dup of
; halfword lane 0.
1164 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
1165 ; CHECK-LABEL: testDUP.v1i16:
1167 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1168 ; CHECK-NEXT: dup v0.8h, v0.h[0]
1170 %b = extractelement <1 x i16> %a, i32 0
1171 %c = insertelement <8 x i16> undef, i16 %b, i32 0
1172 %d = insertelement <8 x i16> %c, i16 %b, i32 1
1173 %e = insertelement <8 x i16> %d, i16 %b, i32 2
1174 %f = insertelement <8 x i16> %e, i16 %b, i32 3
1175 %g = insertelement <8 x i16> %f, i16 %b, i32 4
1176 %h = insertelement <8 x i16> %g, i16 %b, i32 5
1177 %i = insertelement <8 x i16> %h, i16 %b, i32 6
1178 %j = insertelement <8 x i16> %i, i16 %b, i32 7
; Extracts the only element of a <1 x i32> and inserts it into all four lanes
; of a <4 x i32>. The insert chain is expected to collapse into one dup of
; word lane 0.
1182 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
1183 ; CHECK-LABEL: testDUP.v1i32:
1185 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1186 ; CHECK-NEXT: dup v0.4s, v0.s[0]
1188 %b = extractelement <1 x i32> %a, i32 0
1189 %c = insertelement <4 x i32> undef, i32 %b, i32 0
1190 %d = insertelement <4 x i32> %c, i32 %b, i32 1
1191 %e = insertelement <4 x i32> %d, i32 %b, i32 2
1192 %f = insertelement <4 x i32> %e, i32 %b, i32 3
; Rebuilds an <8 x i8> from lanes 0..7 of a <16 x i8>, one extract/insert pair
; per lane, and returns it. The only expectation visible here is a
; register-liveness annotation; no data-movement instruction is listed.
1196 define <8 x i8> @getl(<16 x i8> %x) #0 {
1197 ; CHECK-LABEL: getl:
1199 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1201 %vecext = extractelement <16 x i8> %x, i32 0
1202 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
1203 %vecext1 = extractelement <16 x i8> %x, i32 1
1204 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
1205 %vecext3 = extractelement <16 x i8> %x, i32 2
1206 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
1207 %vecext5 = extractelement <16 x i8> %x, i32 3
1208 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
1209 %vecext7 = extractelement <16 x i8> %x, i32 4
1210 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
1211 %vecext9 = extractelement <16 x i8> %x, i32 5
1212 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
1213 %vecext11 = extractelement <16 x i8> %x, i32 6
1214 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
1215 %vecext13 = extractelement <16 x i8> %x, i32 7
1216 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
1217 ret <8 x i8> %vecinit14
; Builds a <4 x i16> whose lane 0 comes from a run-time index (%idx) into %x,
; while lanes 1..3 are copied from the same constant lanes of %x.
; Expected code spills %x to the stack, forms the element address with bfi,
; reloads the halfword, and then fills lanes 1..3 with lane-to-lane moves.
1220 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
1221 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
1223 ; CHECK-NEXT: sub sp, sp, #16
1224 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1225 ; CHECK-NEXT: mov x8, sp
1226 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
1227 ; CHECK-NEXT: str q0, [sp]
1228 ; CHECK-NEXT: bfi x8, x0, #1, #3
1229 ; CHECK-NEXT: ldr h1, [x8]
1230 ; CHECK-NEXT: mov v1.h[1], v0.h[1]
1231 ; CHECK-NEXT: mov v1.h[2], v0.h[2]
1232 ; CHECK-NEXT: mov v1.h[3], v0.h[3]
1233 ; CHECK-NEXT: fmov d0, d1
1234 ; CHECK-NEXT: add sp, sp, #16
1236 %tmp = extractelement <8 x i16> %x, i32 %idx
1237 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
1238 %tmp3 = extractelement <8 x i16> %x, i32 1
1239 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1240 %tmp5 = extractelement <8 x i16> %x, i32 2
1241 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1242 %tmp7 = extractelement <8 x i16> %x, i32 3
1243 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Inserts lane 0 of %x into an undef <4 x i16> at a run-time index (%idx),
; then overwrites lanes 1..3 with the same constant lanes of %x.
; Expected code stores the halfword to a stack slot addressed via bfi,
; reloads the 64-bit vector, and then fills lanes 1..3 with lane moves.
1247 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
1248 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
1250 ; CHECK-NEXT: sub sp, sp, #16
1251 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1252 ; CHECK-NEXT: add x8, sp, #8
1253 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
1254 ; CHECK-NEXT: bfi x8, x0, #1, #2
1255 ; CHECK-NEXT: str h0, [x8]
1256 ; CHECK-NEXT: ldr d1, [sp, #8]
1257 ; CHECK-NEXT: mov v1.h[1], v0.h[1]
1258 ; CHECK-NEXT: mov v1.h[2], v0.h[2]
1259 ; CHECK-NEXT: mov v1.h[3], v0.h[3]
1260 ; CHECK-NEXT: fmov d0, d1
1261 ; CHECK-NEXT: add sp, sp, #16
1263 %tmp = extractelement <8 x i16> %x, i32 0
1264 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
1265 %tmp3 = extractelement <8 x i16> %x, i32 1
1266 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1267 %tmp5 = extractelement <8 x i16> %x, i32 2
1268 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1269 %tmp7 = extractelement <8 x i16> %x, i32 3
1270 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Extracts i32 lane 1 of a <2 x i32>, truncates it to i16 and splats it over
; a <4 x i16>. Expected to fold into one dup from halfword lane 2 of the
; source register.
1274 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
1275 ; CHECK-LABEL: test_dup_v2i32_v4i16:
1276 ; CHECK: // %bb.0: // %entry
1277 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1278 ; CHECK-NEXT: dup v0.4h, v0.h[2]
1281 %x = extractelement <2 x i32> %a, i32 1
1282 %vget_lane = trunc i32 %x to i16
1283 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1284 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1285 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1286 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1287 ret <4 x i16> %vecinit3.i
; Extracts i32 lane 3 of a <4 x i32>, truncates it to i16 and splats it over
; an <8 x i16>. Expected to fold into one dup from halfword lane 6 of the
; source register.
1290 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
1291 ; CHECK-LABEL: test_dup_v4i32_v8i16:
1292 ; CHECK: // %bb.0: // %entry
1293 ; CHECK-NEXT: dup v0.8h, v0.h[6]
1296 %x = extractelement <4 x i32> %a, i32 3
1297 %vget_lane = trunc i32 %x to i16
1298 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1299 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1300 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1301 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1302 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1303 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1304 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1305 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1306 ret <8 x i16> %vecinit7.i
; Extracts the only element of a <1 x i64>, truncates it to i16 and splats it
; over a <4 x i16>. Expected to fold into one dup from halfword lane 0.
1309 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
1310 ; CHECK-LABEL: test_dup_v1i64_v4i16:
1311 ; CHECK: // %bb.0: // %entry
1312 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1313 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1316 %x = extractelement <1 x i64> %a, i32 0
1317 %vget_lane = trunc i64 %x to i16
1318 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1319 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1320 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1321 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1322 ret <4 x i16> %vecinit3.i
; Extracts the only element of a <1 x i64>, truncates it to i32 and splats it
; over a <2 x i32>. Expected to fold into one dup from word lane 0.
1325 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
1326 ; CHECK-LABEL: test_dup_v1i64_v2i32:
1327 ; CHECK: // %bb.0: // %entry
1328 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1329 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1332 %x = extractelement <1 x i64> %a, i32 0
1333 %vget_lane = trunc i64 %x to i32
1334 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1335 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1336 ret <2 x i32> %vecinit1.i
; Extracts i64 lane 1 of a <2 x i64>, truncates it to i16 and splats it over
; an <8 x i16>. Expected to fold into one dup from halfword lane 4 of the
; source register.
1339 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
1340 ; CHECK-LABEL: test_dup_v2i64_v8i16:
1341 ; CHECK: // %bb.0: // %entry
1342 ; CHECK-NEXT: dup v0.8h, v0.h[4]
1345 %x = extractelement <2 x i64> %a, i32 1
1346 %vget_lane = trunc i64 %x to i16
1347 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1348 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1349 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1350 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1351 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1352 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1353 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1354 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1355 ret <8 x i16> %vecinit7.i
; Extracts i64 lane 1 of a <2 x i64>, truncates it to i32 and splats it over
; a <4 x i32>. Expected to fold into one dup from word lane 2 of the source
; register.
1358 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1359 ; CHECK-LABEL: test_dup_v2i64_v4i32:
1360 ; CHECK: // %bb.0: // %entry
1361 ; CHECK-NEXT: dup v0.4s, v0.s[2]
1364 %x = extractelement <2 x i64> %a, i32 1
1365 %vget_lane = trunc i64 %x to i32
1366 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1367 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1368 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1369 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1370 ret <4 x i32> %vecinit3.i
; Extracts i32 lane 1 of a 128-bit <4 x i32>, truncates it to i16 and splats
; it over a 64-bit <4 x i16>. Expected to fold into one dup from halfword
; lane 2 of the source register.
1373 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1374 ; CHECK-LABEL: test_dup_v4i32_v4i16:
1375 ; CHECK: // %bb.0: // %entry
1376 ; CHECK-NEXT: dup v0.4h, v0.h[2]
1379 %x = extractelement <4 x i32> %a, i32 1
1380 %vget_lane = trunc i32 %x to i16
1381 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1382 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1383 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1384 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1385 ret <4 x i16> %vecinit3.i
; Extracts i64 lane 0 of a 128-bit <2 x i64>, truncates it to i16 and splats
; it over a 64-bit <4 x i16>. Expected to fold into one dup from halfword
; lane 0.
1388 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1389 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1390 ; CHECK: // %bb.0: // %entry
1391 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1394 %x = extractelement <2 x i64> %a, i32 0
1395 %vget_lane = trunc i64 %x to i16
1396 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1397 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1398 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1399 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1400 ret <4 x i16> %vecinit3.i
; Extracts i64 lane 0 of a 128-bit <2 x i64>, truncates it to i32 and splats
; it over a 64-bit <2 x i32>. Expected to fold into one dup from word lane 0.
1403 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1404 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1405 ; CHECK: // %bb.0: // %entry
1406 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1409 %x = extractelement <2 x i64> %a, i32 0
1410 %vget_lane = trunc i64 %x to i32
1411 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1412 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1413 ret <2 x i32> %vecinit1.i
; Reduces %a with the fmaxv intrinsic, round-trips the scalar through a
; <1 x float>, and places it in lane 0 of an undef <2 x float>.
; Only the fmaxp reduction is expected; no extra move for the insert.
1417 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1418 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1419 ; CHECK: // %bb.0: // %entry
1420 ; CHECK-NEXT: fmaxp s0, v0.2s
1423 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1424 %1 = insertelement <1 x float> undef, float %0, i32 0
1425 %2 = extractelement <1 x float> %1, i32 0
1426 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1427 ret <2 x float> %vecinit1.i
; Reduces %a with the fmaxv intrinsic, round-trips the scalar through a
; <1 x float>, and places it in lane 0 of an undef <4 x float>.
; Only the fmaxp reduction is expected; no extra move for the insert.
1430 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1431 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1432 ; CHECK: // %bb.0: // %entry
1433 ; CHECK-NEXT: fmaxp s0, v0.2s
1436 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1437 %1 = insertelement <1 x float> undef, float %0, i32 0
1438 %2 = extractelement <1 x float> %1, i32 0
1439 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1440 ret <4 x float> %vecinit1.i
; Intrinsic called by the two test_scalar_to_vector_f32_to_* functions.
1443 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
; Places lane 0 of %a into lane 1 of an undef <2 x i32>; lane 0 of the result
; stays undef. Expected lowering is a dup of word lane 0.
1445 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1446 ; CHECK-LABEL: test_concat_undef_v1i32:
1447 ; CHECK: // %bb.0: // %entry
1448 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1449 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1452 %0 = extractelement <2 x i32> %a, i32 0
1453 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1454 ret <2 x i32> %vecinit1.i
; Scalar sqabs intrinsic called by test_concat_v1i32_undef and
; test_concat_diff_v1i32_v1i32. Attribute group #4 is not defined in this
; part of the file.
1457 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
; Applies the scalar sqabs intrinsic to %a and places the result in lane 0 of
; an undef <2 x i32>. Expected code moves w0 into s0 and runs sqabs in place,
; with no separate lane insert.
1459 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1460 ; CHECK-LABEL: test_concat_v1i32_undef:
1461 ; CHECK: // %bb.0: // %entry
1462 ; CHECK-NEXT: fmov s0, w0
1463 ; CHECK-NEXT: sqabs s0, s0
1466 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1467 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1468 ret <2 x i32> %vecinit.i432
; Writes lane 0 of %a into both lanes of a <2 x i32>.
; Expected lowering is a single dup of word lane 0.
1471 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1472 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1473 ; CHECK: // %bb.0: // %entry
1474 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1475 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1478 %0 = extractelement <2 x i32> %a, i32 0
1479 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1480 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1481 ret <2 x i32> %vecinit1.i
; Computes sqabs of %a and of %b, puts each result in lane 0 of its own
; <2 x i32>, and combines them with a shuffle (mask <0, 2>) that selects lane 0
; of each operand. Expected code performs both sqabs operations and inserts
; the second result into word lane 1 through a general-purpose register.
1484 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1485 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1486 ; CHECK: // %bb.0: // %entry
1487 ; CHECK-NEXT: fmov s0, w1
1488 ; CHECK-NEXT: fmov s1, w0
1489 ; CHECK-NEXT: sqabs s2, s0
1490 ; CHECK-NEXT: sqabs s0, s1
1491 ; CHECK-NEXT: fmov w8, s2
1492 ; CHECK-NEXT: mov v0.s[1], w8
1493 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1496 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1497 %d = insertelement <2 x i32> undef, i32 %c, i32 0
1498 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1499 %f = insertelement <2 x i32> undef, i32 %e, i32 0
1500 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
; Shuffle that joins the low eight bytes of %x (mask 0..7) with the low eight
; bytes of %y (mask 16..23). Expected lowering is one 64-bit lane move from
; v1.d[0] into v0.d[1].
1504 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1505 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1506 ; CHECK: // %bb.0: // %entry
1507 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1510 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1511 ret <16 x i8> %vecinit30
; Copies all eight bytes of the 64-bit %x into the low half of a <16 x i8>
; one lane at a time, then shuffles in the low eight bytes of %y as the high
; half. Expected lowering is one 64-bit lane move from v1.d[0] into v0.d[1].
1514 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1515 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1516 ; CHECK: // %bb.0: // %entry
1517 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1518 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1521 %vecext = extractelement <8 x i8> %x, i32 0
1522 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1523 %vecext1 = extractelement <8 x i8> %x, i32 1
1524 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1525 %vecext3 = extractelement <8 x i8> %x, i32 2
1526 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1527 %vecext5 = extractelement <8 x i8> %x, i32 3
1528 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1529 %vecext7 = extractelement <8 x i8> %x, i32 4
1530 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1531 %vecext9 = extractelement <8 x i8> %x, i32 5
1532 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1533 %vecext11 = extractelement <8 x i8> %x, i32 6
1534 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1535 %vecext13 = extractelement <8 x i8> %x, i32 7
1536 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1537 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1538 ret <16 x i8> %vecinit30
; Builds a <16 x i8> lane by lane: lanes 0..7 come from the low eight bytes of
; the 128-bit %x, lanes 8..15 from the eight bytes of the 64-bit %y.
; Expected lowering is one 64-bit lane move from v1.d[0] into v0.d[1].
1541 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1542 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1543 ; CHECK: // %bb.0: // %entry
1544 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1545 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1548 %vecext = extractelement <16 x i8> %x, i32 0
1549 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1550 %vecext1 = extractelement <16 x i8> %x, i32 1
1551 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1552 %vecext3 = extractelement <16 x i8> %x, i32 2
1553 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1554 %vecext5 = extractelement <16 x i8> %x, i32 3
1555 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1556 %vecext7 = extractelement <16 x i8> %x, i32 4
1557 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1558 %vecext9 = extractelement <16 x i8> %x, i32 5
1559 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1560 %vecext11 = extractelement <16 x i8> %x, i32 6
1561 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1562 %vecext13 = extractelement <16 x i8> %x, i32 7
1563 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1564 %vecext15 = extractelement <8 x i8> %y, i32 0
1565 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1566 %vecext17 = extractelement <8 x i8> %y, i32 1
1567 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1568 %vecext19 = extractelement <8 x i8> %y, i32 2
1569 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1570 %vecext21 = extractelement <8 x i8> %y, i32 3
1571 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1572 %vecext23 = extractelement <8 x i8> %y, i32 4
1573 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1574 %vecext25 = extractelement <8 x i8> %y, i32 5
1575 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1576 %vecext27 = extractelement <8 x i8> %y, i32 6
1577 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1578 %vecext29 = extractelement <8 x i8> %y, i32 7
1579 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1580 ret <16 x i8> %vecinit30
; Builds a <16 x i8> lane by lane from two 64-bit vectors: lanes 0..7 from %x,
; lanes 8..15 from %y. Expected lowering is one 64-bit lane move from v1.d[0]
; into v0.d[1].
1583 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1584 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1585 ; CHECK: // %bb.0: // %entry
1586 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1587 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1588 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1591 %vecext = extractelement <8 x i8> %x, i32 0
1592 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1593 %vecext1 = extractelement <8 x i8> %x, i32 1
1594 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1595 %vecext3 = extractelement <8 x i8> %x, i32 2
1596 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1597 %vecext5 = extractelement <8 x i8> %x, i32 3
1598 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1599 %vecext7 = extractelement <8 x i8> %x, i32 4
1600 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1601 %vecext9 = extractelement <8 x i8> %x, i32 5
1602 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1603 %vecext11 = extractelement <8 x i8> %x, i32 6
1604 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1605 %vecext13 = extractelement <8 x i8> %x, i32 7
1606 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1607 %vecext15 = extractelement <8 x i8> %y, i32 0
1608 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1609 %vecext17 = extractelement <8 x i8> %y, i32 1
1610 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1611 %vecext19 = extractelement <8 x i8> %y, i32 2
1612 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1613 %vecext21 = extractelement <8 x i8> %y, i32 3
1614 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1615 %vecext23 = extractelement <8 x i8> %y, i32 4
1616 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1617 %vecext25 = extractelement <8 x i8> %y, i32 5
1618 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1619 %vecext27 = extractelement <8 x i8> %y, i32 6
1620 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1621 %vecext29 = extractelement <8 x i8> %y, i32 7
1622 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1623 ret <16 x i8> %vecinit30
; Shuffle that joins the low four halfwords of %x (mask 0..3) with the low
; four halfwords of %y (mask 8..11). Expected lowering is one 64-bit lane move
; from v1.d[0] into v0.d[1].
1626 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1627 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1628 ; CHECK: // %bb.0: // %entry
1629 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1632 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1633 ret <8 x i16> %vecinit14
; Copies the four halfwords of the 64-bit %x into the low half of an
; <8 x i16> lane by lane, then shuffles in the low four halfwords of %y as the
; high half. Expected lowering is one 64-bit lane move into v0.d[1].
1636 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1637 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1638 ; CHECK: // %bb.0: // %entry
1639 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1640 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1643 %vecext = extractelement <4 x i16> %x, i32 0
1644 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1645 %vecext1 = extractelement <4 x i16> %x, i32 1
1646 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1647 %vecext3 = extractelement <4 x i16> %x, i32 2
1648 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1649 %vecext5 = extractelement <4 x i16> %x, i32 3
1650 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1651 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1652 ret <8 x i16> %vecinit14
; Builds an <8 x i16> lane by lane: lanes 0..3 from the low four halfwords of
; the 128-bit %x, lanes 4..7 from the four halfwords of the 64-bit %y.
; Expected lowering is one 64-bit lane move from v1.d[0] into v0.d[1].
1655 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1656 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1657 ; CHECK: // %bb.0: // %entry
1658 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1659 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1662 %vecext = extractelement <8 x i16> %x, i32 0
1663 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1664 %vecext1 = extractelement <8 x i16> %x, i32 1
1665 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1666 %vecext3 = extractelement <8 x i16> %x, i32 2
1667 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1668 %vecext5 = extractelement <8 x i16> %x, i32 3
1669 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1670 %vecext7 = extractelement <4 x i16> %y, i32 0
1671 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1672 %vecext9 = extractelement <4 x i16> %y, i32 1
1673 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1674 %vecext11 = extractelement <4 x i16> %y, i32 2
1675 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1676 %vecext13 = extractelement <4 x i16> %y, i32 3
1677 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1678 ret <8 x i16> %vecinit14
; Builds an <8 x i16> lane by lane from two 64-bit vectors: lanes 0..3 from
; %x, lanes 4..7 from %y. Expected lowering is one 64-bit lane move from
; v1.d[0] into v0.d[1].
1681 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1682 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1683 ; CHECK: // %bb.0: // %entry
1684 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1685 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1686 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1689 %vecext = extractelement <4 x i16> %x, i32 0
1690 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1691 %vecext1 = extractelement <4 x i16> %x, i32 1
1692 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1693 %vecext3 = extractelement <4 x i16> %x, i32 2
1694 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1695 %vecext5 = extractelement <4 x i16> %x, i32 3
1696 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1697 %vecext7 = extractelement <4 x i16> %y, i32 0
1698 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1699 %vecext9 = extractelement <4 x i16> %y, i32 1
1700 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1701 %vecext11 = extractelement <4 x i16> %y, i32 2
1702 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1703 %vecext13 = extractelement <4 x i16> %y, i32 3
1704 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1705 ret <8 x i16> %vecinit14
; Shuffle that joins the low two words of %x (mask 0, 1) with the low two
; words of %y (mask 4, 5). Expected lowering is one 64-bit lane move from
; v1.d[0] into v0.d[1].
1708 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1709 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1710 ; CHECK: // %bb.0: // %entry
1711 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1714 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1715 ret <4 x i32> %vecinit6
; Copies both words of the 64-bit %x into the low half of a <4 x i32> lane by
; lane, then shuffles in the low two words of %y as the high half.
; Expected lowering is one 64-bit lane move from v1.d[0] into v0.d[1].
1718 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1719 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1720 ; CHECK: // %bb.0: // %entry
1721 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1722 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1725 %vecext = extractelement <2 x i32> %x, i32 0
1726 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1727 %vecext1 = extractelement <2 x i32> %x, i32 1
1728 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1729 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1730 ret <4 x i32> %vecinit6
; Builds a <4 x i32> lane by lane: lanes 0..1 from the low two words of the
; 128-bit %x, lanes 2..3 from the two words of the 64-bit %y.
; Expected lowering is one 64-bit lane move from v1.d[0] into v0.d[1].
1733 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1734 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1735 ; CHECK: // %bb.0: // %entry
1736 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1737 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1740 %vecext = extractelement <4 x i32> %x, i32 0
1741 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1742 %vecext1 = extractelement <4 x i32> %x, i32 1
1743 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1744 %vecext3 = extractelement <2 x i32> %y, i32 0
1745 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1746 %vecext5 = extractelement <2 x i32> %y, i32 1
1747 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1748 ret <4 x i32> %vecinit6
; Concatenates two 64-bit <2 x i32> vectors into a <4 x i32> with one
; widening shuffle (mask 0..3). Expected lowering is one 64-bit lane move
; from v1.d[0] into v0.d[1].
1751 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1752 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1753 ; CHECK: // %bb.0: // %entry
1754 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1755 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1756 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1759 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1760 ret <4 x i32> %vecinit6
; Shuffle that takes lane 0 of %x and lane 0 of %y (mask <0, 2>).
; The expected instruction here is zip1 on .2d rather than a lane move.
1763 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1764 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1765 ; CHECK: // %bb.0: // %entry
1766 ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
1769 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1770 ret <2 x i64> %vecinit2
; Places the single element of %x in lane 0 of a <2 x i64>, then shuffles it
; with lane 0 of %y (mask <0, 2>). The expected instruction is zip1 on .2d.
1773 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1774 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1775 ; CHECK: // %bb.0: // %entry
1776 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1777 ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
1780 %vecext = extractelement <1 x i64> %x, i32 0
1781 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1782 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1783 ret <2 x i64> %vecinit2
; Builds a <2 x i64> from lane 0 of the 128-bit %x and the single element of
; the 64-bit %y using extract/insert pairs. Expected lowering is one 64-bit
; lane move from v1.d[0] into v0.d[1].
1786 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1787 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1788 ; CHECK: // %bb.0: // %entry
1789 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1790 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1793 %vecext = extractelement <2 x i64> %x, i32 0
1794 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1795 %vecext1 = extractelement <1 x i64> %y, i32 0
1796 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1797 ret <2 x i64> %vecinit2
; Builds a <2 x i64> from the single elements of two <1 x i64> vectors using
; extract/insert pairs. Expected lowering is one 64-bit lane move from
; v1.d[0] into v0.d[1].
1800 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1801 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1802 ; CHECK: // %bb.0: // %entry
1803 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1804 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1805 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1808 %vecext = extractelement <1 x i64> %x, i32 0
1809 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1810 %vecext1 = extractelement <1 x i64> %y, i32 0
1811 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1812 ret <2 x i64> %vecinit2
; Splats the single element of a constant-zero <1 x i16> to four lanes with an
; all-zero shuffle mask. Expected to materialize as a zeroing movi.
1816 define <4 x i16> @concat_vector_v4i16_const() {
1817 ; CHECK-LABEL: concat_vector_v4i16_const:
1819 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1821 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the constant <1 x i16> value 1 to four lanes with an all-zero shuffle
; mask. Expected to materialize as a movi of #1 on .4h.
1825 define <4 x i16> @concat_vector_v4i16_const_one() {
1826 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1828 ; CHECK-NEXT: movi v0.4h, #1
1830 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i32> to four lanes with an
; all-zero shuffle mask. Expected to materialize as a zeroing movi.
1834 define <4 x i32> @concat_vector_v4i32_const() {
1835 ; CHECK-LABEL: concat_vector_v4i32_const:
1837 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1839 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i8> to eight lanes with an
; all-zero shuffle mask. Expected to materialize as a zeroing movi.
1843 define <8 x i8> @concat_vector_v8i8_const() {
1844 ; CHECK-LABEL: concat_vector_v8i8_const:
1846 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1848 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i16> to eight lanes with
; an all-zero shuffle mask. Expected to materialize as a zeroing movi.
1852 define <8 x i16> @concat_vector_v8i16_const() {
1853 ; CHECK-LABEL: concat_vector_v8i16_const:
1855 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1857 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the constant <1 x i16> value 1 to eight lanes with an all-zero
; shuffle mask. Expected to materialize as a movi of #1 on .8h.
1861 define <8 x i16> @concat_vector_v8i16_const_one() {
1862 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1864 ; CHECK-NEXT: movi v0.8h, #1
1866 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i8> to sixteen lanes with
; an all-zero shuffle mask. Expected to materialize as a zeroing movi.
1870 define <16 x i8> @concat_vector_v16i8_const() {
1871 ; CHECK-LABEL: concat_vector_v16i8_const:
1873 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1875 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
; Splats the single element of the <1 x i16> argument to four lanes with an
; all-zero shuffle mask. Expected lowering is a dup of halfword lane 0.
1879 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1880 ; CHECK-LABEL: concat_vector_v4i16:
1882 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1883 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1885 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of the <1 x i32> argument to four lanes with an
; all-zero shuffle mask. Expected lowering is a dup of word lane 0.
1889 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1890 ; CHECK-LABEL: concat_vector_v4i32:
1892 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1893 ; CHECK-NEXT: dup v0.4s, v0.s[0]
1895 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of the <1 x i8> argument to eight lanes with an
; all-zero shuffle mask. Expected lowering is a dup of byte lane 0.
1899 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1900 ; CHECK-LABEL: concat_vector_v8i8:
1902 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1903 ; CHECK-NEXT: dup v0.8b, v0.b[0]
1905 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of the <1 x i16> argument to eight lanes with an
; all-zero shuffle mask. Expected lowering is a dup of halfword lane 0.
1909 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1910 ; CHECK-LABEL: concat_vector_v8i16:
1912 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1913 ; CHECK-NEXT: dup v0.8h, v0.h[0]
1915 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the single element of the <1 x i8> argument to sixteen lanes with an
; all-zero shuffle mask. Expected lowering is a dup of byte lane 0.
1919 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1920 ; CHECK-LABEL: concat_vector_v16i8:
1922 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1923 ; CHECK-NEXT: dup v0.16b, v0.b[0]
1925 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer