1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Inserts the i8 scalar %tmp2 into lane 15 of the <16 x i8> vector %tmp1.
; Expected output: a single lane move from w0 into v0.b[15].
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
7 ; CHECK-NEXT: mov v0.b[15], w0
9 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; Inserts the i16 scalar %tmp2 into lane 6 of the <8 x i16> vector %tmp1.
; Expected output: a single lane move from w0 into v0.h[6].
13 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
14 ; CHECK-LABEL: ins8hw:
16 ; CHECK-NEXT: mov v0.h[6], w0
18 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; Inserts the i32 scalar %tmp2 into lane 2 of the <4 x i32> vector %tmp1.
; Expected output: a single lane move from w0 into v0.s[2].
22 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
23 ; CHECK-LABEL: ins4sw:
25 ; CHECK-NEXT: mov v0.s[2], w0
27 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; Inserts the i64 scalar %tmp2 into lane 1 of the <2 x i64> vector %tmp1.
; Expected output: a single lane move from the 64-bit register x0 into v0.d[1].
31 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
32 ; CHECK-LABEL: ins2dw:
34 ; CHECK-NEXT: mov v0.d[1], x0
36 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; Inserts the i8 scalar %tmp2 into lane 5 of the 64-bit <8 x i8> vector %tmp1.
; Expected output: the lane move from w0 into v0.b[5], bracketed by
; "// kill:" register annotations that relate d0 to q0.
40 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
41 ; CHECK-LABEL: ins8bw:
43 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
44 ; CHECK-NEXT: mov v0.b[5], w0
45 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
47 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; Inserts the i16 scalar %tmp2 into lane 3 of the 64-bit <4 x i16> vector %tmp1.
; Expected output: the lane move from w0 into v0.h[3], bracketed by
; "// kill:" register annotations that relate d0 to q0.
51 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
52 ; CHECK-LABEL: ins4hw:
54 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
55 ; CHECK-NEXT: mov v0.h[3], w0
56 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
58 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; Inserts the i32 scalar %tmp2 into lane 1 of the 64-bit <2 x i32> vector %tmp1.
; Expected output: the lane move from w0 into v0.s[1], bracketed by
; "// kill:" register annotations that relate d0 to q0.
62 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
63 ; CHECK-LABEL: ins2sw:
65 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
66 ; CHECK-NEXT: mov v0.s[1], w0
67 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
69 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Copies lane 2 of %tmp1 into lane 15 of %tmp2 (both <16 x i8>).
; Expected output: an element-to-element move into v1, then a full-register
; copy of v1 into v0.
73 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
74 ; CHECK-LABEL: ins16b16:
76 ; CHECK-NEXT: mov v1.b[15], v0.b[2]
77 ; CHECK-NEXT: mov v0.16b, v1.16b
79 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
80 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of %tmp1 into lane 7 of %tmp2 (both <8 x i16>).
; Expected output: an element-to-element move into v1, then a full-register
; copy of v1 into v0.
84 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
85 ; CHECK-LABEL: ins8h8:
87 ; CHECK-NEXT: mov v1.h[7], v0.h[2]
88 ; CHECK-NEXT: mov v0.16b, v1.16b
90 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
91 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 2 of %tmp1 into lane 1 of %tmp2 (both <4 x i32>).
; Expected output: an element-to-element move into v1, then a full-register
; copy of v1 into v0.
95 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
96 ; CHECK-LABEL: ins4s4:
98 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
99 ; CHECK-NEXT: mov v0.16b, v1.16b
101 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
102 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x i64>).
; Expected output: an element-to-element move into v1, then a full-register
; copy of v1 into v0.
106 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
107 ; CHECK-LABEL: ins2d2:
109 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
110 ; CHECK-NEXT: mov v0.16b, v1.16b
112 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
113 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Floating-point lane copy: lane 2 of %tmp1 goes into lane 1 of %tmp2
; (both <4 x float>) and the updated %tmp2 is returned.
; Expected output: an element-to-element move into v1, then a full-register
; copy of v1 into v0.
117 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
118 ; CHECK-LABEL: ins4f4:
120 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
121 ; CHECK-NEXT: mov v0.16b, v1.16b
123 %tmp3 = extractelement <4 x float> %tmp1, i32 2
124 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
125 ret <4 x float> %tmp4
; Floating-point lane copy: lane 0 of %tmp1 goes into lane 1 of %tmp2
; (both <2 x double>) and the updated %tmp2 is returned.
; Expected output: an element-to-element move into v1, then a full-register
; copy of v1 into v0.
128 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
129 ; CHECK-LABEL: ins2df2:
131 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
132 ; CHECK-NEXT: mov v0.16b, v1.16b
134 %tmp3 = extractelement <2 x double> %tmp1, i32 0
135 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
136 ret <2 x double> %tmp4
; Copies lane 2 of the 64-bit <8 x i8> %tmp1 into lane 15 of the 128-bit
; <16 x i8> %tmp2.
; Expected output: a "// kill:" annotation for d0/q0, the element move into
; v1, then a full-register copy of v1 into v0.
139 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
140 ; CHECK-LABEL: ins8b16:
142 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
143 ; CHECK-NEXT: mov v1.b[15], v0.b[2]
144 ; CHECK-NEXT: mov v0.16b, v1.16b
146 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
147 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of the 64-bit <4 x i16> %tmp1 into lane 7 of the 128-bit
; <8 x i16> %tmp2.
; Expected output: a "// kill:" annotation for d0/q0, the element move into
; v1, then a full-register copy of v1 into v0.
151 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
152 ; CHECK-LABEL: ins4h8:
154 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
155 ; CHECK-NEXT: mov v1.h[7], v0.h[2]
156 ; CHECK-NEXT: mov v0.16b, v1.16b
158 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
159 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 1 of the 64-bit <2 x i32> %tmp1 into lane 1 of the 128-bit
; <4 x i32> %tmp2.
; Expected output: a "// kill:" annotation for d0/q0, the element move into
; v1, then a full-register copy of v1 into v0.
163 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
164 ; CHECK-LABEL: ins2s4:
166 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
167 ; CHECK-NEXT: mov v1.s[1], v0.s[1]
168 ; CHECK-NEXT: mov v0.16b, v1.16b
170 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
171 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only lane of the <1 x i64> %tmp1 into lane 1 of the
; <2 x i64> %tmp2.
; Expected output: a "// kill:" annotation for d0/q0, the element move into
; v1, then a full-register copy of v1 into v0.
175 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
176 ; CHECK-LABEL: ins1d2:
178 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
179 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
180 ; CHECK-NEXT: mov v0.16b, v1.16b
182 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
183 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies lane 1 of the 64-bit <2 x float> %tmp1 into lane 1 of the 128-bit
; <4 x float> %tmp2 and returns the updated %tmp2.
; Expected output: a "// kill:" annotation for d0/q0, the element move into
; v1, then a full-register copy of v1 into v0.
187 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
188 ; CHECK-LABEL: ins2f4:
190 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
191 ; CHECK-NEXT: mov v1.s[1], v0.s[1]
192 ; CHECK-NEXT: mov v0.16b, v1.16b
194 %tmp3 = extractelement <2 x float> %tmp1, i32 1
195 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
196 ret <4 x float> %tmp4
; Copies the only lane of the <1 x double> %tmp1 into lane 1 of the
; <2 x double> %tmp2 and returns the updated %tmp2.
; Expected output: a "// kill:" annotation for d0/q0, the element move into
; v1, then a full-register copy of v1 into v0.
199 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
200 ; CHECK-LABEL: ins1f2:
202 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
203 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
204 ; CHECK-NEXT: mov v0.16b, v1.16b
206 %tmp3 = extractelement <1 x double> %tmp1, i32 0
207 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
208 ret <2 x double> %tmp4
; Same operation as @ins1f2 but with the parameters in the opposite order:
; the <2 x double> destination %tmp2 is the first argument and the
; <1 x double> source %tmp1 is the second.
; Expected output: the element move writes directly into v0 (from v1); the
; visible expectations contain no full-register vector copy.
211 define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) {
212 ; CHECK-LABEL: ins1f2_args_flipped:
214 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
215 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
217 %tmp3 = extractelement <1 x double> %tmp1, i32 0
218 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
219 ret <2 x double> %tmp4
; Copies lane 2 of the 128-bit <16 x i8> %tmp1 into lane 7 of the 64-bit
; <8 x i8> %tmp2.
; Expected output: a "// kill:" annotation for d1/q1, the element move into
; v1, then an fmov of d1 into d0.
222 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
223 ; CHECK-LABEL: ins16b8:
225 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
226 ; CHECK-NEXT: mov v1.b[7], v0.b[2]
227 ; CHECK-NEXT: fmov d0, d1
229 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
230 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
; Copies lane 2 of the 128-bit <8 x i16> %tmp1 into lane 3 of the 64-bit
; <4 x i16> %tmp2.
; Expected output: a "// kill:" annotation for d1/q1, the element move into
; v1, then an fmov of d1 into d0.
234 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
235 ; CHECK-LABEL: ins8h4:
237 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
238 ; CHECK-NEXT: mov v1.h[3], v0.h[2]
239 ; CHECK-NEXT: fmov d0, d1
241 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
242 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 2 of the 128-bit <4 x i32> %tmp1 into lane 1 of the 64-bit
; <2 x i32> %tmp2.
; Expected output: a "// kill:" annotation for d1/q1, the element move into
; v1, then an fmov of d1 into d0.
246 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
247 ; CHECK-LABEL: ins4s2:
249 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
250 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
251 ; CHECK-NEXT: fmov d0, d1
253 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
254 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of the <2 x i64> %tmp1 into the only lane of the
; <1 x i64> %tmp2.
; Expected output: a "// kill:" annotation for d1/q1, the element move into
; v1, then an fmov of d1 into d0.
258 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
259 ; CHECK-LABEL: ins2d1:
261 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
262 ; CHECK-NEXT: mov v1.d[0], v0.d[0]
263 ; CHECK-NEXT: fmov d0, d1
265 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
266 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 2 of the 128-bit <4 x float> %tmp1 into lane 1 of the 64-bit
; <2 x float> %tmp2 and returns the updated %tmp2.
; Expected output: a "// kill:" annotation for d1/q1, the element move into
; v1, then an fmov of d1 into d0.
270 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
271 ; CHECK-LABEL: ins4f2:
273 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
274 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
275 ; CHECK-NEXT: fmov d0, d1
277 %tmp3 = extractelement <4 x float> %tmp1, i32 2
278 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
279 ret <2 x float> %tmp4
; Takes lane 1 of the <2 x double> %tmp1 and inserts it into the only lane of
; the <1 x double> %tmp2; since %tmp2 has a single lane, the result consists
; solely of the extracted element.
; Expected output: a dup of v0.d[1] across v0.2d followed by a "// kill:"
; annotation for d0/q0 (no instruction referencing v1 is expected).
282 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
283 ; CHECK-LABEL: ins2f1:
285 ; CHECK-NEXT: dup v0.2d, v0.d[1]
286 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
288 %tmp3 = extractelement <2 x double> %tmp1, i32 1
289 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
290 ret <1 x double> %tmp4
; Copies lane 2 of %tmp1 into lane 4 of %tmp2 (both 64-bit <8 x i8>).
; Expected output: "// kill:" annotations for both d1/q1 and d0/q0, the
; element move into v1, then an fmov of d1 into d0.
293 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
294 ; CHECK-LABEL: ins8b8:
296 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
297 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
298 ; CHECK-NEXT: mov v1.b[4], v0.b[2]
299 ; CHECK-NEXT: fmov d0, d1
301 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
302 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; Copies lane 2 of %tmp1 into lane 3 of %tmp2 (both 64-bit <4 x i16>).
; Expected output: "// kill:" annotations for both d1/q1 and d0/q0, the
; element move into v1, then an fmov of d1 into d0.
306 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
307 ; CHECK-LABEL: ins4h4:
309 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
310 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
311 ; CHECK-NEXT: mov v1.h[3], v0.h[2]
312 ; CHECK-NEXT: fmov d0, d1
314 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
315 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both 64-bit <2 x i32>).
; Expected output: "// kill:" annotations for both d1/q1 and d0/q0, the
; element move into v1, then an fmov of d1 into d0.
319 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
320 ; CHECK-LABEL: ins2s2:
322 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
323 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
324 ; CHECK-NEXT: mov v1.s[1], v0.s[0]
325 ; CHECK-NEXT: fmov d0, d1
327 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
328 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only lane of %tmp1 into the only lane of %tmp2 (both <1 x i64>).
; Expected output: "// kill:" annotations for both d1/q1 and d0/q0, the
; element move into v1, then an fmov of d1 into d0.
332 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
333 ; CHECK-LABEL: ins1d1:
335 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
336 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
337 ; CHECK-NEXT: mov v1.d[0], v0.d[0]
338 ; CHECK-NEXT: fmov d0, d1
340 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
341 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both 64-bit <2 x float>) and
; returns the updated %tmp2.
; Expected output: "// kill:" annotations for both d1/q1 and d0/q0, the
; element move into v1, then an fmov of d1 into d0.
345 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
346 ; CHECK-LABEL: ins2f2:
348 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
349 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
350 ; CHECK-NEXT: mov v1.s[1], v0.s[0]
351 ; CHECK-NEXT: fmov d0, d1
353 %tmp3 = extractelement <2 x float> %tmp1, i32 0
354 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
355 ret <2 x float> %tmp4
; Copies the only lane of %tmp1 into the only lane of %tmp2 (both
; <1 x double>) and returns the result. Only the function label expectation
; is visible in this section of the file.
358 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
359 ; CHECK-LABEL: ins1df1:
362 %tmp3 = extractelement <1 x double> %tmp1, i32 0
363 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
364 ret <1 x double> %tmp4
; Extracts lane 8 of a <16 x i8> vector and zero-extends it to i32.
; Expected output: a single umov from v0.b[8] into w0.
367 define i32 @umovw16b(<16 x i8> %tmp1) {
368 ; CHECK-LABEL: umovw16b:
370 ; CHECK-NEXT: umov w0, v0.b[8]
372 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
373 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of an <8 x i16> vector and zero-extends it to i32.
; Expected output: a single umov from v0.h[2] into w0.
377 define i32 @umovw8h(<8 x i16> %tmp1) {
378 ; CHECK-LABEL: umovw8h:
380 ; CHECK-NEXT: umov w0, v0.h[2]
382 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
383 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 2 of a <4 x i32> vector; the element already has the i32
; return width, so no extension appears in the IR.
; Expected output: a single mov from v0.s[2] into w0.
387 define i32 @umovw4s(<4 x i32> %tmp1) {
388 ; CHECK-LABEL: umovw4s:
390 ; CHECK-NEXT: mov w0, v0.s[2]
392 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Extracts lane 1 of a <2 x i64> vector.
; Expected output: a single mov from v0.d[1] into x0.
396 define i64 @umovx2d(<2 x i64> %tmp1) {
397 ; CHECK-LABEL: umovx2d:
399 ; CHECK-NEXT: mov x0, v0.d[1]
401 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; Extracts lane 7 of a 64-bit <8 x i8> vector and zero-extends it to i32.
; Expected output: a "// kill:" annotation for d0/q0, then a umov from
; v0.b[7] into w0.
405 define i32 @umovw8b(<8 x i8> %tmp1) {
406 ; CHECK-LABEL: umovw8b:
408 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
409 ; CHECK-NEXT: umov w0, v0.b[7]
411 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
412 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of a 64-bit <4 x i16> vector and zero-extends it to i32.
; Expected output: a "// kill:" annotation for d0/q0, then a umov from
; v0.h[2] into w0.
416 define i32 @umovw4h(<4 x i16> %tmp1) {
417 ; CHECK-LABEL: umovw4h:
419 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
420 ; CHECK-NEXT: umov w0, v0.h[2]
422 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
423 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 1 of a 64-bit <2 x i32> vector.
; Expected output: a "// kill:" annotation for d0/q0, then a mov from
; v0.s[1] into w0.
427 define i32 @umovw2s(<2 x i32> %tmp1) {
428 ; CHECK-LABEL: umovw2s:
430 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
431 ; CHECK-NEXT: mov w0, v0.s[1]
433 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; Extracts the only lane of a <1 x i64> vector.
; Expected output: a "// kill:" annotation for d0/q0, then an fmov of d0
; into x0 (a plain register move rather than a lane-indexed one).
437 define i64 @umovx1d(<1 x i64> %tmp1) {
438 ; CHECK-LABEL: umovx1d:
440 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
441 ; CHECK-NEXT: fmov x0, d0
443 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Extracts lane 8 of a <16 x i8> vector, sign-extends it to i32 and adds the
; extended value to itself.
; Expected output: an smov from v0.b[8] into w8, then an add of w8 with
; itself into w0.
447 define i32 @smovw16b(<16 x i8> %tmp1) {
448 ; CHECK-LABEL: smovw16b:
450 ; CHECK-NEXT: smov w8, v0.b[8]
451 ; CHECK-NEXT: add w0, w8, w8
453 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
454 %tmp4 = sext i8 %tmp3 to i32
455 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of an <8 x i16> vector, sign-extends it to i32 and adds the
; extended value to itself.
; Expected output: an smov from v0.h[2] into w8, then an add of w8 with
; itself into w0.
459 define i32 @smovw8h(<8 x i16> %tmp1) {
460 ; CHECK-LABEL: smovw8h:
462 ; CHECK-NEXT: smov w8, v0.h[2]
463 ; CHECK-NEXT: add w0, w8, w8
465 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
466 %tmp4 = sext i16 %tmp3 to i32
467 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 8 of a <16 x i8> vector and sign-extends it to i64.
; Expected output: a single smov from v0.b[8] into the 64-bit register x0.
471 define i64 @smovx16b(<16 x i8> %tmp1) {
472 ; CHECK-LABEL: smovx16b:
474 ; CHECK-NEXT: smov x0, v0.b[8]
476 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
477 %tmp4 = sext i8 %tmp3 to i64
; Extracts lane 2 of an <8 x i16> vector and sign-extends it to i64.
; Expected output: a single smov from v0.h[2] into the 64-bit register x0.
481 define i64 @smovx8h(<8 x i16> %tmp1) {
482 ; CHECK-LABEL: smovx8h:
484 ; CHECK-NEXT: smov x0, v0.h[2]
486 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
487 %tmp4 = sext i16 %tmp3 to i64
; Extracts lane 2 of a <4 x i32> vector and sign-extends it to i64.
; Expected output: a single smov from v0.s[2] into the 64-bit register x0.
491 define i64 @smovx4s(<4 x i32> %tmp1) {
492 ; CHECK-LABEL: smovx4s:
494 ; CHECK-NEXT: smov x0, v0.s[2]
496 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
497 %tmp4 = sext i32 %tmp3 to i64
; Extracts lane 4 of a 64-bit <8 x i8> vector, sign-extends it to i32 and
; adds the extended value to itself.
; Expected output: a "// kill:" annotation for d0/q0, an smov from v0.b[4]
; into w8, then an add of w8 with itself into w0.
501 define i32 @smovw8b(<8 x i8> %tmp1) {
502 ; CHECK-LABEL: smovw8b:
504 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
505 ; CHECK-NEXT: smov w8, v0.b[4]
506 ; CHECK-NEXT: add w0, w8, w8
508 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
509 %tmp4 = sext i8 %tmp3 to i32
510 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of a 64-bit <4 x i16> vector, sign-extends it to i32 and
; adds the extended value to itself.
; Expected output: a "// kill:" annotation for d0/q0, an smov from v0.h[2]
; into w8, then an add of w8 with itself into w0.
514 define i32 @smovw4h(<4 x i16> %tmp1) {
515 ; CHECK-LABEL: smovw4h:
517 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
518 ; CHECK-NEXT: smov w8, v0.h[2]
519 ; CHECK-NEXT: add w0, w8, w8
521 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
522 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 6 of a 64-bit <8 x i8> vector and sign-extends it to i32.
; NOTE(review): despite the "x" in the name, this function returns i32 and the
; expected smov targets the 32-bit register w0, not x0.
527 define i32 @smovx8b(<8 x i8> %tmp1) {
528 ; CHECK-LABEL: smovx8b:
530 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
531 ; CHECK-NEXT: smov w0, v0.b[6]
533 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
534 %tmp4 = sext i8 %tmp3 to i32
; Extracts lane 2 of a 64-bit <4 x i16> vector and sign-extends it to i32.
; NOTE(review): despite the "x" in the name, this function returns i32 and the
; expected smov targets the 32-bit register w0, not x0.
538 define i32 @smovx4h(<4 x i16> %tmp1) {
539 ; CHECK-LABEL: smovx4h:
541 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
542 ; CHECK-NEXT: smov w0, v0.h[2]
544 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
545 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 1 of a 64-bit <2 x i32> vector and sign-extends it to i64.
; Expected output: a "// kill:" annotation for d0/q0, then an smov from
; v0.s[1] into x0.
549 define i64 @smovx2s(<2 x i32> %tmp1) {
550 ; CHECK-LABEL: smovx2s:
552 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
553 ; CHECK-NEXT: smov x0, v0.s[1]
555 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
556 %tmp4 = sext i32 %tmp3 to i64
; Lane copy expressed as a shufflevector: the mask keeps every lane of %v1
; except lane 5, which takes mask index 11, i.e. lane 3 of %v2.
; Expected output: "// kill:" annotations for d0/q0 and d1/q1, one element
; move v1.b[3] -> v0.b[5], and a trailing "// kill:" annotation.
560 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
561 ; CHECK-LABEL: test_vcopy_lane_s8:
563 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
564 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
565 ; CHECK-NEXT: mov v0.b[5], v1.b[3]
566 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
568 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
569 ret <8 x i8> %vset_lane
; Lane copy expressed as a shufflevector: the mask keeps every lane of %v1
; except lane 14, which takes mask index 22, i.e. lane 6 of %v2.
; Expected output: one element move v1.b[6] -> v0.b[14].
572 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
573 ; CHECK-LABEL: test_vcopyq_laneq_s8:
575 ; CHECK-NEXT: mov v0.b[14], v1.b[6]
577 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
578 ret <16 x i8> %vset_lane
; Swapped-operand variant: the mask takes lanes 0-6 from %v2 (indices 8-14)
; and lane 7 from lane 0 of %v1, so %v2 acts as the destination vector.
; Expected output: "// kill:" annotations, one element move
; v0.b[0] -> v1.b[7], then an fmov of d1 into d0.
581 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
582 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
584 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
585 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
586 ; CHECK-NEXT: mov v1.b[7], v0.b[0]
587 ; CHECK-NEXT: fmov d0, d1
589 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
590 ret <8 x i8> %vset_lane
; Swapped-operand variant: the mask takes lane 0 from lane 15 of %v1 and
; lanes 1-15 from %v2 (indices 17-31), so %v2 acts as the destination vector.
; Expected output: one element move v0.b[15] -> v1.b[0], then a full-register
; copy of v1 into v0.
593 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
594 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
596 ; CHECK-NEXT: mov v1.b[0], v0.b[15]
597 ; CHECK-NEXT: mov v0.16b, v1.16b
599 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
600 ret <16 x i8> %vset_lane
; Builds an <8 x i8> splat by inserting the scalar %v1 into each of lanes 0-7
; of an initially undef vector, one insertelement per lane.
; Expected output: the whole chain collapses to a single dup of w0 into v0.8b.
603 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
604 ; CHECK-LABEL: test_vdup_n_u8:
606 ; CHECK-NEXT: dup v0.8b, w0
608 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
609 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
610 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
611 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
612 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
613 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
614 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
615 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
616 ret <8 x i8> %vecinit7.i
; Builds a <4 x i16> splat by inserting the scalar %v1 into each of lanes 0-3
; of an initially undef vector.
; Expected output: a single dup of w0 into v0.4h.
619 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
620 ; CHECK-LABEL: test_vdup_n_u16:
622 ; CHECK-NEXT: dup v0.4h, w0
624 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
625 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
626 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
627 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
628 ret <4 x i16> %vecinit3.i
; Builds a <2 x i32> splat by inserting the scalar %v1 into lanes 0 and 1 of
; an initially undef vector.
; Expected output: a single dup of w0 into v0.2s.
631 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
632 ; CHECK-LABEL: test_vdup_n_u32:
634 ; CHECK-NEXT: dup v0.2s, w0
636 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
637 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
638 ret <2 x i32> %vecinit1.i
; Builds a <1 x i64> vector from the scalar %v1 with one insertelement into
; an undef vector.
; Expected output: an fmov of x0 into d0 rather than a dup.
641 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
642 ; CHECK-LABEL: test_vdup_n_u64:
644 ; CHECK-NEXT: fmov d0, x0
646 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
647 ret <1 x i64> %vecinit.i
; Builds a <16 x i8> splat by inserting the scalar %v1 into each of lanes
; 0-15 of an initially undef vector, one insertelement per lane.
; Expected output: the whole chain collapses to a single dup of w0 into
; v0.16b.
650 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
651 ; CHECK-LABEL: test_vdupq_n_u8:
653 ; CHECK-NEXT: dup v0.16b, w0
655 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
656 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
657 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
658 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
659 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
660 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
661 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
662 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
663 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
664 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
665 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
666 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
667 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
668 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
669 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
670 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
671 ret <16 x i8> %vecinit15.i
; Builds an <8 x i16> splat by inserting the scalar %v1 into each of lanes
; 0-7 of an initially undef vector.
; Expected output: a single dup of w0 into v0.8h.
674 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
675 ; CHECK-LABEL: test_vdupq_n_u16:
677 ; CHECK-NEXT: dup v0.8h, w0
679 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
680 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
681 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
682 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
683 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
684 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
685 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
686 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
687 ret <8 x i16> %vecinit7.i
; Builds a <4 x i32> splat by inserting the scalar %v1 into each of lanes
; 0-3 of an initially undef vector.
; Expected output: a single dup of w0 into v0.4s.
690 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
691 ; CHECK-LABEL: test_vdupq_n_u32:
693 ; CHECK-NEXT: dup v0.4s, w0
695 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
696 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
697 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
698 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
699 ret <4 x i32> %vecinit3.i
; Builds a <2 x i64> splat by inserting the scalar %v1 into lanes 0 and 1 of
; an initially undef vector.
; Expected output: a single dup of x0 into v0.2d.
702 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
703 ; CHECK-LABEL: test_vdupq_n_u64:
705 ; CHECK-NEXT: dup v0.2d, x0
707 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
708 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
709 ret <2 x i64> %vecinit1.i
; Splats lane 5 of a 64-bit <8 x i8> vector across an <8 x i8> result using
; an all-5 shuffle mask.
; Expected output: a "// kill:" annotation for d0/q0, then a dup of v0.b[5]
; into v0.8b.
712 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
713 ; CHECK-LABEL: test_vdup_lane_s8:
715 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
716 ; CHECK-NEXT: dup v0.8b, v0.b[5]
718 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
719 ret <8 x i8> %shuffle
; Splats lane 2 of a 64-bit <4 x i16> vector across a <4 x i16> result using
; an all-2 shuffle mask.
; Expected output: a "// kill:" annotation for d0/q0, then a dup of v0.h[2]
; into v0.4h.
722 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
723 ; CHECK-LABEL: test_vdup_lane_s16:
725 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
726 ; CHECK-NEXT: dup v0.4h, v0.h[2]
728 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
729 ret <4 x i16> %shuffle
; Splats lane 1 of a 64-bit <2 x i32> vector across a <2 x i32> result using
; an all-1 shuffle mask.
; Expected output: a "// kill:" annotation for d0/q0, then a dup of v0.s[1]
; into v0.2s.
732 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
733 ; CHECK-LABEL: test_vdup_lane_s32:
735 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
736 ; CHECK-NEXT: dup v0.2s, v0.s[1]
738 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
739 ret <2 x i32> %shuffle
; Splats lane 5 of a 64-bit <8 x i8> vector across a widened <16 x i8>
; result (the shuffle mask has 16 entries, all 5).
; Expected output: a "// kill:" annotation for d0/q0, then a dup of v0.b[5]
; into v0.16b.
742 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
743 ; CHECK-LABEL: test_vdupq_lane_s8:
745 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
746 ; CHECK-NEXT: dup v0.16b, v0.b[5]
748 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
749 ret <16 x i8> %shuffle
; Splats lane 2 of a 64-bit <4 x i16> vector across a widened <8 x i16>
; result (the shuffle mask has 8 entries, all 2).
; Expected output: a "// kill:" annotation for d0/q0, then a dup of v0.h[2]
; into v0.8h.
752 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
753 ; CHECK-LABEL: test_vdupq_lane_s16:
755 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
756 ; CHECK-NEXT: dup v0.8h, v0.h[2]
758 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
759 ret <8 x i16> %shuffle
; Splats lane 1 of a 64-bit <2 x i32> vector across a widened <4 x i32>
; result (the shuffle mask has 4 entries, all 1).
; Expected output: a "// kill:" annotation for d0/q0, then a dup of v0.s[1]
; into v0.4s.
762 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
763 ; CHECK-LABEL: test_vdupq_lane_s32:
765 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
766 ; CHECK-NEXT: dup v0.4s, v0.s[1]
768 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
769 ret <4 x i32> %shuffle
; Splats the only lane of a <1 x i64> vector across a <2 x i64> result; the
; zeroinitializer mask selects lane 0 for both result lanes.
; Expected output: a "// kill:" annotation for d0/q0, then a dup of v0.d[0]
; into v0.2d.
772 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
773 ; CHECK-LABEL: test_vdupq_lane_s64:
775 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
776 ; CHECK-NEXT: dup v0.2d, v0.d[0]
778 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
779 ret <2 x i64> %shuffle
; Splats lane 5 of a 128-bit <16 x i8> vector across a narrower <8 x i8>
; result (the shuffle mask has 8 entries, all 5).
; Expected output: a single dup of v0.b[5] into v0.8b.
782 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
783 ; CHECK-LABEL: test_vdup_laneq_s8:
785 ; CHECK-NEXT: dup v0.8b, v0.b[5]
787 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
788 ret <8 x i8> %shuffle
; Splats lane 2 of a 128-bit <8 x i16> vector across a narrower <4 x i16>
; result (the shuffle mask has 4 entries, all 2).
; Expected output: a single dup of v0.h[2] into v0.4h.
791 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
792 ; CHECK-LABEL: test_vdup_laneq_s16:
794 ; CHECK-NEXT: dup v0.4h, v0.h[2]
796 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
797 ret <4 x i16> %shuffle
; Splats lane 1 of a 128-bit <4 x i32> vector across a narrower <2 x i32>
; result (the shuffle mask has 2 entries, both 1).
; Expected output: a single dup of v0.s[1] into v0.2s.
800 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
801 ; CHECK-LABEL: test_vdup_laneq_s32:
803 ; CHECK-NEXT: dup v0.2s, v0.s[1]
805 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
806 ret <2 x i32> %shuffle
; Splats lane 5 of a <16 x i8> vector across a <16 x i8> result using an
; all-5 shuffle mask.
; Expected output: a single dup of v0.b[5] into v0.16b.
809 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
810 ; CHECK-LABEL: test_vdupq_laneq_s8:
812 ; CHECK-NEXT: dup v0.16b, v0.b[5]
814 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
815 ret <16 x i8> %shuffle
; Splats lane 2 of an <8 x i16> vector across an <8 x i16> result using an
; all-2 shuffle mask.
; Expected output: a single dup of v0.h[2] into v0.8h.
818 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
819 ; CHECK-LABEL: test_vdupq_laneq_s16:
821 ; CHECK-NEXT: dup v0.8h, v0.h[2]
823 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
824 ret <8 x i16> %shuffle
; Splats lane 1 of a <4 x i32> vector across a <4 x i32> result using an
; all-1 shuffle mask.
; Expected output: a single dup of v0.s[1] into v0.4s.
827 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
828 ; CHECK-LABEL: test_vdupq_laneq_s32:
830 ; CHECK-NEXT: dup v0.4s, v0.s[1]
832 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
833 ret <4 x i32> %shuffle
; Splats lane 0 of a <2 x i64> vector across a <2 x i64> result; the
; zeroinitializer mask selects lane 0 for both result lanes.
; Expected output: a single dup of v0.d[0] into v0.2d.
836 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
837 ; CHECK-LABEL: test_vdupq_laneq_s64:
839 ; CHECK-NEXT: dup v0.2d, v0.d[0]
841 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
842 ret <2 x i64> %shuffle
; Reinterprets an <8 x i8> vector as a scalar i64.
; Expected output: an fmov of d0 into x0.
845 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
846 ; CHECK-LABEL: test_bitcastv8i8toi64:
848 ; CHECK-NEXT: fmov x0, d0
850 %res = bitcast <8 x i8> %in to i64
; Reinterprets a <4 x i16> vector as a scalar i64.
; Expected output: an fmov of d0 into x0.
854 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
855 ; CHECK-LABEL: test_bitcastv4i16toi64:
857 ; CHECK-NEXT: fmov x0, d0
859 %res = bitcast <4 x i16> %in to i64
; Reinterprets a <2 x i32> vector as a scalar i64.
; Expected output: an fmov of d0 into x0.
863 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
864 ; CHECK-LABEL: test_bitcastv2i32toi64:
866 ; CHECK-NEXT: fmov x0, d0
868 %res = bitcast <2 x i32> %in to i64
; Reinterprets a <2 x float> vector as a scalar i64.
; Expected output: an fmov of d0 into x0.
872 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
873 ; CHECK-LABEL: test_bitcastv2f32toi64:
875 ; CHECK-NEXT: fmov x0, d0
877 %res = bitcast <2 x float> %in to i64
; Reinterprets a <1 x i64> vector as a scalar i64.
; Expected output: an fmov of d0 into x0.
881 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
882 ; CHECK-LABEL: test_bitcastv1i64toi64:
884 ; CHECK-NEXT: fmov x0, d0
886 %res = bitcast <1 x i64> %in to i64
; Reinterprets a <1 x double> vector as a scalar i64.
; Expected output: an fmov of d0 into x0.
890 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
891 ; CHECK-LABEL: test_bitcastv1f64toi64:
893 ; CHECK-NEXT: fmov x0, d0
895 %res = bitcast <1 x double> %in to i64
; Reinterprets a scalar i64 as an <8 x i8> vector.
; Expected output: an fmov of x0 into d0.
899 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
900 ; CHECK-LABEL: test_bitcasti64tov8i8:
902 ; CHECK-NEXT: fmov d0, x0
904 %res = bitcast i64 %in to <8 x i8>
; Reinterprets a scalar i64 as a <4 x i16> vector.
; Expected output: an fmov of x0 into d0.
908 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
909 ; CHECK-LABEL: test_bitcasti64tov4i16:
911 ; CHECK-NEXT: fmov d0, x0
913 %res = bitcast i64 %in to <4 x i16>
; Reinterprets a scalar i64 as a <2 x i32> vector.
; Expected output: an fmov of x0 into d0.
917 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
918 ; CHECK-LABEL: test_bitcasti64tov2i32:
920 ; CHECK-NEXT: fmov d0, x0
922 %res = bitcast i64 %in to <2 x i32>
; Reinterprets a scalar i64 as a <2 x float> vector.
; Expected output: an fmov of x0 into d0.
926 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
927 ; CHECK-LABEL: test_bitcasti64tov2f32:
929 ; CHECK-NEXT: fmov d0, x0
931 %res = bitcast i64 %in to <2 x float>
; Reinterprets a scalar i64 as a <1 x i64> vector.
; Expected output: an fmov of x0 into d0.
935 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
936 ; CHECK-LABEL: test_bitcasti64tov1i64:
938 ; CHECK-NEXT: fmov d0, x0
940 %res = bitcast i64 %in to <1 x i64>
; Reinterprets a scalar i64 as a <1 x double> vector and returns it.
; Expected output: an fmov of x0 into d0.
944 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
945 ; CHECK-LABEL: test_bitcasti64tov1f64:
947 ; CHECK-NEXT: fmov d0, x0
949 %res = bitcast i64 %in to <1 x double>
950 ret <1 x double> %res
; Negates an <8 x i8> vector (0 - %a), reinterprets the bits as <1 x double>,
; then converts that double to a signed <1 x i64>.
; Expected output: a vector neg on .8b, fcvtzs from d0 into x8, and an fmov of
; x8 back into d0; the bitcast itself needs no instruction.
953 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
954 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
956 ; CHECK-NEXT: neg v0.8b, v0.8b
957 ; CHECK-NEXT: fcvtzs x8, d0
958 ; CHECK-NEXT: fmov d0, x8
960 %sub.i = sub <8 x i8> zeroinitializer, %a
961 %1 = bitcast <8 x i8> %sub.i to <1 x double>
962 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
963 ret <1 x i64> %vcvt.i
; Negates a <4 x i16> vector (0 - %a), reinterprets the bits as <1 x double>,
; then converts that double to a signed <1 x i64>.
; Expected output: a vector neg on .4h, fcvtzs from d0 into x8, and an fmov of
; x8 back into d0.
966 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
967 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
969 ; CHECK-NEXT: neg v0.4h, v0.4h
970 ; CHECK-NEXT: fcvtzs x8, d0
971 ; CHECK-NEXT: fmov d0, x8
973 %sub.i = sub <4 x i16> zeroinitializer, %a
974 %1 = bitcast <4 x i16> %sub.i to <1 x double>
975 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
976 ret <1 x i64> %vcvt.i
; Negates a <2 x i32> vector (0 - %a), reinterprets the bits as <1 x double>,
; then converts that double to a signed <1 x i64>.
; Expected output: a vector neg on .2s, fcvtzs from d0 into x8, and an fmov of
; x8 back into d0.
979 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
980 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
982 ; CHECK-NEXT: neg v0.2s, v0.2s
983 ; CHECK-NEXT: fcvtzs x8, d0
984 ; CHECK-NEXT: fmov d0, x8
986 %sub.i = sub <2 x i32> zeroinitializer, %a
987 %1 = bitcast <2 x i32> %sub.i to <1 x double>
988 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
989 ret <1 x i64> %vcvt.i
; Negates a <1 x i64> vector (0 - %a), reinterprets the bits as <1 x double>,
; then converts that double to a signed <1 x i64>.
; Expected output: a scalar neg on d0, fcvtzs from d0 into x8, and an fmov of
; x8 back into d0.
992 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
993 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
995 ; CHECK-NEXT: neg d0, d0
996 ; CHECK-NEXT: fcvtzs x8, d0
997 ; CHECK-NEXT: fmov d0, x8
999 %sub.i = sub <1 x i64> zeroinitializer, %a
1000 %1 = bitcast <1 x i64> %sub.i to <1 x double>
1001 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1002 ret <1 x i64> %vcvt.i
; Negates a <2 x float> vector (written as -0.0 - %a), reinterprets the bits
; as <1 x double>, then converts that double to a signed <1 x i64>.
; Expected output: a vector fneg on .2s, fcvtzs from d0 into x8, and an fmov
; of x8 back into d0.
1005 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
1006 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
1008 ; CHECK-NEXT: fneg v0.2s, v0.2s
1009 ; CHECK-NEXT: fcvtzs x8, d0
1010 ; CHECK-NEXT: fmov d0, x8
1012 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
1013 %1 = bitcast <2 x float> %sub.i to <1 x double>
1014 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1015 ret <1 x i64> %vcvt.i
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
; <8 x i8>, then negates that vector (0 - value).
; Expected output: a "// kill:" annotation, fmov of d0 into x8, scvtf from x8
; into d0, and a vector neg on .8b.
1018 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
1019 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
1021 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1022 ; CHECK-NEXT: fmov x8, d0
1023 ; CHECK-NEXT: scvtf d0, x8
1024 ; CHECK-NEXT: neg v0.8b, v0.8b
1026 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1027 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
1028 %sub.i = sub <8 x i8> zeroinitializer, %1
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
; <4 x i16>, then negates that vector (0 - value) and returns it.
; Expected output: a "// kill:" annotation, fmov of d0 into x8, scvtf from x8
; into d0, and a vector neg on .4h.
1032 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
1033 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
1035 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1036 ; CHECK-NEXT: fmov x8, d0
1037 ; CHECK-NEXT: scvtf d0, x8
1038 ; CHECK-NEXT: neg v0.4h, v0.4h
1040 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1041 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
1042 %sub.i = sub <4 x i16> zeroinitializer, %1
1043 ret <4 x i16> %sub.i
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
; <2 x i32>, then negates that vector (0 - value) and returns it.
; Expected output: a "// kill:" annotation, fmov of d0 into x8, scvtf from x8
; into d0, and a vector neg on .2s.
1046 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
1047 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
1049 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1050 ; CHECK-NEXT: fmov x8, d0
1051 ; CHECK-NEXT: scvtf d0, x8
1052 ; CHECK-NEXT: neg v0.2s, v0.2s
1054 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1055 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
1056 %sub.i = sub <2 x i32> zeroinitializer, %1
1057 ret <2 x i32> %sub.i
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
; <1 x i64>, then negates that vector (0 - value) and returns it.
; Expected output: a "// kill:" annotation, fmov of d0 into x8, scvtf from x8
; into d0, and a scalar neg on d0.
1060 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
1061 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
1063 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1064 ; CHECK-NEXT: fmov x8, d0
1065 ; CHECK-NEXT: scvtf d0, x8
1066 ; CHECK-NEXT: neg d0, d0
1068 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1069 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
1070 %sub.i = sub <1 x i64> zeroinitializer, %1
1071 ret <1 x i64> %sub.i
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
; <2 x float>, then negates that vector (written as -0.0 - value) and
; returns it.
; Expected output: a "// kill:" annotation, fmov of d0 into x8, scvtf from x8
; into d0, and a vector fneg on .2s.
1074 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
1075 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
1077 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1078 ; CHECK-NEXT: fmov x8, d0
1079 ; CHECK-NEXT: scvtf d0, x8
1080 ; CHECK-NEXT: fneg v0.2s, v0.2s
1082 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1083 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
1084 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
1085 ret <2 x float> %sub.i
1088 ; Test inserting a scalar into lane 0 of an undef vector
; Inserts the i8 scalar %a into lane 0 of an undef <8 x i8> vector.
; Expected output: an fmov of w0 into s0 (no lane-indexed insert).
1089 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
1090 ; CHECK-LABEL: scalar_to_vector.v8i8:
1092 ; CHECK-NEXT: fmov s0, w0
1094 %b = insertelement <8 x i8> undef, i8 %a, i32 0
; Inserts the i8 scalar %a into lane 0 of an undef <16 x i8> vector.
; Expected output: an fmov of w0 into s0 (no lane-indexed insert).
1098 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
1099 ; CHECK-LABEL: scalar_to_vector.v16i8:
1101 ; CHECK-NEXT: fmov s0, w0
1103 %b = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts the i16 scalar %a into lane 0 of an undef <4 x i16> vector.
; Expected output: an fmov of w0 into s0 (no lane-indexed insert).
1107 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
1108 ; CHECK-LABEL: scalar_to_vector.v4i16:
1110 ; CHECK-NEXT: fmov s0, w0
1112 %b = insertelement <4 x i16> undef, i16 %a, i32 0
; Inserts the i16 scalar %a into lane 0 of an undef <8 x i16> vector.
; Expected output: an fmov of w0 into s0 (no lane-indexed insert).
1116 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
1117 ; CHECK-LABEL: scalar_to_vector.v8i16:
1119 ; CHECK-NEXT: fmov s0, w0
1121 %b = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts the i32 scalar %a into lane 0 of an undef <2 x i32> vector.
; Expected output: an fmov of w0 into s0 (no lane-indexed insert).
1125 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
1126 ; CHECK-LABEL: scalar_to_vector.v2i32:
1128 ; CHECK-NEXT: fmov s0, w0
1130 %b = insertelement <2 x i32> undef, i32 %a, i32 0
; Inserts the i32 scalar %a into lane 0 of an undef <4 x i32> vector.
; Expected output: an fmov of w0 into s0 (no lane-indexed insert).
1134 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
1135 ; CHECK-LABEL: scalar_to_vector.v4i32:
1137 ; CHECK-NEXT: fmov s0, w0
1139 %b = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts the i64 scalar %a into lane 0 of an undef <2 x i64> vector.
; Expected output: an fmov of x0 into d0 (no lane-indexed insert).
1143 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
1144 ; CHECK-LABEL: scalar_to_vector.v2i64:
1146 ; CHECK-NEXT: fmov d0, x0
1148 %b = insertelement <2 x i64> undef, i64 %a, i32 0
; Extracts the single element of a <1 x i8> and inserts it into every lane
; (0-7) of a <8 x i8>. The chain of inserts is expected to collapse into one
; dup from byte lane 0.
1152 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
1153 ; CHECK-LABEL: testDUP.v1i8:
1155 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1156 ; CHECK-NEXT: dup v0.8b, v0.b[0]
1158 %b = extractelement <1 x i8> %a, i32 0
1159 %c = insertelement <8 x i8> undef, i8 %b, i32 0
1160 %d = insertelement <8 x i8> %c, i8 %b, i32 1
1161 %e = insertelement <8 x i8> %d, i8 %b, i32 2
1162 %f = insertelement <8 x i8> %e, i8 %b, i32 3
1163 %g = insertelement <8 x i8> %f, i8 %b, i32 4
1164 %h = insertelement <8 x i8> %g, i8 %b, i32 5
1165 %i = insertelement <8 x i8> %h, i8 %b, i32 6
1166 %j = insertelement <8 x i8> %i, i8 %b, i32 7
; Extracts the single element of a <1 x i16> and inserts it into every lane
; (0-7) of a <8 x i16>. The chain of inserts is expected to collapse into one
; dup from halfword lane 0.
1170 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
1171 ; CHECK-LABEL: testDUP.v1i16:
1173 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1174 ; CHECK-NEXT: dup v0.8h, v0.h[0]
1176 %b = extractelement <1 x i16> %a, i32 0
1177 %c = insertelement <8 x i16> undef, i16 %b, i32 0
1178 %d = insertelement <8 x i16> %c, i16 %b, i32 1
1179 %e = insertelement <8 x i16> %d, i16 %b, i32 2
1180 %f = insertelement <8 x i16> %e, i16 %b, i32 3
1181 %g = insertelement <8 x i16> %f, i16 %b, i32 4
1182 %h = insertelement <8 x i16> %g, i16 %b, i32 5
1183 %i = insertelement <8 x i16> %h, i16 %b, i32 6
1184 %j = insertelement <8 x i16> %i, i16 %b, i32 7
; Extracts the single element of a <1 x i32> and inserts it into every lane
; (0-3) of a <4 x i32>. The chain of inserts is expected to collapse into one
; dup from word lane 0.
1188 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
1189 ; CHECK-LABEL: testDUP.v1i32:
1191 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1192 ; CHECK-NEXT: dup v0.4s, v0.s[0]
1194 %b = extractelement <1 x i32> %a, i32 0
1195 %c = insertelement <4 x i32> undef, i32 %b, i32 0
1196 %d = insertelement <4 x i32> %c, i32 %b, i32 1
1197 %e = insertelement <4 x i32> %d, i32 %b, i32 2
1198 %f = insertelement <4 x i32> %e, i32 %b, i32 3
; Rebuilds a <8 x i8> lane by lane from lanes 0-7 of a <16 x i8>, i.e. takes
; the low half of %x. The only expected output visible here is a sub-register
; kill annotation; no lane moves are listed.
1202 define <8 x i8> @getl(<16 x i8> %x) #0 {
1203 ; CHECK-LABEL: getl:
1205 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1207 %vecext = extractelement <16 x i8> %x, i32 0
1208 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
1209 %vecext1 = extractelement <16 x i8> %x, i32 1
1210 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
1211 %vecext3 = extractelement <16 x i8> %x, i32 2
1212 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
1213 %vecext5 = extractelement <16 x i8> %x, i32 3
1214 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
1215 %vecext7 = extractelement <16 x i8> %x, i32 4
1216 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
1217 %vecext9 = extractelement <16 x i8> %x, i32 5
1218 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
1219 %vecext11 = extractelement <16 x i8> %x, i32 6
1220 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
1221 %vecext13 = extractelement <16 x i8> %x, i32 7
1222 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
1223 ret <8 x i8> %vecinit14
; Builds a <4 x i16> whose lane 0 comes from a variable-index extract of %x
; and whose lanes 1-3 are constant-index copies of the same lanes of %x.
; The variable extract is expected to go through the stack: %x is stored,
; the index is masked to 3 bits and merged into the slot address with bfi,
; and the halfword is reloaded before the three constant lane moves.
1226 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
1227 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
1229 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1230 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
1231 ; CHECK-NEXT: str q0, [sp, #-16]!
1232 ; CHECK-NEXT: and x8, x0, #0x7
1233 ; CHECK-NEXT: mov x9, sp
1234 ; CHECK-NEXT: bfi x9, x8, #1, #3
1235 ; CHECK-NEXT: ldr h1, [x9]
1236 ; CHECK-NEXT: mov v1.h[1], v0.h[1]
1237 ; CHECK-NEXT: mov v1.h[2], v0.h[2]
1238 ; CHECK-NEXT: mov v1.h[3], v0.h[3]
1239 ; CHECK-NEXT: fmov d0, d1
1240 ; CHECK-NEXT: add sp, sp, #16
1242 %tmp = extractelement <8 x i16> %x, i32 %idx
1243 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
1244 %tmp3 = extractelement <8 x i16> %x, i32 1
1245 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1246 %tmp5 = extractelement <8 x i16> %x, i32 2
1247 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1248 %tmp7 = extractelement <8 x i16> %x, i32 3
1249 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Inserts lane 0 of %x into an undef <4 x i16> at a variable index, then
; overwrites lanes 1-3 with constant-index copies from %x. The variable
; insert is expected to go through a stack slot: the index is masked to
; 2 bits and merged into the slot address with bfi, the halfword is stored
; there, and the 64-bit vector is reloaded before the three lane moves.
1253 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
1254 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
1256 ; CHECK-NEXT: sub sp, sp, #16
1257 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1258 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
1259 ; CHECK-NEXT: and x8, x0, #0x3
1260 ; CHECK-NEXT: add x9, sp, #8
1261 ; CHECK-NEXT: bfi x9, x8, #1, #2
1262 ; CHECK-NEXT: str h0, [x9]
1263 ; CHECK-NEXT: ldr d1, [sp, #8]
1264 ; CHECK-NEXT: mov v1.h[1], v0.h[1]
1265 ; CHECK-NEXT: mov v1.h[2], v0.h[2]
1266 ; CHECK-NEXT: mov v1.h[3], v0.h[3]
1267 ; CHECK-NEXT: fmov d0, d1
1268 ; CHECK-NEXT: add sp, sp, #16
1270 %tmp = extractelement <8 x i16> %x, i32 0
1271 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
1272 %tmp3 = extractelement <8 x i16> %x, i32 1
1273 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1274 %tmp5 = extractelement <8 x i16> %x, i32 2
1275 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1276 %tmp7 = extractelement <8 x i16> %x, i32 3
1277 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Truncates element 1 of a <2 x i32> to i16 and splats it across all four
; lanes of a <4 x i16>. The extract/trunc/insert chain is expected to fold
; into a single dup from halfword lane 2 of the source register.
1281 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
1282 ; CHECK-LABEL: test_dup_v2i32_v4i16:
1283 ; CHECK: // %bb.0: // %entry
1284 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1285 ; CHECK-NEXT: dup v0.4h, v0.h[2]
1288 %x = extractelement <2 x i32> %a, i32 1
1289 %vget_lane = trunc i32 %x to i16
1290 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1291 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1292 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1293 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1294 ret <4 x i16> %vecinit3.i
; Truncates element 3 of a <4 x i32> to i16 and splats it across all eight
; lanes of a <8 x i16>. The chain is expected to fold into a single dup from
; halfword lane 6 of the source register.
1297 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
1298 ; CHECK-LABEL: test_dup_v4i32_v8i16:
1299 ; CHECK: // %bb.0: // %entry
1300 ; CHECK-NEXT: dup v0.8h, v0.h[6]
1303 %x = extractelement <4 x i32> %a, i32 3
1304 %vget_lane = trunc i32 %x to i16
1305 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1306 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1307 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1308 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1309 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1310 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1311 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1312 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1313 ret <8 x i16> %vecinit7.i
; Truncates the single element of a <1 x i64> to i16 and splats it across all
; four lanes of a <4 x i16>. The chain is expected to fold into a single dup
; from halfword lane 0 of the source register.
1316 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
1317 ; CHECK-LABEL: test_dup_v1i64_v4i16:
1318 ; CHECK: // %bb.0: // %entry
1319 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1320 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1323 %x = extractelement <1 x i64> %a, i32 0
1324 %vget_lane = trunc i64 %x to i16
1325 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1326 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1327 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1328 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1329 ret <4 x i16> %vecinit3.i
; Truncates the single element of a <1 x i64> to i32 and splats it across
; both lanes of a <2 x i32>. The chain is expected to fold into a single dup
; from word lane 0 of the source register.
1332 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
1333 ; CHECK-LABEL: test_dup_v1i64_v2i32:
1334 ; CHECK: // %bb.0: // %entry
1335 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1336 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1339 %x = extractelement <1 x i64> %a, i32 0
1340 %vget_lane = trunc i64 %x to i32
1341 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1342 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1343 ret <2 x i32> %vecinit1.i
; Truncates element 1 of a <2 x i64> to i16 and splats it across all eight
; lanes of a <8 x i16>. The chain is expected to fold into a single dup from
; halfword lane 4 of the source register.
1346 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
1347 ; CHECK-LABEL: test_dup_v2i64_v8i16:
1348 ; CHECK: // %bb.0: // %entry
1349 ; CHECK-NEXT: dup v0.8h, v0.h[4]
1352 %x = extractelement <2 x i64> %a, i32 1
1353 %vget_lane = trunc i64 %x to i16
1354 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1355 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1356 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1357 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1358 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1359 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1360 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1361 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1362 ret <8 x i16> %vecinit7.i
; Truncates element 1 of a <2 x i64> to i32 and splats it across all four
; lanes of a <4 x i32>. The chain is expected to fold into a single dup from
; word lane 2 of the source register.
1365 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1366 ; CHECK-LABEL: test_dup_v2i64_v4i32:
1367 ; CHECK: // %bb.0: // %entry
1368 ; CHECK-NEXT: dup v0.4s, v0.s[2]
1371 %x = extractelement <2 x i64> %a, i32 1
1372 %vget_lane = trunc i64 %x to i32
1373 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1374 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1375 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1376 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1377 ret <4 x i32> %vecinit3.i
; Truncates element 1 of a 128-bit <4 x i32> to i16 and splats it across a
; 64-bit <4 x i16>. The chain is expected to fold into a single dup from
; halfword lane 2 of the source register.
1380 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1381 ; CHECK-LABEL: test_dup_v4i32_v4i16:
1382 ; CHECK: // %bb.0: // %entry
1383 ; CHECK-NEXT: dup v0.4h, v0.h[2]
1386 %x = extractelement <4 x i32> %a, i32 1
1387 %vget_lane = trunc i32 %x to i16
1388 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1389 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1390 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1391 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1392 ret <4 x i16> %vecinit3.i
; Truncates element 0 of a 128-bit <2 x i64> to i16 and splats it across a
; 64-bit <4 x i16>. The chain is expected to fold into a single dup from
; halfword lane 0 of the source register.
1395 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1396 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1397 ; CHECK: // %bb.0: // %entry
1398 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1401 %x = extractelement <2 x i64> %a, i32 0
1402 %vget_lane = trunc i64 %x to i16
1403 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1404 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1405 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1406 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1407 ret <4 x i16> %vecinit3.i
; Truncates element 0 of a 128-bit <2 x i64> to i32 and splats it across a
; 64-bit <2 x i32>. The chain is expected to fold into a single dup from
; word lane 0 of the source register.
1410 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1411 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1412 ; CHECK: // %bb.0: // %entry
1413 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1416 %x = extractelement <2 x i64> %a, i32 0
1417 %vget_lane = trunc i64 %x to i32
1418 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1419 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1420 ret <2 x i32> %vecinit1.i
; Calls the fmaxv intrinsic on a <2 x float>, round-trips the scalar result
; through a <1 x float>, and inserts it into lane 0 of an undef <2 x float>.
; The expected code is a single fmaxp writing s0, with no extra lane move.
1424 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1425 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1426 ; CHECK: // %bb.0: // %entry
1427 ; CHECK-NEXT: fmaxp s0, v0.2s
1430 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1431 %1 = insertelement <1 x float> undef, float %0, i32 0
1432 %2 = extractelement <1 x float> %1, i32 0
1433 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1434 ret <2 x float> %vecinit1.i
; Calls the fmaxv intrinsic on a <2 x float>, round-trips the scalar result
; through a <1 x float>, and inserts it into lane 0 of an undef <4 x float>.
; The expected code is a single fmaxp writing s0, with no extra lane move.
1437 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1438 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1439 ; CHECK: // %bb.0: // %entry
1440 ; CHECK-NEXT: fmaxp s0, v0.2s
1443 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1444 %1 = insertelement <1 x float> undef, float %0, i32 0
1445 %2 = extractelement <1 x float> %1, i32 0
1446 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1447 ret <4 x float> %vecinit1.i
; Intrinsic taking a <2 x float> and returning a float; called by the
; test_scalar_to_vector_f32_* functions in this file.
1450 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
; Places element 0 of %a into lane 1 of an otherwise undef <2 x i32>.
; Because lane 0 is undef, the expected code is a dup of word lane 0 into
; both lanes.
1452 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1453 ; CHECK-LABEL: test_concat_undef_v1i32:
1454 ; CHECK: // %bb.0: // %entry
1455 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1456 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1459 %0 = extractelement <2 x i32> %a, i32 0
1460 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1461 ret <2 x i32> %vecinit1.i
; Scalar i32 sqabs intrinsic; called by test_concat_v1i32_undef and
; test_concat_diff_v1i32_v1i32 in this file.
1464 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
; Applies the scalar sqabs intrinsic to an i32 argument and inserts the
; result into lane 0 of an undef <2 x i32>. The expected code moves w0 into
; s0 and runs sqabs on the s registers, with no further lane insert.
1466 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1467 ; CHECK-LABEL: test_concat_v1i32_undef:
1468 ; CHECK: // %bb.0: // %entry
1469 ; CHECK-NEXT: fmov s0, w0
1470 ; CHECK-NEXT: sqabs s0, s0
1473 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1474 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1475 ret <2 x i32> %vecinit.i432
; Inserts element 0 of %a into both lanes of a <2 x i32>. The expected code
; is a single dup from word lane 0.
1478 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1479 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1480 ; CHECK: // %bb.0: // %entry
1481 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1482 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1485 %0 = extractelement <2 x i32> %a, i32 0
1486 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1487 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1488 ret <2 x i32> %vecinit1.i
; Applies the scalar sqabs intrinsic to two i32 arguments, puts each result
; in lane 0 of its own undef <2 x i32>, and combines lane 0 of each with a
; shufflevector (mask <0, 2>). The expected code performs both sqabs
; operations on s registers, then moves the second result through w8 into
; word lane 1 of v0.
1491 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1492 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1493 ; CHECK: // %bb.0: // %entry
1494 ; CHECK-NEXT: fmov s1, w1
1495 ; CHECK-NEXT: fmov s0, w0
1496 ; CHECK-NEXT: sqabs s1, s1
1497 ; CHECK-NEXT: sqabs s0, s0
1498 ; CHECK-NEXT: fmov w8, s1
1499 ; CHECK-NEXT: mov v0.s[1], w8
1500 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1503 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1504 %d = insertelement <2 x i32> undef, i32 %c, i32 0
1505 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1506 %f = insertelement <2 x i32> undef, i32 %e, i32 0
1507 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
; Shuffles the low eight bytes of %x (mask 0-7) together with the low eight
; bytes of %y (mask 16-23). The expected code is a single 64-bit lane move
; of v1.d[0] into v0.d[1].
1511 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1512 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1513 ; CHECK: // %bb.0: // %entry
1514 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1517 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1518 ret <16 x i8> %vecinit30
; Copies all eight bytes of the 64-bit %x into lanes 0-7 of a <16 x i8> with
; per-lane extract/insert, then shuffles in the low eight bytes of %y as
; lanes 8-15. The expected code is a single move of v1.d[0] into v0.d[1].
1521 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1522 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1523 ; CHECK: // %bb.0: // %entry
1524 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1525 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1528 %vecext = extractelement <8 x i8> %x, i32 0
1529 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1530 %vecext1 = extractelement <8 x i8> %x, i32 1
1531 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1532 %vecext3 = extractelement <8 x i8> %x, i32 2
1533 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1534 %vecext5 = extractelement <8 x i8> %x, i32 3
1535 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1536 %vecext7 = extractelement <8 x i8> %x, i32 4
1537 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1538 %vecext9 = extractelement <8 x i8> %x, i32 5
1539 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1540 %vecext11 = extractelement <8 x i8> %x, i32 6
1541 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1542 %vecext13 = extractelement <8 x i8> %x, i32 7
1543 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1544 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1545 ret <16 x i8> %vecinit30
; Builds a <16 x i8> entirely with per-lane extract/insert: lanes 0-7 come
; from the low eight bytes of %x and lanes 8-15 from all eight bytes of the
; 64-bit %y. The expected code is a single move of v1.d[0] into v0.d[1].
1548 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1549 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1550 ; CHECK: // %bb.0: // %entry
1551 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1552 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1555 %vecext = extractelement <16 x i8> %x, i32 0
1556 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1557 %vecext1 = extractelement <16 x i8> %x, i32 1
1558 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1559 %vecext3 = extractelement <16 x i8> %x, i32 2
1560 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1561 %vecext5 = extractelement <16 x i8> %x, i32 3
1562 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1563 %vecext7 = extractelement <16 x i8> %x, i32 4
1564 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1565 %vecext9 = extractelement <16 x i8> %x, i32 5
1566 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1567 %vecext11 = extractelement <16 x i8> %x, i32 6
1568 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1569 %vecext13 = extractelement <16 x i8> %x, i32 7
1570 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1571 %vecext15 = extractelement <8 x i8> %y, i32 0
1572 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1573 %vecext17 = extractelement <8 x i8> %y, i32 1
1574 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1575 %vecext19 = extractelement <8 x i8> %y, i32 2
1576 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1577 %vecext21 = extractelement <8 x i8> %y, i32 3
1578 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1579 %vecext23 = extractelement <8 x i8> %y, i32 4
1580 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1581 %vecext25 = extractelement <8 x i8> %y, i32 5
1582 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1583 %vecext27 = extractelement <8 x i8> %y, i32 6
1584 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1585 %vecext29 = extractelement <8 x i8> %y, i32 7
1586 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1587 ret <16 x i8> %vecinit30
; Concatenates two 64-bit <8 x i8> vectors into a <16 x i8> using only
; per-lane extract/insert: %x fills lanes 0-7 and %y fills lanes 8-15.
; The expected code is a single move of v1.d[0] into v0.d[1].
1590 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1591 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1592 ; CHECK: // %bb.0: // %entry
1593 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1594 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1595 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1598 %vecext = extractelement <8 x i8> %x, i32 0
1599 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1600 %vecext1 = extractelement <8 x i8> %x, i32 1
1601 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1602 %vecext3 = extractelement <8 x i8> %x, i32 2
1603 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1604 %vecext5 = extractelement <8 x i8> %x, i32 3
1605 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1606 %vecext7 = extractelement <8 x i8> %x, i32 4
1607 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1608 %vecext9 = extractelement <8 x i8> %x, i32 5
1609 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1610 %vecext11 = extractelement <8 x i8> %x, i32 6
1611 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1612 %vecext13 = extractelement <8 x i8> %x, i32 7
1613 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1614 %vecext15 = extractelement <8 x i8> %y, i32 0
1615 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1616 %vecext17 = extractelement <8 x i8> %y, i32 1
1617 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1618 %vecext19 = extractelement <8 x i8> %y, i32 2
1619 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1620 %vecext21 = extractelement <8 x i8> %y, i32 3
1621 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1622 %vecext23 = extractelement <8 x i8> %y, i32 4
1623 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1624 %vecext25 = extractelement <8 x i8> %y, i32 5
1625 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1626 %vecext27 = extractelement <8 x i8> %y, i32 6
1627 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1628 %vecext29 = extractelement <8 x i8> %y, i32 7
1629 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1630 ret <16 x i8> %vecinit30
; Shuffles the low four halfwords of %x (mask 0-3) together with the low
; four halfwords of %y (mask 8-11). The expected code is a single move of
; v1.d[0] into v0.d[1].
1633 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1634 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1635 ; CHECK: // %bb.0: // %entry
1636 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1639 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1640 ret <8 x i16> %vecinit14
; Copies all four halfwords of the 64-bit %x into lanes 0-3 of a <8 x i16>
; with per-lane extract/insert, then shuffles in the low four halfwords of
; %y as lanes 4-7. The expected code is a single move of v1.d[0] into
; v0.d[1].
1643 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1644 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1645 ; CHECK: // %bb.0: // %entry
1646 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1647 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1650 %vecext = extractelement <4 x i16> %x, i32 0
1651 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1652 %vecext1 = extractelement <4 x i16> %x, i32 1
1653 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1654 %vecext3 = extractelement <4 x i16> %x, i32 2
1655 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1656 %vecext5 = extractelement <4 x i16> %x, i32 3
1657 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1658 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1659 ret <8 x i16> %vecinit14
; Builds a <8 x i16> entirely with per-lane extract/insert: lanes 0-3 come
; from the low four halfwords of %x and lanes 4-7 from all four halfwords of
; the 64-bit %y. The expected code is a single move of v1.d[0] into v0.d[1].
1662 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1663 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1664 ; CHECK: // %bb.0: // %entry
1665 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1666 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1669 %vecext = extractelement <8 x i16> %x, i32 0
1670 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1671 %vecext1 = extractelement <8 x i16> %x, i32 1
1672 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1673 %vecext3 = extractelement <8 x i16> %x, i32 2
1674 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1675 %vecext5 = extractelement <8 x i16> %x, i32 3
1676 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1677 %vecext7 = extractelement <4 x i16> %y, i32 0
1678 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1679 %vecext9 = extractelement <4 x i16> %y, i32 1
1680 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1681 %vecext11 = extractelement <4 x i16> %y, i32 2
1682 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1683 %vecext13 = extractelement <4 x i16> %y, i32 3
1684 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1685 ret <8 x i16> %vecinit14
; Concatenates two 64-bit <4 x i16> vectors into a <8 x i16> using only
; per-lane extract/insert: %x fills lanes 0-3 and %y fills lanes 4-7.
; The expected code is a single move of v1.d[0] into v0.d[1].
1688 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1689 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1690 ; CHECK: // %bb.0: // %entry
1691 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1692 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1693 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1696 %vecext = extractelement <4 x i16> %x, i32 0
1697 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1698 %vecext1 = extractelement <4 x i16> %x, i32 1
1699 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1700 %vecext3 = extractelement <4 x i16> %x, i32 2
1701 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1702 %vecext5 = extractelement <4 x i16> %x, i32 3
1703 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1704 %vecext7 = extractelement <4 x i16> %y, i32 0
1705 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1706 %vecext9 = extractelement <4 x i16> %y, i32 1
1707 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1708 %vecext11 = extractelement <4 x i16> %y, i32 2
1709 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1710 %vecext13 = extractelement <4 x i16> %y, i32 3
1711 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1712 ret <8 x i16> %vecinit14
; Shuffles the low two words of %x (mask 0-1) together with the low two
; words of %y (mask 4-5). The expected code is a single move of v1.d[0]
; into v0.d[1].
1715 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1716 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1717 ; CHECK: // %bb.0: // %entry
1718 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1721 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1722 ret <4 x i32> %vecinit6
; Copies both words of the 64-bit %x into lanes 0-1 of a <4 x i32> with
; per-lane extract/insert, then shuffles in the low two words of %y as
; lanes 2-3. The expected code is a single move of v1.d[0] into v0.d[1].
1725 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1726 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1727 ; CHECK: // %bb.0: // %entry
1728 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1729 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1732 %vecext = extractelement <2 x i32> %x, i32 0
1733 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1734 %vecext1 = extractelement <2 x i32> %x, i32 1
1735 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1736 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1737 ret <4 x i32> %vecinit6
; Builds a <4 x i32> entirely with per-lane extract/insert: lanes 0-1 come
; from the low two words of %x and lanes 2-3 from both words of the 64-bit
; %y. The expected code is a single move of v1.d[0] into v0.d[1].
1740 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1741 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1742 ; CHECK: // %bb.0: // %entry
1743 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1744 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1747 %vecext = extractelement <4 x i32> %x, i32 0
1748 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1749 %vecext1 = extractelement <4 x i32> %x, i32 1
1750 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1751 %vecext3 = extractelement <2 x i32> %y, i32 0
1752 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1753 %vecext5 = extractelement <2 x i32> %y, i32 1
1754 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1755 ret <4 x i32> %vecinit6
; Concatenates two 64-bit <2 x i32> vectors into a <4 x i32> with a single
; widening shufflevector (mask 0-3). The expected code is a single move of
; v1.d[0] into v0.d[1].
1758 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1759 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1760 ; CHECK: // %bb.0: // %entry
1761 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1762 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1763 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1766 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1767 ret <4 x i32> %vecinit6
; Shuffles element 0 of %x with element 0 of %y (mask <0, 2>). The expected
; code is a zip1 on the .2d arrangement.
1770 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1771 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1772 ; CHECK: // %bb.0: // %entry
1773 ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
1776 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1777 ret <2 x i64> %vecinit2
; Inserts the single element of the <1 x i64> %x into lane 0 of a
; <2 x i64>, then shuffles it with element 0 of %y (mask <0, 2>). The
; expected code is a zip1 on the .2d arrangement.
1780 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1781 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1782 ; CHECK: // %bb.0: // %entry
1783 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1784 ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
1787 %vecext = extractelement <1 x i64> %x, i32 0
1788 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1789 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1790 ret <2 x i64> %vecinit2
; Builds a <2 x i64> from element 0 of %x (lane 0) and the single element of
; the <1 x i64> %y (lane 1) using extract/insert. The expected code is a
; single move of v1.d[0] into v0.d[1].
1793 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1794 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1795 ; CHECK: // %bb.0: // %entry
1796 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1797 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1800 %vecext = extractelement <2 x i64> %x, i32 0
1801 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1802 %vecext1 = extractelement <1 x i64> %y, i32 0
1803 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1804 ret <2 x i64> %vecinit2
; Concatenates two <1 x i64> vectors into a <2 x i64> using extract/insert:
; %x supplies lane 0 and %y supplies lane 1. The expected code is a single
; move of v1.d[0] into v0.d[1].
1807 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1808 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1809 ; CHECK: // %bb.0: // %entry
1810 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1811 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1812 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1815 %vecext = extractelement <1 x i64> %x, i32 0
1816 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1817 %vecext1 = extractelement <1 x i64> %y, i32 0
1818 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1819 ret <2 x i64> %vecinit2
; Splats a constant-zero <1 x i16> to four lanes with an all-zero shuffle
; mask. The expected code materializes the result with a zero movi.
1823 define <4 x i16> @concat_vector_v4i16_const() {
1824 ; CHECK-LABEL: concat_vector_v4i16_const:
1826 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1828 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the constant <1 x i16> <1> to four lanes with an all-zero shuffle
; mask. The expected code is a movi of #1 on the .4h arrangement.
1832 define <4 x i16> @concat_vector_v4i16_const_one() {
1833 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1835 ; CHECK-NEXT: movi v0.4h, #1
1837 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats a constant-zero <1 x i32> to four lanes with an all-zero shuffle
; mask. The expected code materializes the result with a zero movi.
1841 define <4 x i32> @concat_vector_v4i32_const() {
1842 ; CHECK-LABEL: concat_vector_v4i32_const:
1844 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1846 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats a constant-zero <1 x i8> to eight lanes with an all-zero shuffle
; mask. The expected code materializes the result with a zero movi.
1850 define <8 x i8> @concat_vector_v8i8_const() {
1851 ; CHECK-LABEL: concat_vector_v8i8_const:
1853 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1855 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats a constant-zero <1 x i16> to eight lanes with an all-zero shuffle
; mask. The expected code materializes the result with a zero movi.
1859 define <8 x i16> @concat_vector_v8i16_const() {
1860 ; CHECK-LABEL: concat_vector_v8i16_const:
1862 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1864 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the constant <1 x i16> <1> to eight lanes with an all-zero shuffle
; mask. The expected code is a movi of #1 on the .8h arrangement.
1868 define <8 x i16> @concat_vector_v8i16_const_one() {
1869 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1871 ; CHECK-NEXT: movi v0.8h, #1
1873 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats a constant-zero <1 x i8> to sixteen lanes with an all-zero shuffle
; mask. The expected code materializes the result with a zero movi.
1877 define <16 x i8> @concat_vector_v16i8_const() {
1878 ; CHECK-LABEL: concat_vector_v16i8_const:
1880 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1882 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
; Splats the single element of a <1 x i16> argument to four lanes with an
; all-zero shuffle mask. The expected code is a dup from halfword lane 0.
1886 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1887 ; CHECK-LABEL: concat_vector_v4i16:
1889 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1890 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1892 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of a <1 x i32> argument to four lanes with an
; all-zero shuffle mask. The expected code is a dup from word lane 0.
1896 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1897 ; CHECK-LABEL: concat_vector_v4i32:
1899 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1900 ; CHECK-NEXT: dup v0.4s, v0.s[0]
1902 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of a <1 x i8> argument to eight lanes with an
; all-zero shuffle mask. The expected code is a dup from byte lane 0.
1906 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1907 ; CHECK-LABEL: concat_vector_v8i8:
1909 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1910 ; CHECK-NEXT: dup v0.8b, v0.b[0]
1912 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of a <1 x i16> argument to eight lanes with an
; all-zero shuffle mask. The expected code is a dup from halfword lane 0.
1916 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1917 ; CHECK-LABEL: concat_vector_v8i16:
1919 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1920 ; CHECK-NEXT: dup v0.8h, v0.h[0]
1922 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the single element of a <1 x i8> argument to sixteen lanes with an
; all-zero shuffle mask. The expected code is a dup from byte lane 0.
1926 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1927 ; CHECK-LABEL: concat_vector_v16i8:
1929 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1930 ; CHECK-NEXT: dup v0.16b, v0.b[0]
1932 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer