1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=armv7a-none-eabihf -mattr=+neon -verify-machineinstrs | FileCheck %s
; Inserts the scalar i8 argument %tmp2 into lane 15 of the <16 x i8> argument %tmp1.
; The expected output writes r0 into d1[7] with a single vmov.8.
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
7 ; CHECK-NEXT: vmov.8 d1[7], r0
9 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; Inserts the scalar i16 argument %tmp2 into lane 6 of the <8 x i16> argument %tmp1.
; The expected output writes r0 into d1[2] with a single vmov.16.
13 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
14 ; CHECK-LABEL: ins8hw:
16 ; CHECK-NEXT: vmov.16 d1[2], r0
18 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; Inserts the scalar i32 argument %tmp2 into lane 2 of the <4 x i32> argument %tmp1.
; The expected output writes r0 into d1[0] with a single vmov.32.
22 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
23 ; CHECK-LABEL: ins4sw:
25 ; CHECK-NEXT: vmov.32 d1[0], r0
27 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; Inserts the scalar i64 argument %tmp2 into lane 1 of the <2 x i64> argument %tmp1.
; The expected output fills d1 with two 32-bit lane moves from r0 and r1.
31 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
32 ; CHECK-LABEL: ins2dw:
34 ; CHECK-NEXT: vmov.32 d1[0], r0
35 ; CHECK-NEXT: vmov.32 d1[1], r1
37 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; Inserts the scalar i8 argument %tmp2 into lane 5 of the <8 x i8> argument %tmp1.
; The expected output writes r0 into d0[5] with a single vmov.8.
41 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
42 ; CHECK-LABEL: ins8bw:
44 ; CHECK-NEXT: vmov.8 d0[5], r0
46 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; Inserts the scalar i16 argument %tmp2 into lane 3 of the <4 x i16> argument %tmp1.
; The expected output writes r0 into d0[3] with a single vmov.16.
50 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
51 ; CHECK-LABEL: ins4hw:
53 ; CHECK-NEXT: vmov.16 d0[3], r0
55 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; Inserts the scalar i32 argument %tmp2 into lane 1 of the <2 x i32> argument %tmp1.
; The expected output writes r0 into d0[1] with a single vmov.32.
59 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
60 ; CHECK-LABEL: ins2sw:
62 ; CHECK-NEXT: vmov.32 d0[1], r0
64 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Copies lane 2 of the <16 x i8> %tmp1 into lane 15 of the <16 x i8> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d3[7]) and then copies q1 to q0.
68 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
69 ; CHECK-LABEL: ins16b16:
71 ; CHECK-NEXT: vmov.u8 r0, d0[2]
72 ; CHECK-NEXT: vmov.8 d3[7], r0
73 ; CHECK-NEXT: vorr q0, q1, q1
75 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
76 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of the <8 x i16> %tmp1 into lane 7 of the <8 x i16> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d3[3]) and then copies q1 to q0.
80 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
81 ; CHECK-LABEL: ins8h8:
83 ; CHECK-NEXT: vmov.u16 r0, d0[2]
84 ; CHECK-NEXT: vmov.16 d3[3], r0
85 ; CHECK-NEXT: vorr q0, q1, q1
87 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
88 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 2 of the <4 x i32> %tmp1 into lane 1 of the <4 x i32> %tmp2.
; The expected output moves the element through r0 (d1[0] -> d2[1]) and then copies q1 to q0.
92 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
93 ; CHECK-LABEL: ins4s4:
95 ; CHECK-NEXT: vmov.32 r0, d1[0]
96 ; CHECK-NEXT: vmov.32 d2[1], r0
97 ; CHECK-NEXT: vorr q0, q1, q1
99 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
100 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of the <2 x i64> %tmp1 into lane 1 of the <2 x i64> %tmp2.
; The expected output moves d0 through the r0/r1 pair into d3 and then copies q1 to q0.
104 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
105 ; CHECK-LABEL: ins2d2:
107 ; CHECK-NEXT: vmov r0, r1, d0
108 ; CHECK-NEXT: vmov.32 d3[0], r0
109 ; CHECK-NEXT: vmov.32 d3[1], r1
110 ; CHECK-NEXT: vorr q0, q1, q1
112 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
113 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies lane 2 of the <4 x float> %tmp1 into lane 1 of the <4 x float> %tmp2 and returns the result.
; The expected output is a direct s-register move (s2 -> s5) followed by a q1 -> q0 copy.
117 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
118 ; CHECK-LABEL: ins4f4:
120 ; CHECK-NEXT: vmov.f32 s5, s2
121 ; CHECK-NEXT: vorr q0, q1, q1
123 %tmp3 = extractelement <4 x float> %tmp1, i32 2
124 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
125 ret <4 x float> %tmp4
; Copies lane 0 of the <2 x double> %tmp1 into lane 1 of the <2 x double> %tmp2 and returns the result.
; The expected output copies d0 into d3 and then q1 into q0.
128 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
129 ; CHECK-LABEL: ins2df2:
131 ; CHECK-NEXT: vorr d3, d0, d0
132 ; CHECK-NEXT: vorr q0, q1, q1
134 %tmp3 = extractelement <2 x double> %tmp1, i32 0
135 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
136 ret <2 x double> %tmp4
; Copies lane 2 of the 64-bit <8 x i8> %tmp1 into lane 15 of the 128-bit <16 x i8> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d3[7]) and then copies q1 to q0.
139 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
140 ; CHECK-LABEL: ins8b16:
142 ; CHECK-NEXT: vmov.u8 r0, d0[2]
143 ; CHECK-NEXT: vmov.8 d3[7], r0
144 ; CHECK-NEXT: vorr q0, q1, q1
146 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
147 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of the 64-bit <4 x i16> %tmp1 into lane 7 of the 128-bit <8 x i16> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d3[3]) and then copies q1 to q0.
151 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
152 ; CHECK-LABEL: ins4h8:
154 ; CHECK-NEXT: vmov.u16 r0, d0[2]
155 ; CHECK-NEXT: vmov.16 d3[3], r0
156 ; CHECK-NEXT: vorr q0, q1, q1
158 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
159 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 1 of the 64-bit <2 x i32> %tmp1 into lane 1 of the 128-bit <4 x i32> %tmp2.
; The expected output moves the element through r0 (d0[1] -> d2[1]) and then copies q1 to q0.
163 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
164 ; CHECK-LABEL: ins2s4:
166 ; CHECK-NEXT: vmov.32 r0, d0[1]
167 ; CHECK-NEXT: vmov.32 d2[1], r0
168 ; CHECK-NEXT: vorr q0, q1, q1
170 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
171 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only lane of the <1 x i64> %tmp1 into lane 1 of the <2 x i64> %tmp2.
; The expected output moves both 32-bit halves of d0 through r0/r1 into d3, then copies q1 to q0.
175 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
176 ; CHECK-LABEL: ins1d2:
178 ; CHECK-NEXT: vmov.32 r0, d0[0]
179 ; CHECK-NEXT: vmov.32 r1, d0[1]
180 ; CHECK-NEXT: vmov.32 d3[0], r0
181 ; CHECK-NEXT: vmov.32 d3[1], r1
182 ; CHECK-NEXT: vorr q0, q1, q1
184 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
185 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies lane 1 of the <2 x float> %tmp1 into lane 1 of the <4 x float> %tmp2 and returns the result.
; The expected output is a direct s-register move (s1 -> s5) followed by a q1 -> q0 copy.
189 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
190 ; CHECK-LABEL: ins2f4:
192 ; CHECK-NEXT: vmov.f32 s5, s1
193 ; CHECK-NEXT: vorr q0, q1, q1
195 %tmp3 = extractelement <2 x float> %tmp1, i32 1
196 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
197 ret <4 x float> %tmp4
; Copies the only lane of the <1 x double> %tmp1 into lane 1 of the <2 x double> %tmp2 and returns the result.
; The expected output copies d0 into d3 and then q1 into q0.
200 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
201 ; CHECK-LABEL: ins1f2:
203 ; CHECK-NEXT: vorr d3, d0, d0
204 ; CHECK-NEXT: vorr q0, q1, q1
206 %tmp3 = extractelement <1 x double> %tmp1, i32 0
207 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
208 ret <2 x double> %tmp4
; Same extract/insert as @ins1f2, but the <2 x double> destination is the first argument
; and the <1 x double> source is the second. The expected output is one vmov.f64 from d2 into d1.
211 define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) {
212 ; CHECK-LABEL: ins1f2_args_flipped:
214 ; CHECK-NEXT: vmov.f64 d1, d2
216 %tmp3 = extractelement <1 x double> %tmp1, i32 0
217 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
218 ret <2 x double> %tmp4
; Copies lane 2 of the 128-bit <16 x i8> %tmp1 into lane 7 of the 64-bit <8 x i8> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d2[7]) and then copies d2 to d0.
221 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
222 ; CHECK-LABEL: ins16b8:
224 ; CHECK-NEXT: vmov.u8 r0, d0[2]
225 ; CHECK-NEXT: vmov.8 d2[7], r0
226 ; CHECK-NEXT: vorr d0, d2, d2
228 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
229 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
; Copies lane 2 of the 128-bit <8 x i16> %tmp1 into lane 3 of the 64-bit <4 x i16> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d2[3]) and then copies d2 to d0.
233 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
234 ; CHECK-LABEL: ins8h4:
236 ; CHECK-NEXT: vmov.u16 r0, d0[2]
237 ; CHECK-NEXT: vmov.16 d2[3], r0
238 ; CHECK-NEXT: vorr d0, d2, d2
240 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
241 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 2 of the 128-bit <4 x i32> %tmp1 into lane 1 of the 64-bit <2 x i32> %tmp2.
; The expected output moves the element through r0 (d1[0] -> d2[1]) and then copies d2 to d0.
245 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
246 ; CHECK-LABEL: ins4s2:
248 ; CHECK-NEXT: vmov.32 r0, d1[0]
249 ; CHECK-NEXT: vmov.32 d2[1], r0
250 ; CHECK-NEXT: vorr d0, d2, d2
252 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
253 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of the <2 x i64> %tmp1 into the only lane of the <1 x i64> %tmp2.
; The expected output moves d0 through the r0/r1 pair into d2 and then copies d2 to d0.
257 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
258 ; CHECK-LABEL: ins2d1:
260 ; CHECK-NEXT: vmov r0, r1, d0
261 ; CHECK-NEXT: vmov.32 d2[0], r0
262 ; CHECK-NEXT: vmov.32 d2[1], r1
263 ; CHECK-NEXT: vorr d0, d2, d2
265 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
266 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 2 of the <4 x float> %tmp1 into lane 1 of the <2 x float> %tmp2 and returns the result.
; The expected output is an s-register move (s2 -> s5) followed by a d2 -> d0 move.
270 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
271 ; CHECK-LABEL: ins4f2:
273 ; CHECK-NEXT: vmov.f32 s5, s2
274 ; CHECK-NEXT: vmov.f64 d0, d2
276 %tmp3 = extractelement <4 x float> %tmp1, i32 2
277 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
278 ret <2 x float> %tmp4
; Copies lane 1 of the <2 x double> %tmp1 into the only lane of the <1 x double> %tmp2 and returns it.
; The expected output is a single vmov.f64 from d1 into d0.
281 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
282 ; CHECK-LABEL: ins2f1:
284 ; CHECK-NEXT: vmov.f64 d0, d1
286 %tmp3 = extractelement <2 x double> %tmp1, i32 1
287 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
288 ret <1 x double> %tmp4
; Copies lane 2 of the <8 x i8> %tmp1 into lane 4 of the <8 x i8> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d1[4]) and then copies d1 to d0.
291 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
292 ; CHECK-LABEL: ins8b8:
294 ; CHECK-NEXT: vmov.u8 r0, d0[2]
295 ; CHECK-NEXT: vmov.8 d1[4], r0
296 ; CHECK-NEXT: vorr d0, d1, d1
298 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
299 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; Copies lane 2 of the <4 x i16> %tmp1 into lane 3 of the <4 x i16> %tmp2.
; The expected output moves the element through r0 (d0[2] -> d1[3]) and then copies d1 to d0.
303 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
304 ; CHECK-LABEL: ins4h4:
306 ; CHECK-NEXT: vmov.u16 r0, d0[2]
307 ; CHECK-NEXT: vmov.16 d1[3], r0
308 ; CHECK-NEXT: vorr d0, d1, d1
310 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
311 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 0 of the <2 x i32> %tmp1 into lane 1 of the <2 x i32> %tmp2.
; The expected output moves the element through r0 (d0[0] -> d1[1]) and then copies d1 to d0.
315 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
316 ; CHECK-LABEL: ins2s2:
318 ; CHECK-NEXT: vmov.32 r0, d0[0]
319 ; CHECK-NEXT: vmov.32 d1[1], r0
320 ; CHECK-NEXT: vorr d0, d1, d1
322 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
323 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only lane of the <1 x i64> %tmp1 into the only lane of the <1 x i64> %tmp2.
; The expected output moves both 32-bit halves of d0 through r0/r1 into d1, then copies d1 to d0.
327 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
328 ; CHECK-LABEL: ins1d1:
330 ; CHECK-NEXT: vmov.32 r0, d0[0]
331 ; CHECK-NEXT: vmov.32 r1, d0[1]
332 ; CHECK-NEXT: vmov.32 d1[0], r0
333 ; CHECK-NEXT: vmov.32 d1[1], r1
334 ; CHECK-NEXT: vorr d0, d1, d1
336 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
337 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 0 of the <2 x float> %tmp1 into lane 1 of the <2 x float> %tmp2 and returns the result.
; The expected output is an s-register move (s0 -> s3) followed by a d1 -> d0 move.
341 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
342 ; CHECK-LABEL: ins2f2:
344 ; CHECK-NEXT: vmov.f32 s3, s0
345 ; CHECK-NEXT: vmov.f64 d0, d1
347 %tmp3 = extractelement <2 x float> %tmp1, i32 0
348 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
349 ret <2 x float> %tmp4
; Copies the only lane of the <1 x double> %tmp1 into the only lane of the <1 x double> %tmp2,
; so the returned vector holds the element of %tmp1.
352 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
353 ; CHECK-LABEL: ins1df1:
356 %tmp3 = extractelement <1 x double> %tmp1, i32 0
357 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
358 ret <1 x double> %tmp4
; Extracts lane 8 of a <16 x i8> and zero-extends it to i32.
; The expected output folds the extension into an unsigned lane move (vmov.u8 from d1[0]).
361 define i32 @umovw16b(<16 x i8> %tmp1) {
362 ; CHECK-LABEL: umovw16b:
364 ; CHECK-NEXT: vmov.u8 r0, d1[0]
366 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
367 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of an <8 x i16> and zero-extends it to i32.
; The expected output folds the extension into an unsigned lane move (vmov.u16 from d0[2]).
371 define i32 @umovw8h(<8 x i16> %tmp1) {
372 ; CHECK-LABEL: umovw8h:
374 ; CHECK-NEXT: vmov.u16 r0, d0[2]
376 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
377 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 2 of a <4 x i32>; no extension is needed for a 32-bit element.
; The expected output is a single vmov.32 from d1[0].
381 define i32 @umovw4s(<4 x i32> %tmp1) {
382 ; CHECK-LABEL: umovw4s:
384 ; CHECK-NEXT: vmov.32 r0, d1[0]
386 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Extracts lane 1 of a <2 x i64>.
; The expected output moves d1 into the r0/r1 register pair.
390 define i64 @umovx2d(<2 x i64> %tmp1) {
391 ; CHECK-LABEL: umovx2d:
393 ; CHECK-NEXT: vmov r0, r1, d1
395 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; Extracts lane 7 of an <8 x i8> and zero-extends it to i32.
; The expected output folds the extension into an unsigned lane move (vmov.u8 from d0[7]).
399 define i32 @umovw8b(<8 x i8> %tmp1) {
400 ; CHECK-LABEL: umovw8b:
402 ; CHECK-NEXT: vmov.u8 r0, d0[7]
404 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
405 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of a <4 x i16> and zero-extends it to i32.
; The expected output folds the extension into an unsigned lane move (vmov.u16 from d0[2]).
409 define i32 @umovw4h(<4 x i16> %tmp1) {
410 ; CHECK-LABEL: umovw4h:
412 ; CHECK-NEXT: vmov.u16 r0, d0[2]
414 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
415 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 1 of a <2 x i32>; no extension is needed for a 32-bit element.
; The expected output is a single vmov.32 from d0[1].
419 define i32 @umovw2s(<2 x i32> %tmp1) {
420 ; CHECK-LABEL: umovw2s:
422 ; CHECK-NEXT: vmov.32 r0, d0[1]
424 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; Extracts the only lane of a <1 x i64>.
; The expected output reads the two 32-bit halves of d0 into r0 and r1 separately.
428 define i64 @umovx1d(<1 x i64> %tmp1) {
429 ; CHECK-LABEL: umovx1d:
431 ; CHECK-NEXT: vmov.32 r0, d0[0]
432 ; CHECK-NEXT: vmov.32 r1, d0[1]
434 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Extracts lane 8 of a <16 x i8>, sign-extends it to i32 and adds the result to itself.
; The expected output folds the extension into a signed lane move (vmov.s8 from d1[0]) before the add.
438 define i32 @smovw16b(<16 x i8> %tmp1) {
439 ; CHECK-LABEL: smovw16b:
441 ; CHECK-NEXT: vmov.s8 r0, d1[0]
442 ; CHECK-NEXT: add r0, r0, r0
444 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
445 %tmp4 = sext i8 %tmp3 to i32
446 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of an <8 x i16>, sign-extends it to i32 and adds the result to itself.
; The expected output folds the extension into a signed lane move (vmov.s16 from d0[2]) before the add.
450 define i32 @smovw8h(<8 x i16> %tmp1) {
451 ; CHECK-LABEL: smovw8h:
453 ; CHECK-NEXT: vmov.s16 r0, d0[2]
454 ; CHECK-NEXT: add r0, r0, r0
456 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
457 %tmp4 = sext i16 %tmp3 to i32
458 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 8 of a <16 x i8> and sign-extends it to i64.
; The expected output uses a signed lane move for the low word and an arithmetic shift by 31 for the high word.
462 define i64 @smovx16b(<16 x i8> %tmp1) {
463 ; CHECK-LABEL: smovx16b:
465 ; CHECK-NEXT: vmov.s8 r0, d1[0]
466 ; CHECK-NEXT: asr r1, r0, #31
468 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
469 %tmp4 = sext i8 %tmp3 to i64
; Extracts lane 2 of an <8 x i16> and sign-extends it to i64.
; The expected output uses a signed lane move for the low word and an arithmetic shift by 31 for the high word.
473 define i64 @smovx8h(<8 x i16> %tmp1) {
474 ; CHECK-LABEL: smovx8h:
476 ; CHECK-NEXT: vmov.s16 r0, d0[2]
477 ; CHECK-NEXT: asr r1, r0, #31
479 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
480 %tmp4 = sext i16 %tmp3 to i64
; Extracts lane 2 of a <4 x i32> and sign-extends it to i64.
; The expected output reads d1[0] into r0 and derives the high word with an arithmetic shift by 31.
484 define i64 @smovx4s(<4 x i32> %tmp1) {
485 ; CHECK-LABEL: smovx4s:
487 ; CHECK-NEXT: vmov.32 r0, d1[0]
488 ; CHECK-NEXT: asr r1, r0, #31
490 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
491 %tmp4 = sext i32 %tmp3 to i64
; Extracts lane 4 of an <8 x i8>, sign-extends it to i32 and adds the result to itself.
; The expected output folds the extension into a signed lane move (vmov.s8 from d0[4]) before the add.
495 define i32 @smovw8b(<8 x i8> %tmp1) {
496 ; CHECK-LABEL: smovw8b:
498 ; CHECK-NEXT: vmov.s8 r0, d0[4]
499 ; CHECK-NEXT: add r0, r0, r0
501 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
502 %tmp4 = sext i8 %tmp3 to i32
503 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of a <4 x i16>, sign-extends it to i32 and adds the result to itself.
; The expected output folds the extension into a signed lane move (vmov.s16 from d0[2]) before the add.
507 define i32 @smovw4h(<4 x i16> %tmp1) {
508 ; CHECK-LABEL: smovw4h:
510 ; CHECK-NEXT: vmov.s16 r0, d0[2]
511 ; CHECK-NEXT: add r0, r0, r0
513 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
514 %tmp4 = sext i16 %tmp3 to i32
515 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 6 of an <8 x i8> and sign-extends it to i32 (the function's declared return type).
; The expected output is a single signed lane move (vmov.s8 from d0[6]).
519 define i32 @smovx8b(<8 x i8> %tmp1) {
520 ; CHECK-LABEL: smovx8b:
522 ; CHECK-NEXT: vmov.s8 r0, d0[6]
524 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
525 %tmp4 = sext i8 %tmp3 to i32
; Extracts lane 2 of a <4 x i16> and sign-extends it to i32 (the function's declared return type).
; The expected output is a single signed lane move (vmov.s16 from d0[2]).
529 define i32 @smovx4h(<4 x i16> %tmp1) {
530 ; CHECK-LABEL: smovx4h:
532 ; CHECK-NEXT: vmov.s16 r0, d0[2]
534 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
535 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 1 of a <2 x i32> and sign-extends it to i64.
; The expected output reads d0[1] into r0 and derives the high word with an arithmetic shift by 31.
539 define i64 @smovx2s(<2 x i32> %tmp1) {
540 ; CHECK-LABEL: smovx2s:
542 ; CHECK-NEXT: vmov.32 r0, d0[1]
543 ; CHECK-NEXT: asr r1, r0, #31
545 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
546 %tmp4 = sext i32 %tmp3 to i64
; Shuffle that keeps %v1 except for lane 5, which takes lane 3 of %v2 (mask index 11 = 8 + 3).
; The expected output is a vtbl.8 over {d0, d1} whose index vector is loaded from the constant
; pool; the pool bytes listed below equal the shuffle mask.
550 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
551 ; CHECK-LABEL: test_vcopy_lane_s8:
553 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
554 ; CHECK-NEXT: vldr d16, .LCPI50_0
555 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
556 ; CHECK-NEXT: vtbl.8 d0, {d0, d1}, d16
558 ; CHECK-NEXT: .p2align 3
559 ; CHECK-NEXT: @ %bb.1:
560 ; CHECK-NEXT: .LCPI50_0:
561 ; CHECK-NEXT: .byte 0 @ 0x0
562 ; CHECK-NEXT: .byte 1 @ 0x1
563 ; CHECK-NEXT: .byte 2 @ 0x2
564 ; CHECK-NEXT: .byte 3 @ 0x3
565 ; CHECK-NEXT: .byte 4 @ 0x4
566 ; CHECK-NEXT: .byte 11 @ 0xb
567 ; CHECK-NEXT: .byte 6 @ 0x6
568 ; CHECK-NEXT: .byte 7 @ 0x7
569 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
570 ret <8 x i8> %vset_lane
; Shuffle that keeps %v1 except for lane 14, which takes lane 6 of %v2 (mask index 22 = 16 + 6).
; Only the upper half of the result changes, so the expected output is a vtbl.8 that rewrites d1
; from the table {d1, d2}; the pool bytes are the indices relative to that table.
573 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
574 ; CHECK-LABEL: test_vcopyq_laneq_s8:
576 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
577 ; CHECK-NEXT: vldr d16, .LCPI51_0
578 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
579 ; CHECK-NEXT: vtbl.8 d1, {d1, d2}, d16
580 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1
582 ; CHECK-NEXT: .p2align 3
583 ; CHECK-NEXT: @ %bb.1:
584 ; CHECK-NEXT: .LCPI51_0:
585 ; CHECK-NEXT: .byte 0 @ 0x0
586 ; CHECK-NEXT: .byte 1 @ 0x1
587 ; CHECK-NEXT: .byte 2 @ 0x2
588 ; CHECK-NEXT: .byte 3 @ 0x3
589 ; CHECK-NEXT: .byte 4 @ 0x4
590 ; CHECK-NEXT: .byte 5 @ 0x5
591 ; CHECK-NEXT: .byte 14 @ 0xe
592 ; CHECK-NEXT: .byte 7 @ 0x7
593 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
594 ret <16 x i8> %vset_lane
; Shuffle with the operand roles swapped: lanes 0-6 come from %v2 (mask indices 8-14) and
; lane 7 takes lane 0 of %v1. The expected output is a vtbl.8 over {d0, d1} whose index vector
; is loaded from the constant pool; the pool bytes listed below equal the shuffle mask.
597 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
598 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
600 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
601 ; CHECK-NEXT: vldr d16, .LCPI52_0
602 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
603 ; CHECK-NEXT: vtbl.8 d0, {d0, d1}, d16
605 ; CHECK-NEXT: .p2align 3
606 ; CHECK-NEXT: @ %bb.1:
607 ; CHECK-NEXT: .LCPI52_0:
608 ; CHECK-NEXT: .byte 8 @ 0x8
609 ; CHECK-NEXT: .byte 9 @ 0x9
610 ; CHECK-NEXT: .byte 10 @ 0xa
611 ; CHECK-NEXT: .byte 11 @ 0xb
612 ; CHECK-NEXT: .byte 12 @ 0xc
613 ; CHECK-NEXT: .byte 13 @ 0xd
614 ; CHECK-NEXT: .byte 14 @ 0xe
615 ; CHECK-NEXT: .byte 0 @ 0x0
616 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
617 ret <8 x i8> %vset_lane
; Shuffle with the operand roles swapped: lane 0 takes lane 15 of %v1 and lanes 1-15 come from
; the same lanes of %v2 (mask indices 17-31). Only the lower half of the result differs from %v2,
; so the expected output is a vtbl.8 that rewrites d18 from the table {d17, d18}; the pool bytes
; are the indices relative to that table.
620 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
621 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
623 ; CHECK-NEXT: vorr q9, q1, q1
624 ; CHECK-NEXT: vldr d20, .LCPI53_0
625 ; CHECK-NEXT: vorr q8, q0, q0
626 ; CHECK-NEXT: vtbl.8 d18, {d17, d18}, d20
627 ; CHECK-NEXT: vorr q0, q9, q9
629 ; CHECK-NEXT: .p2align 3
630 ; CHECK-NEXT: @ %bb.1:
631 ; CHECK-NEXT: .LCPI53_0:
632 ; CHECK-NEXT: .byte 7 @ 0x7
633 ; CHECK-NEXT: .byte 9 @ 0x9
634 ; CHECK-NEXT: .byte 10 @ 0xa
635 ; CHECK-NEXT: .byte 11 @ 0xb
636 ; CHECK-NEXT: .byte 12 @ 0xc
637 ; CHECK-NEXT: .byte 13 @ 0xd
638 ; CHECK-NEXT: .byte 14 @ 0xe
639 ; CHECK-NEXT: .byte 15 @ 0xf
640 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
641 ret <16 x i8> %vset_lane
; Builds an <8 x i8> by inserting the scalar %v1 into every lane, one insertelement per lane.
; The expected output collapses the chain into a single vdup.8 from r0 into d0.
644 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
645 ; CHECK-LABEL: test_vdup_n_u8:
647 ; CHECK-NEXT: vdup.8 d0, r0
649 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
650 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
651 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
652 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
653 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
654 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
655 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
656 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
657 ret <8 x i8> %vecinit7.i
; Builds a <4 x i16> by inserting the scalar %v1 into every lane.
; The expected output collapses the chain into a single vdup.16 from r0 into d0.
660 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
661 ; CHECK-LABEL: test_vdup_n_u16:
663 ; CHECK-NEXT: vdup.16 d0, r0
665 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
666 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
667 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
668 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
669 ret <4 x i16> %vecinit3.i
; Builds a <2 x i32> by inserting the scalar %v1 into both lanes.
; The expected output collapses the chain into a single vdup.32 from r0 into d0.
672 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
673 ; CHECK-LABEL: test_vdup_n_u32:
675 ; CHECK-NEXT: vdup.32 d0, r0
677 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
678 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
679 ret <2 x i32> %vecinit1.i
; Builds a <1 x i64> by inserting the scalar %v1 into lane 0 of an undef vector.
; The expected output fills d0 with two 32-bit lane moves from r0 and r1.
682 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
683 ; CHECK-LABEL: test_vdup_n_u64:
685 ; CHECK-NEXT: vmov.32 d0[0], r0
686 ; CHECK-NEXT: vmov.32 d0[1], r1
688 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
689 ret <1 x i64> %vecinit.i
; Builds a <16 x i8> by inserting the scalar %v1 into every lane, one insertelement per lane.
; The expected output collapses the chain into a single vdup.8 from r0 into q0.
692 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
693 ; CHECK-LABEL: test_vdupq_n_u8:
695 ; CHECK-NEXT: vdup.8 q0, r0
697 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
698 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
699 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
700 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
701 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
702 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
703 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
704 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
705 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
706 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
707 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
708 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
709 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
710 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
711 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
712 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
713 ret <16 x i8> %vecinit15.i
; Builds an <8 x i16> by inserting the scalar %v1 into every lane.
; The expected output collapses the chain into a single vdup.16 from r0 into q0.
716 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
717 ; CHECK-LABEL: test_vdupq_n_u16:
719 ; CHECK-NEXT: vdup.16 q0, r0
721 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
722 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
723 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
724 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
725 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
726 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
727 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
728 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
729 ret <8 x i16> %vecinit7.i
; Builds a <4 x i32> by inserting the scalar %v1 into every lane.
; The expected output collapses the chain into a single vdup.32 from r0 into q0.
732 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
733 ; CHECK-LABEL: test_vdupq_n_u32:
735 ; CHECK-NEXT: vdup.32 q0, r0
737 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
738 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
739 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
740 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
741 ret <4 x i32> %vecinit3.i
; Builds a <2 x i64> by inserting the scalar %v1 into both lanes.
; The expected output fills d0 from r0/r1 with two 32-bit lane moves and then copies d0 into d1.
744 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
745 ; CHECK-LABEL: test_vdupq_n_u64:
747 ; CHECK-NEXT: vmov.32 d0[0], r0
748 ; CHECK-NEXT: vmov.32 d0[1], r1
749 ; CHECK-NEXT: vorr d1, d0, d0
751 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
752 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
753 ret <2 x i64> %vecinit1.i
; Splats lane 5 of an <8 x i8> across all eight lanes via shufflevector.
; The expected output is a single vdup.8 from d0[5] into d0.
756 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
757 ; CHECK-LABEL: test_vdup_lane_s8:
759 ; CHECK-NEXT: vdup.8 d0, d0[5]
761 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
762 ret <8 x i8> %shuffle
; Splats lane 2 of a <4 x i16> across all four lanes via shufflevector.
; The expected output is a single vdup.16 from d0[2] into d0.
765 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
766 ; CHECK-LABEL: test_vdup_lane_s16:
768 ; CHECK-NEXT: vdup.16 d0, d0[2]
770 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
771 ret <4 x i16> %shuffle
; Splats lane 1 of a <2 x i32> across both lanes via shufflevector.
; The expected output is a single vdup.32 from d0[1] into d0.
774 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
775 ; CHECK-LABEL: test_vdup_lane_s32:
777 ; CHECK-NEXT: vdup.32 d0, d0[1]
779 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
780 ret <2 x i32> %shuffle
; Splats lane 5 of a 64-bit <8 x i8> into a 128-bit <16 x i8> result via a widening shufflevector.
; The expected output is a single vdup.8 from d0[5] into q0.
783 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
784 ; CHECK-LABEL: test_vdupq_lane_s8:
786 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
787 ; CHECK-NEXT: vdup.8 q0, d0[5]
789 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
790 ret <16 x i8> %shuffle
; Splats lane 2 of a 64-bit <4 x i16> into a 128-bit <8 x i16> result via a widening shufflevector.
; The expected output is a single vdup.16 from d0[2] into q0.
793 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
794 ; CHECK-LABEL: test_vdupq_lane_s16:
796 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
797 ; CHECK-NEXT: vdup.16 q0, d0[2]
799 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
800 ret <8 x i16> %shuffle
; Splats lane 1 of a 64-bit <2 x i32> into a 128-bit <4 x i32> result via a widening shufflevector.
; The expected output is a single vdup.32 from d0[1] into q0.
803 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
804 ; CHECK-LABEL: test_vdupq_lane_s32:
806 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
807 ; CHECK-NEXT: vdup.32 q0, d0[1]
809 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
810 ret <4 x i32> %shuffle
; Splats the only lane of a <1 x i64> into both lanes of a <2 x i64> (all-zero shuffle mask).
; The expected output copies d0 into d1 with a vmov.f64.
813 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
814 ; CHECK-LABEL: test_vdupq_lane_s64:
816 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
817 ; CHECK-NEXT: vmov.f64 d1, d0
819 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
820 ret <2 x i64> %shuffle
; Splats lane 5 of a 128-bit <16 x i8> into a 64-bit <8 x i8> result via a narrowing shufflevector.
; The expected output is a single vdup.8 from d0[5] into d0.
823 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
824 ; CHECK-LABEL: test_vdup_laneq_s8:
826 ; CHECK-NEXT: vdup.8 d0, d0[5]
828 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
829 ret <8 x i8> %shuffle
; Splats lane 2 of a 128-bit <8 x i16> into a 64-bit <4 x i16> result via a narrowing shufflevector.
; The expected output is a single vdup.16 from d0[2] into d0.
832 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
833 ; CHECK-LABEL: test_vdup_laneq_s16:
835 ; CHECK-NEXT: vdup.16 d0, d0[2]
837 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
838 ret <4 x i16> %shuffle
; Splats lane 1 of a 128-bit <4 x i32> into a 64-bit <2 x i32> result via a narrowing shufflevector.
; The expected output is a single vdup.32 from d0[1] into d0.
841 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
842 ; CHECK-LABEL: test_vdup_laneq_s32:
844 ; CHECK-NEXT: vdup.32 d0, d0[1]
846 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
847 ret <2 x i32> %shuffle
; Splats lane 5 of a <16 x i8> across all sixteen lanes via shufflevector.
; The expected output is a single vdup.8 from d0[5] into q0.
850 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
851 ; CHECK-LABEL: test_vdupq_laneq_s8:
853 ; CHECK-NEXT: vdup.8 q0, d0[5]
855 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
856 ret <16 x i8> %shuffle
; Splats lane 2 of an <8 x i16> across all eight lanes via shufflevector.
; The expected output is a single vdup.16 from d0[2] into q0.
859 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
860 ; CHECK-LABEL: test_vdupq_laneq_s16:
862 ; CHECK-NEXT: vdup.16 q0, d0[2]
864 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
865 ret <8 x i16> %shuffle
; Splats lane 1 of a <4 x i32> across all four lanes via shufflevector.
; The expected output is a single vdup.32 from d0[1] into q0.
868 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
869 ; CHECK-LABEL: test_vdupq_laneq_s32:
871 ; CHECK-NEXT: vdup.32 q0, d0[1]
873 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
874 ret <4 x i32> %shuffle
; Splats lane 0 of a <2 x i64> into both lanes (all-zero shuffle mask).
; The expected output copies d0 into d1 with a vmov.f64.
877 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
878 ; CHECK-LABEL: test_vdupq_laneq_s64:
880 ; CHECK-NEXT: vmov.f64 d1, d0
882 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
883 ret <2 x i64> %shuffle
; Bitcasts an <8 x i8> vector to a scalar i64.
; The expected output moves d0 into the r0/r1 register pair.
886 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
887 ; CHECK-LABEL: test_bitcastv8i8toi64:
889 ; CHECK-NEXT: vmov r0, r1, d0
891 %res = bitcast <8 x i8> %in to i64
; Bitcasts a <4 x i16> vector to a scalar i64.
; The expected output moves d0 into the r0/r1 register pair.
895 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
896 ; CHECK-LABEL: test_bitcastv4i16toi64:
898 ; CHECK-NEXT: vmov r0, r1, d0
900 %res = bitcast <4 x i16> %in to i64
; Bitcasts a <2 x i32> vector to a scalar i64.
; The expected output moves d0 into the r0/r1 register pair.
904 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
905 ; CHECK-LABEL: test_bitcastv2i32toi64:
907 ; CHECK-NEXT: vmov r0, r1, d0
909 %res = bitcast <2 x i32> %in to i64
; Bitcasts a <2 x float> vector to a scalar i64.
; The expected output moves d0 into the r0/r1 register pair.
913 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
914 ; CHECK-LABEL: test_bitcastv2f32toi64:
916 ; CHECK-NEXT: vmov r0, r1, d0
918 %res = bitcast <2 x float> %in to i64
; Bitcasts a <1 x i64> vector to a scalar i64.
; The expected output moves d0 into the r0/r1 register pair.
922 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
923 ; CHECK-LABEL: test_bitcastv1i64toi64:
925 ; CHECK-NEXT: vmov r0, r1, d0
927 %res = bitcast <1 x i64> %in to i64
; Bitcasts a <1 x double> vector to a scalar i64.
; The expected output moves d0 into the r0/r1 register pair.
931 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
932 ; CHECK-LABEL: test_bitcastv1f64toi64:
934 ; CHECK-NEXT: vmov r0, r1, d0
936 %res = bitcast <1 x double> %in to i64
; Bitcasts a scalar i64 to an <8 x i8> vector.
; The expected output moves the r0/r1 register pair into d0.
940 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
941 ; CHECK-LABEL: test_bitcasti64tov8i8:
943 ; CHECK-NEXT: vmov d0, r0, r1
945 %res = bitcast i64 %in to <8 x i8>
; Bitcasts a scalar i64 to a <4 x i16> vector.
; The expected output moves the r0/r1 register pair into d0.
949 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
950 ; CHECK-LABEL: test_bitcasti64tov4i16:
952 ; CHECK-NEXT: vmov d0, r0, r1
954 %res = bitcast i64 %in to <4 x i16>
; Bitcasts a scalar i64 to a <2 x i32> vector.
; The expected output moves the r0/r1 register pair into d0.
958 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
959 ; CHECK-LABEL: test_bitcasti64tov2i32:
961 ; CHECK-NEXT: vmov d0, r0, r1
963 %res = bitcast i64 %in to <2 x i32>
; Bitcasts a scalar i64 to a <2 x float> vector.
; The expected output moves the r0/r1 register pair into d0.
967 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
968 ; CHECK-LABEL: test_bitcasti64tov2f32:
970 ; CHECK-NEXT: vmov d0, r0, r1
972 %res = bitcast i64 %in to <2 x float>
; Bitcasts a scalar i64 to a <1 x i64> vector.
; The expected output moves the r0/r1 register pair into d0.
976 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
977 ; CHECK-LABEL: test_bitcasti64tov1i64:
979 ; CHECK-NEXT: vmov d0, r0, r1
981 %res = bitcast i64 %in to <1 x i64>
; Bitcasts a scalar i64 to a <1 x double> vector and returns it.
; The expected output moves the r0/r1 register pair into d0.
985 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
986 ; CHECK-LABEL: test_bitcasti64tov1f64:
988 ; CHECK-NEXT: vmov d0, r0, r1
990 %res = bitcast i64 %in to <1 x double>
991 ret <1 x double> %res
; Negates an <8 x i8> (0 - %a), reinterprets the bits as <1 x double>, and converts that
; double to a signed <1 x i64>. The expected output performs the vector negate with vneg.s8
; and the double-to-i64 conversion through a call to __aeabi_d2lz.
994 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
995 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
997 ; CHECK-NEXT: .save {r11, lr}
998 ; CHECK-NEXT: push {r11, lr}
999 ; CHECK-NEXT: vneg.s8 d16, d0
1000 ; CHECK-NEXT: vmov r0, r1, d16
1001 ; CHECK-NEXT: bl __aeabi_d2lz
1002 ; CHECK-NEXT: vmov.32 d0[0], r0
1003 ; CHECK-NEXT: vmov.32 d0[1], r1
1004 ; CHECK-NEXT: pop {r11, pc}
1005 %sub.i = sub <8 x i8> zeroinitializer, %a
1006 %1 = bitcast <8 x i8> %sub.i to <1 x double>
1007 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1008 ret <1 x i64> %vcvt.i
; Negates a <4 x i16> (0 - %a), reinterprets the bits as <1 x double>, and converts that
; double to a signed <1 x i64>. The expected output performs the vector negate with vneg.s16
; and the double-to-i64 conversion through a call to __aeabi_d2lz.
1011 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
1012 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
1014 ; CHECK-NEXT: .save {r11, lr}
1015 ; CHECK-NEXT: push {r11, lr}
1016 ; CHECK-NEXT: vneg.s16 d16, d0
1017 ; CHECK-NEXT: vmov r0, r1, d16
1018 ; CHECK-NEXT: bl __aeabi_d2lz
1019 ; CHECK-NEXT: vmov.32 d0[0], r0
1020 ; CHECK-NEXT: vmov.32 d0[1], r1
1021 ; CHECK-NEXT: pop {r11, pc}
1022 %sub.i = sub <4 x i16> zeroinitializer, %a
1023 %1 = bitcast <4 x i16> %sub.i to <1 x double>
1024 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1025 ret <1 x i64> %vcvt.i
; Negates a <2 x i32> (0 - %a), reinterprets the bits as <1 x double>, and converts that
; double to a signed <1 x i64>. The expected output performs the vector negate with vneg.s32
; and the double-to-i64 conversion through a call to __aeabi_d2lz.
1028 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
1029 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
1031 ; CHECK-NEXT: .save {r11, lr}
1032 ; CHECK-NEXT: push {r11, lr}
1033 ; CHECK-NEXT: vneg.s32 d16, d0
1034 ; CHECK-NEXT: vmov r0, r1, d16
1035 ; CHECK-NEXT: bl __aeabi_d2lz
1036 ; CHECK-NEXT: vmov.32 d0[0], r0
1037 ; CHECK-NEXT: vmov.32 d0[1], r1
1038 ; CHECK-NEXT: pop {r11, pc}
1039 %sub.i = sub <2 x i32> zeroinitializer, %a
1040 %1 = bitcast <2 x i32> %sub.i to <1 x double>
1041 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1042 ret <1 x i64> %vcvt.i
; Negates a <1 x i64> (0 - %a), reinterprets the bits as <1 x double>, and converts that
; double to a signed <1 x i64>. The expected output materialises zero and uses vsub.i64 for
; the negate, then calls __aeabi_d2lz for the double-to-i64 conversion.
1045 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
1046 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
1048 ; CHECK-NEXT: .save {r11, lr}
1049 ; CHECK-NEXT: push {r11, lr}
1050 ; CHECK-NEXT: vmov.i32 d16, #0x0
1051 ; CHECK-NEXT: vsub.i64 d16, d16, d0
1052 ; CHECK-NEXT: vmov r0, r1, d16
1053 ; CHECK-NEXT: bl __aeabi_d2lz
1054 ; CHECK-NEXT: vmov.32 d0[0], r0
1055 ; CHECK-NEXT: vmov.32 d0[1], r1
1056 ; CHECK-NEXT: pop {r11, pc}
1057 %sub.i = sub <1 x i64> zeroinitializer, %a
1058 %1 = bitcast <1 x i64> %sub.i to <1 x double>
1059 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1060 ret <1 x i64> %vcvt.i
; Negates a <2 x float> (-0.0 - %a), reinterprets the bits as <1 x double>, and converts that
; double to a signed <1 x i64>. The expected output performs the negate with vneg.f32 and the
; double-to-i64 conversion through a call to __aeabi_d2lz.
1063 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
1064 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
1066 ; CHECK-NEXT: .save {r11, lr}
1067 ; CHECK-NEXT: push {r11, lr}
1068 ; CHECK-NEXT: vneg.f32 d16, d0
1069 ; CHECK-NEXT: vmov r0, r1, d16
1070 ; CHECK-NEXT: bl __aeabi_d2lz
1071 ; CHECK-NEXT: vmov.32 d0[0], r0
1072 ; CHECK-NEXT: vmov.32 d0[1], r1
1073 ; CHECK-NEXT: pop {r11, pc}
1074 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
1075 %1 = bitcast <2 x float> %sub.i to <1 x double>
1076 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1077 ret <1 x i64> %vcvt.i
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as <8 x i8>, and negates
; that vector (0 - x). The expected output calls __aeabi_l2d for the i64-to-double conversion
; and performs the negate with vneg.s8.
1080 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
1081 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
1083 ; CHECK-NEXT: .save {r11, lr}
1084 ; CHECK-NEXT: push {r11, lr}
1085 ; CHECK-NEXT: vmov.32 r0, d0[0]
1086 ; CHECK-NEXT: vmov.32 r1, d0[1]
1087 ; CHECK-NEXT: bl __aeabi_l2d
1088 ; CHECK-NEXT: vmov d16, r0, r1
1089 ; CHECK-NEXT: vneg.s8 d0, d16
1090 ; CHECK-NEXT: pop {r11, pc}
1091 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1092 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
1093 %sub.i = sub <8 x i8> zeroinitializer, %1
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as <4 x i16>, and negates
; that vector (0 - x). The expected output calls __aeabi_l2d for the i64-to-double conversion
; and performs the negate with vneg.s16.
1097 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
1098 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
1100 ; CHECK-NEXT: .save {r11, lr}
1101 ; CHECK-NEXT: push {r11, lr}
1102 ; CHECK-NEXT: vmov.32 r0, d0[0]
1103 ; CHECK-NEXT: vmov.32 r1, d0[1]
1104 ; CHECK-NEXT: bl __aeabi_l2d
1105 ; CHECK-NEXT: vmov d16, r0, r1
1106 ; CHECK-NEXT: vneg.s16 d0, d16
1107 ; CHECK-NEXT: pop {r11, pc}
1108 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1109 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
1110 %sub.i = sub <4 x i16> zeroinitializer, %1
1111 ret <4 x i16> %sub.i
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as <2 x i32>, and negates
; that vector (0 - x). The expected output calls __aeabi_l2d for the i64-to-double conversion
; and performs the negate with vneg.s32.
1114 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
1115 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
1117 ; CHECK-NEXT: .save {r11, lr}
1118 ; CHECK-NEXT: push {r11, lr}
1119 ; CHECK-NEXT: vmov.32 r0, d0[0]
1120 ; CHECK-NEXT: vmov.32 r1, d0[1]
1121 ; CHECK-NEXT: bl __aeabi_l2d
1122 ; CHECK-NEXT: vmov d16, r0, r1
1123 ; CHECK-NEXT: vneg.s32 d0, d16
1124 ; CHECK-NEXT: pop {r11, pc}
1125 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1126 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
1127 %sub.i = sub <2 x i32> zeroinitializer, %1
1128 ret <2 x i32> %sub.i
; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as <1 x i64>, and negates
; that value (0 - x). The expected output calls __aeabi_l2d for the i64-to-double conversion,
; then materialises zero and uses vsub.i64 for the negate.
1131 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
1132 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
1134 ; CHECK-NEXT: .save {r11, lr}
1135 ; CHECK-NEXT: push {r11, lr}
1136 ; CHECK-NEXT: vmov.32 r0, d0[0]
1137 ; CHECK-NEXT: vmov.32 r1, d0[1]
1138 ; CHECK-NEXT: bl __aeabi_l2d
1139 ; CHECK-NEXT: vmov.i32 d16, #0x0
1140 ; CHECK-NEXT: vmov d17, r0, r1
1141 ; CHECK-NEXT: vsub.i64 d0, d16, d17
1142 ; CHECK-NEXT: pop {r11, pc}
1143 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1144 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
1145 %sub.i = sub <1 x i64> zeroinitializer, %1
1146 ret <1 x i64> %sub.i
; sitofp <1 x i64> -> <1 x double>, bitcast to <2 x float>, then a
; floating-point negation written as fsub from a -0.0 splat.
; Expected codegen: __aeabi_l2d call followed by vneg.f32.
1149 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
1150 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
1152 ; CHECK-NEXT: .save {r11, lr}
1153 ; CHECK-NEXT: push {r11, lr}
1154 ; CHECK-NEXT: vmov.32 r0, d0[0]
1155 ; CHECK-NEXT: vmov.32 r1, d0[1]
1156 ; CHECK-NEXT: bl __aeabi_l2d
1157 ; CHECK-NEXT: vmov d16, r0, r1
1158 ; CHECK-NEXT: vneg.f32 d0, d16
1159 ; CHECK-NEXT: pop {r11, pc}
1160 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1161 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
1162 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
1163 ret <2 x float> %sub.i
1166 ; Test insert element into an undef vector
; Inserts the i8 argument into lane 0 of an undef <8 x i8>.
; Expected codegen: a single vmov.8 from r0 into d0[0].
1167 define <8 x i8> @scalar_to_vector_v8i8(i8 %a) {
1168 ; CHECK-LABEL: scalar_to_vector_v8i8:
1170 ; CHECK-NEXT: vmov.8 d0[0], r0
1172 %b = insertelement <8 x i8> undef, i8 %a, i32 0
; Inserts the i8 argument into lane 0 of an undef <16 x i8>.
; Expected codegen: a single vmov.8 from r0 into d0[0].
1176 define <16 x i8> @scalar_to_vector_v16i8(i8 %a) {
1177 ; CHECK-LABEL: scalar_to_vector_v16i8:
1179 ; CHECK-NEXT: vmov.8 d0[0], r0
1181 %b = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts the i16 argument into lane 0 of an undef <4 x i16>.
; Expected codegen: a single vmov.16 from r0 into d0[0].
1185 define <4 x i16> @scalar_to_vector_v4i16(i16 %a) {
1186 ; CHECK-LABEL: scalar_to_vector_v4i16:
1188 ; CHECK-NEXT: vmov.16 d0[0], r0
1190 %b = insertelement <4 x i16> undef, i16 %a, i32 0
; Inserts the i16 argument into lane 0 of an undef <8 x i16>.
; Expected codegen: a single vmov.16 from r0 into d0[0].
1194 define <8 x i16> @scalar_to_vector_v8i16(i16 %a) {
1195 ; CHECK-LABEL: scalar_to_vector_v8i16:
1197 ; CHECK-NEXT: vmov.16 d0[0], r0
1199 %b = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts the i32 argument into lane 0 of an undef <2 x i32>.
; Expected codegen: a single vmov.32 from r0 into d0[0].
1203 define <2 x i32> @scalar_to_vector_v2i32(i32 %a) {
1204 ; CHECK-LABEL: scalar_to_vector_v2i32:
1206 ; CHECK-NEXT: vmov.32 d0[0], r0
1208 %b = insertelement <2 x i32> undef, i32 %a, i32 0
; Inserts the i32 argument into lane 0 of an undef <4 x i32>.
; Expected codegen: a single vmov.32 from r0 into d0[0].
1212 define <4 x i32> @scalar_to_vector_v4i32(i32 %a) {
1213 ; CHECK-LABEL: scalar_to_vector_v4i32:
1215 ; CHECK-NEXT: vmov.32 d0[0], r0
1217 %b = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts the i64 argument into lane 0 of an undef <2 x i64>.
; Expected codegen: the value arrives in r0/r1 and is written as two 32-bit
; lane moves into d0[0] and d0[1].
1221 define <2 x i64> @scalar_to_vector_v2i64(i64 %a) {
1222 ; CHECK-LABEL: scalar_to_vector_v2i64:
1224 ; CHECK-NEXT: vmov.32 d0[0], r0
1225 ; CHECK-NEXT: vmov.32 d0[1], r1
1227 %b = insertelement <2 x i64> undef, i64 %a, i32 0
; Extracts the only element of a <1 x i8> and inserts it into every lane
; (0-7) of an <8 x i8>. Expected codegen: the insert chain collapses to a
; single vdup.8 from r0 into d0.
1231 define <8 x i8> @testDUPv1i8(<1 x i8> %a) {
1232 ; CHECK-LABEL: testDUPv1i8:
1234 ; CHECK-NEXT: vdup.8 d0, r0
1236 %b = extractelement <1 x i8> %a, i32 0
1237 %c = insertelement <8 x i8> undef, i8 %b, i32 0
1238 %d = insertelement <8 x i8> %c, i8 %b, i32 1
1239 %e = insertelement <8 x i8> %d, i8 %b, i32 2
1240 %f = insertelement <8 x i8> %e, i8 %b, i32 3
1241 %g = insertelement <8 x i8> %f, i8 %b, i32 4
1242 %h = insertelement <8 x i8> %g, i8 %b, i32 5
1243 %i = insertelement <8 x i8> %h, i8 %b, i32 6
1244 %j = insertelement <8 x i8> %i, i8 %b, i32 7
; Extracts the only element of a <1 x i16> and inserts it into every lane
; (0-7) of an <8 x i16>. Expected codegen: a single vdup.16 from r0 into q0.
1248 define <8 x i16> @testDUPv1i16(<1 x i16> %a) {
1249 ; CHECK-LABEL: testDUPv1i16:
1251 ; CHECK-NEXT: vdup.16 q0, r0
1253 %b = extractelement <1 x i16> %a, i32 0
1254 %c = insertelement <8 x i16> undef, i16 %b, i32 0
1255 %d = insertelement <8 x i16> %c, i16 %b, i32 1
1256 %e = insertelement <8 x i16> %d, i16 %b, i32 2
1257 %f = insertelement <8 x i16> %e, i16 %b, i32 3
1258 %g = insertelement <8 x i16> %f, i16 %b, i32 4
1259 %h = insertelement <8 x i16> %g, i16 %b, i32 5
1260 %i = insertelement <8 x i16> %h, i16 %b, i32 6
1261 %j = insertelement <8 x i16> %i, i16 %b, i32 7
; Extracts the only element of a <1 x i32> and inserts it into every lane
; (0-3) of a <4 x i32>. Expected codegen: a single vdup.32 from r0 into q0.
1265 define <4 x i32> @testDUPv1i32(<1 x i32> %a) {
1266 ; CHECK-LABEL: testDUPv1i32:
1268 ; CHECK-NEXT: vdup.32 q0, r0
1270 %b = extractelement <1 x i32> %a, i32 0
1271 %c = insertelement <4 x i32> undef, i32 %b, i32 0
1272 %d = insertelement <4 x i32> %c, i32 %b, i32 1
1273 %e = insertelement <4 x i32> %d, i32 %b, i32 2
1274 %f = insertelement <4 x i32> %e, i32 %b, i32 3
; Rebuilds an <8 x i8> from lanes 0-7 of the <16 x i8> argument using one
; extractelement/insertelement pair per lane, in order. The only expected
; line visible here is a register kill annotation relating d0 to q0; no lane
; move instructions appear among the visible expectations.
1278 define <8 x i8> @getl(<16 x i8> %x) #0 {
1279 ; CHECK-LABEL: getl:
1281 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
1283 %vecext = extractelement <16 x i8> %x, i32 0
1284 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
1285 %vecext1 = extractelement <16 x i8> %x, i32 1
1286 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
1287 %vecext3 = extractelement <16 x i8> %x, i32 2
1288 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
1289 %vecext5 = extractelement <16 x i8> %x, i32 3
1290 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
1291 %vecext7 = extractelement <16 x i8> %x, i32 4
1292 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
1293 %vecext9 = extractelement <16 x i8> %x, i32 5
1294 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
1295 %vecext11 = extractelement <16 x i8> %x, i32 6
1296 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
1297 %vecext13 = extractelement <16 x i8> %x, i32 7
1298 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
1299 ret <8 x i8> %vecinit14
; Builds a <4 x i16> whose lane 0 comes from a variable-index extract
; (%idx) of %x, while lanes 1-3 come from constant lanes 1-3 of %x.
; Expected codegen: the index is masked to 0..7 and scaled by 2 (lsl #1), q0
; is stored to an aligned stack slot, and the selected halfword is reloaded
; into d0[0] with vld1.16. Lanes 1-3 are read with vmov.u16 before the store
; and written back with vmov.16 afterwards.
1302 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
1303 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
1305 ; CHECK-NEXT: .save {r11}
1306 ; CHECK-NEXT: push {r11}
1307 ; CHECK-NEXT: .setfp r11, sp
1308 ; CHECK-NEXT: mov r11, sp
1309 ; CHECK-NEXT: .pad #28
1310 ; CHECK-NEXT: sub sp, sp, #28
1311 ; CHECK-NEXT: bfc sp, #0, #4
1312 ; CHECK-NEXT: vmov.u16 r1, d0[1]
1313 ; CHECK-NEXT: and r0, r0, #7
1314 ; CHECK-NEXT: vmov.u16 r2, d0[2]
1315 ; CHECK-NEXT: mov r3, sp
1316 ; CHECK-NEXT: vmov.u16 r12, d0[3]
1317 ; CHECK-NEXT: lsl r0, r0, #1
1318 ; CHECK-NEXT: vst1.64 {d0, d1}, [r3:128], r0
1319 ; CHECK-NEXT: vld1.16 {d0[0]}, [r3:16]
1320 ; CHECK-NEXT: vmov.16 d0[1], r1
1321 ; CHECK-NEXT: vmov.16 d0[2], r2
1322 ; CHECK-NEXT: vmov.16 d0[3], r12
1323 ; CHECK-NEXT: mov sp, r11
1324 ; CHECK-NEXT: pop {r11}
1326 %tmp = extractelement <8 x i16> %x, i32 %idx
1327 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
1328 %tmp3 = extractelement <8 x i16> %x, i32 1
1329 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1330 %tmp5 = extractelement <8 x i16> %x, i32 2
1331 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1332 %tmp7 = extractelement <8 x i16> %x, i32 3
1333 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Counterpart of the variable-index extract test: lane 0 of %x is inserted
; at a variable position (%idx) into an undef <4 x i16>, then lanes 1-3 are
; overwritten from constant lanes 1-3 of %x.
; Expected codegen: the index is masked to 0..3, scaled by 2 and OR-ed into a
; stack address; d0[0] is stored there with vst1.16 and the vector is
; reloaded with vldr. Lanes 1-3 are then set with vmov.16.
1337 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
1338 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
1340 ; CHECK-NEXT: .pad #8
1341 ; CHECK-NEXT: sub sp, sp, #8
1342 ; CHECK-NEXT: vmov.u16 r1, d0[1]
1343 ; CHECK-NEXT: and r0, r0, #3
1344 ; CHECK-NEXT: vmov.u16 r2, d0[2]
1345 ; CHECK-NEXT: mov r3, sp
1346 ; CHECK-NEXT: vmov.u16 r12, d0[3]
1347 ; CHECK-NEXT: orr r0, r3, r0, lsl #1
1348 ; CHECK-NEXT: vst1.16 {d0[0]}, [r0:16]
1349 ; CHECK-NEXT: vldr d0, [sp]
1350 ; CHECK-NEXT: vmov.16 d0[1], r1
1351 ; CHECK-NEXT: vmov.16 d0[2], r2
1352 ; CHECK-NEXT: vmov.16 d0[3], r12
1353 ; CHECK-NEXT: add sp, sp, #8
1355 %tmp = extractelement <8 x i16> %x, i32 0
1356 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
1357 %tmp3 = extractelement <8 x i16> %x, i32 1
1358 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1359 %tmp5 = extractelement <8 x i16> %x, i32 2
1360 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1361 %tmp7 = extractelement <8 x i16> %x, i32 3
1362 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Takes lane 1 of a <2 x i32>, truncates it to i16 and splats it across all
; four lanes of a <4 x i16>.
; Expected codegen: lane 1 is moved to r0, written into d16[1] with vmov.16,
; and broadcast with vdup.16 from d16[1].
1366 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
1367 ; CHECK-LABEL: test_dup_v2i32_v4i16:
1368 ; CHECK: @ %bb.0: @ %entry
1369 ; CHECK-NEXT: vmov.32 r0, d0[1]
1370 ; CHECK-NEXT: vmov.16 d16[1], r0
1371 ; CHECK-NEXT: vdup.16 d0, d16[1]
1374 %x = extractelement <2 x i32> %a, i32 1
1375 %vget_lane = trunc i32 %x to i16
1376 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1377 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1378 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1379 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1380 ret <4 x i16> %vecinit3.i
; Takes lane 3 of a <4 x i32>, truncates it to i16 and splats it across all
; eight lanes of an <8 x i16>.
; Expected codegen: the lane is read from d1[1], written into d16[3] with
; vmov.16, and broadcast into q0 with vdup.16 from d16[3].
1383 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
1384 ; CHECK-LABEL: test_dup_v4i32_v8i16:
1385 ; CHECK: @ %bb.0: @ %entry
1386 ; CHECK-NEXT: vmov.32 r0, d1[1]
1387 ; CHECK-NEXT: vmov.16 d16[3], r0
1388 ; CHECK-NEXT: vdup.16 q0, d16[3]
1391 %x = extractelement <4 x i32> %a, i32 3
1392 %vget_lane = trunc i32 %x to i16
1393 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1394 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1395 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1396 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1397 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1398 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1399 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1400 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1401 ret <8 x i16> %vecinit7.i
; Takes the single i64 element of a <1 x i64>, truncates it to i16 and
; splats it across a <4 x i16>.
; Expected codegen: the low word d0[0] is moved to r0, written into d16[0]
; with vmov.16, and broadcast with vdup.16 from d16[0].
1404 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
1405 ; CHECK-LABEL: test_dup_v1i64_v4i16:
1406 ; CHECK: @ %bb.0: @ %entry
1407 ; CHECK-NEXT: vmov.32 r0, d0[0]
1408 ; CHECK-NEXT: vmov.16 d16[0], r0
1409 ; CHECK-NEXT: vdup.16 d0, d16[0]
1412 %x = extractelement <1 x i64> %a, i32 0
1413 %vget_lane = trunc i64 %x to i16
1414 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1415 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1416 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1417 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1418 ret <4 x i16> %vecinit3.i
; Takes the single i64 element of a <1 x i64>, truncates it to i32 and
; splats it across a <2 x i32>.
; Expected codegen: a single lane-to-vector vdup.32 from d0[0].
1421 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
1422 ; CHECK-LABEL: test_dup_v1i64_v2i32:
1423 ; CHECK: @ %bb.0: @ %entry
1424 ; CHECK-NEXT: vdup.32 d0, d0[0]
1427 %x = extractelement <1 x i64> %a, i32 0
1428 %vget_lane = trunc i64 %x to i32
1429 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1430 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1431 ret <2 x i32> %vecinit1.i
; Takes lane 1 of a <2 x i64>, truncates it to i16 and splats it across all
; eight lanes of an <8 x i16>.
; Expected codegen: the low word of that lane (d1[0]) is moved to r0, written
; into d16[2] with vmov.16, and broadcast into q0 with vdup.16 from d16[2].
1434 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
1435 ; CHECK-LABEL: test_dup_v2i64_v8i16:
1436 ; CHECK: @ %bb.0: @ %entry
1437 ; CHECK-NEXT: vmov.32 r0, d1[0]
1438 ; CHECK-NEXT: vmov.16 d16[2], r0
1439 ; CHECK-NEXT: vdup.16 q0, d16[2]
1442 %x = extractelement <2 x i64> %a, i32 1
1443 %vget_lane = trunc i64 %x to i16
1444 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1445 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1446 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1447 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1448 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1449 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1450 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1451 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1452 ret <8 x i16> %vecinit7.i
; Takes lane 1 of a <2 x i64>, truncates it to i32 and splats it across a
; <4 x i32>. Expected codegen: a single vdup.32 into q0 from d1[0].
1455 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1456 ; CHECK-LABEL: test_dup_v2i64_v4i32:
1457 ; CHECK: @ %bb.0: @ %entry
1458 ; CHECK-NEXT: vdup.32 q0, d1[0]
1461 %x = extractelement <2 x i64> %a, i32 1
1462 %vget_lane = trunc i64 %x to i32
1463 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1464 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1465 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1466 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1467 ret <4 x i32> %vecinit3.i
; Takes lane 1 of a 128-bit <4 x i32>, truncates it to i16 and splats it
; across a 64-bit <4 x i16>.
; Expected codegen: d0[1] is moved to r0, written into d16[1] with vmov.16,
; and broadcast with vdup.16 from d16[1].
1470 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1471 ; CHECK-LABEL: test_dup_v4i32_v4i16:
1472 ; CHECK: @ %bb.0: @ %entry
1473 ; CHECK-NEXT: vmov.32 r0, d0[1]
1474 ; CHECK-NEXT: vmov.16 d16[1], r0
1475 ; CHECK-NEXT: vdup.16 d0, d16[1]
1478 %x = extractelement <4 x i32> %a, i32 1
1479 %vget_lane = trunc i32 %x to i16
1480 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1481 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1482 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1483 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1484 ret <4 x i16> %vecinit3.i
; Takes lane 0 of a <2 x i64>, truncates it to i16 and splats it across a
; <4 x i16>.
; Expected codegen: d0[0] is moved to r0, written into d16[0] with vmov.16,
; and broadcast with vdup.16 from d16[0].
1487 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1488 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1489 ; CHECK: @ %bb.0: @ %entry
1490 ; CHECK-NEXT: vmov.32 r0, d0[0]
1491 ; CHECK-NEXT: vmov.16 d16[0], r0
1492 ; CHECK-NEXT: vdup.16 d0, d16[0]
1495 %x = extractelement <2 x i64> %a, i32 0
1496 %vget_lane = trunc i64 %x to i16
1497 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1498 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1499 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1500 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1501 ret <4 x i16> %vecinit3.i
; Takes lane 0 of a <2 x i64>, truncates it to i32 and splats it across a
; <2 x i32>. Expected codegen: a single vdup.32 from d0[0].
1504 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1505 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1506 ; CHECK: @ %bb.0: @ %entry
1507 ; CHECK-NEXT: vdup.32 d0, d0[0]
1510 %x = extractelement <2 x i64> %a, i32 0
1511 %vget_lane = trunc i64 %x to i32
1512 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1513 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1514 ret <2 x i32> %vecinit1.i
; Places lane 0 of %a into lane 1 of an otherwise undef <2 x i32>; lane 0 of
; the result is left undef.
; Expected codegen: a vdup.32 from d0[0], which fills both lanes.
1517 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1518 ; CHECK-LABEL: test_concat_undef_v1i32:
1519 ; CHECK: @ %bb.0: @ %entry
1520 ; CHECK-NEXT: vdup.32 d0, d0[0]
1523 %0 = extractelement <2 x i32> %a, i32 0
1524 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1525 ret <2 x i32> %vecinit1.i
; Places lane 0 of %a into both lanes of a <2 x i32>.
; Expected codegen: a single vdup.32 from d0[0].
1528 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1529 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1530 ; CHECK: @ %bb.0: @ %entry
1531 ; CHECK-NEXT: vdup.32 d0, d0[0]
1534 %0 = extractelement <2 x i32> %a, i32 0
1535 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1536 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1537 ret <2 x i32> %vecinit1.i
; Concatenates lanes 0-7 of %x with lanes 0-7 of %y (shuffle mask indices
; 0-7 and 16-23). Expected codegen: one D-register copy, vmov.f64 d1, d2.
1541 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1542 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1543 ; CHECK: @ %bb.0: @ %entry
1544 ; CHECK-NEXT: vmov.f64 d1, d2
1547 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1548 ret <16 x i8> %vecinit30
; Widens the 64-bit %x into lanes 0-7 of a <16 x i8> through an
; extract/insert chain, then shuffles in lanes 0-7 of %y as lanes 8-15.
; Expected codegen: a kill annotation on d0/q0 and a single vmov.f64 d1, d2.
1551 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1552 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1553 ; CHECK: @ %bb.0: @ %entry
1554 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
1555 ; CHECK-NEXT: vmov.f64 d1, d2
1558 %vecext = extractelement <8 x i8> %x, i32 0
1559 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1560 %vecext1 = extractelement <8 x i8> %x, i32 1
1561 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1562 %vecext3 = extractelement <8 x i8> %x, i32 2
1563 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1564 %vecext5 = extractelement <8 x i8> %x, i32 3
1565 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1566 %vecext7 = extractelement <8 x i8> %x, i32 4
1567 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1568 %vecext9 = extractelement <8 x i8> %x, i32 5
1569 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1570 %vecext11 = extractelement <8 x i8> %x, i32 6
1571 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1572 %vecext13 = extractelement <8 x i8> %x, i32 7
1573 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1574 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1575 ret <16 x i8> %vecinit30
; Builds a <16 x i8> entirely from extract/insert pairs: lanes 0-7 come from
; lanes 0-7 of the 128-bit %x, lanes 8-15 from lanes 0-7 of the 64-bit %y.
; Expected codegen: the whole chain reduces to vmov.f64 d1, d2.
1578 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1579 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1580 ; CHECK: @ %bb.0: @ %entry
1581 ; CHECK-NEXT: vmov.f64 d1, d2
1584 %vecext = extractelement <16 x i8> %x, i32 0
1585 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1586 %vecext1 = extractelement <16 x i8> %x, i32 1
1587 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1588 %vecext3 = extractelement <16 x i8> %x, i32 2
1589 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1590 %vecext5 = extractelement <16 x i8> %x, i32 3
1591 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1592 %vecext7 = extractelement <16 x i8> %x, i32 4
1593 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1594 %vecext9 = extractelement <16 x i8> %x, i32 5
1595 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1596 %vecext11 = extractelement <16 x i8> %x, i32 6
1597 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1598 %vecext13 = extractelement <16 x i8> %x, i32 7
1599 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1600 %vecext15 = extractelement <8 x i8> %y, i32 0
1601 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1602 %vecext17 = extractelement <8 x i8> %y, i32 1
1603 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1604 %vecext19 = extractelement <8 x i8> %y, i32 2
1605 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1606 %vecext21 = extractelement <8 x i8> %y, i32 3
1607 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1608 %vecext23 = extractelement <8 x i8> %y, i32 4
1609 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1610 %vecext25 = extractelement <8 x i8> %y, i32 5
1611 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1612 %vecext27 = extractelement <8 x i8> %y, i32 6
1613 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1614 %vecext29 = extractelement <8 x i8> %y, i32 7
1615 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1616 ret <16 x i8> %vecinit30
; Concatenates two 64-bit <8 x i8> arguments into a <16 x i8> using one
; extract/insert pair per lane (%x -> lanes 0-7, %y -> lanes 8-15).
; The visible expected lines are only register kill annotations for d1 and
; d0; no data-movement instruction appears among them.
1619 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1620 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1621 ; CHECK: @ %bb.0: @ %entry
1622 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
1623 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
1626 %vecext = extractelement <8 x i8> %x, i32 0
1627 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1628 %vecext1 = extractelement <8 x i8> %x, i32 1
1629 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1630 %vecext3 = extractelement <8 x i8> %x, i32 2
1631 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1632 %vecext5 = extractelement <8 x i8> %x, i32 3
1633 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1634 %vecext7 = extractelement <8 x i8> %x, i32 4
1635 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1636 %vecext9 = extractelement <8 x i8> %x, i32 5
1637 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1638 %vecext11 = extractelement <8 x i8> %x, i32 6
1639 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1640 %vecext13 = extractelement <8 x i8> %x, i32 7
1641 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1642 %vecext15 = extractelement <8 x i8> %y, i32 0
1643 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1644 %vecext17 = extractelement <8 x i8> %y, i32 1
1645 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1646 %vecext19 = extractelement <8 x i8> %y, i32 2
1647 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1648 %vecext21 = extractelement <8 x i8> %y, i32 3
1649 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1650 %vecext23 = extractelement <8 x i8> %y, i32 4
1651 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1652 %vecext25 = extractelement <8 x i8> %y, i32 5
1653 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1654 %vecext27 = extractelement <8 x i8> %y, i32 6
1655 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1656 %vecext29 = extractelement <8 x i8> %y, i32 7
1657 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1658 ret <16 x i8> %vecinit30
; Concatenates lanes 0-3 of %x with lanes 0-3 of %y (shuffle mask indices
; 0-3 and 8-11). Expected codegen: one D-register copy, vmov.f64 d1, d2.
1661 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1662 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1663 ; CHECK: @ %bb.0: @ %entry
1664 ; CHECK-NEXT: vmov.f64 d1, d2
1667 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1668 ret <8 x i16> %vecinit14
; Widens the 64-bit %x into lanes 0-3 of an <8 x i16> through an
; extract/insert chain, then shuffles in lanes 0-3 of %y as lanes 4-7.
; Expected codegen: a kill annotation on d0/q0 and a single vmov.f64 d1, d2.
1671 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1672 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1673 ; CHECK: @ %bb.0: @ %entry
1674 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
1675 ; CHECK-NEXT: vmov.f64 d1, d2
1678 %vecext = extractelement <4 x i16> %x, i32 0
1679 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1680 %vecext1 = extractelement <4 x i16> %x, i32 1
1681 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1682 %vecext3 = extractelement <4 x i16> %x, i32 2
1683 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1684 %vecext5 = extractelement <4 x i16> %x, i32 3
1685 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1686 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1687 ret <8 x i16> %vecinit14
; Builds an <8 x i16> from extract/insert pairs: lanes 0-3 come from lanes
; 0-3 of the 128-bit %x, lanes 4-7 from lanes 0-3 of the 64-bit %y.
; Expected codegen: the whole chain reduces to vmov.f64 d1, d2.
1690 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1691 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1692 ; CHECK: @ %bb.0: @ %entry
1693 ; CHECK-NEXT: vmov.f64 d1, d2
1696 %vecext = extractelement <8 x i16> %x, i32 0
1697 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1698 %vecext1 = extractelement <8 x i16> %x, i32 1
1699 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1700 %vecext3 = extractelement <8 x i16> %x, i32 2
1701 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1702 %vecext5 = extractelement <8 x i16> %x, i32 3
1703 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1704 %vecext7 = extractelement <4 x i16> %y, i32 0
1705 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1706 %vecext9 = extractelement <4 x i16> %y, i32 1
1707 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1708 %vecext11 = extractelement <4 x i16> %y, i32 2
1709 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1710 %vecext13 = extractelement <4 x i16> %y, i32 3
1711 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1712 ret <8 x i16> %vecinit14
; Concatenates two 64-bit <4 x i16> arguments into an <8 x i16> using one
; extract/insert pair per lane (%x -> lanes 0-3, %y -> lanes 4-7).
; The visible expected lines are only register kill annotations for d1 and
; d0; no data-movement instruction appears among them.
1715 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1716 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1717 ; CHECK: @ %bb.0: @ %entry
1718 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
1719 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
1722 %vecext = extractelement <4 x i16> %x, i32 0
1723 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1724 %vecext1 = extractelement <4 x i16> %x, i32 1
1725 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1726 %vecext3 = extractelement <4 x i16> %x, i32 2
1727 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1728 %vecext5 = extractelement <4 x i16> %x, i32 3
1729 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1730 %vecext7 = extractelement <4 x i16> %y, i32 0
1731 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1732 %vecext9 = extractelement <4 x i16> %y, i32 1
1733 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1734 %vecext11 = extractelement <4 x i16> %y, i32 2
1735 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1736 %vecext13 = extractelement <4 x i16> %y, i32 3
1737 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1738 ret <8 x i16> %vecinit14
; Concatenates lanes 0-1 of %x with lanes 0-1 of %y (shuffle mask 0,1,4,5).
; Unlike the i8/i16/i64 variants in this file, the expected codegen here is
; a pair of vext.32 instructions through q8 rather than a vmov.f64 d1, d2.
1741 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1742 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1743 ; CHECK: @ %bb.0: @ %entry
1744 ; CHECK-NEXT: vext.32 q8, q0, q0, #2
1745 ; CHECK-NEXT: vext.32 q0, q8, q1, #2
1748 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1749 ret <4 x i32> %vecinit6
; Widens the 64-bit %x into lanes 0-1 of a <4 x i32> through an
; extract/insert chain, then shuffles in lanes 0-1 of %y as lanes 2-3.
; Expected codegen: a kill annotation on d0/q0 followed by two vext.32
; instructions through q8.
1752 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1753 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1754 ; CHECK: @ %bb.0: @ %entry
1755 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
1756 ; CHECK-NEXT: vext.32 q8, q0, q0, #2
1757 ; CHECK-NEXT: vext.32 q0, q8, q1, #2
1760 %vecext = extractelement <2 x i32> %x, i32 0
1761 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1762 %vecext1 = extractelement <2 x i32> %x, i32 1
1763 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1764 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1765 ret <4 x i32> %vecinit6
; Builds a <4 x i32> from extract/insert pairs: lanes 0-1 come from lanes
; 0-1 of the 128-bit %x, lanes 2-3 from the 64-bit %y.
; Expected codegen: the whole chain reduces to vmov.f64 d1, d2.
1768 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1769 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1770 ; CHECK: @ %bb.0: @ %entry
1771 ; CHECK-NEXT: vmov.f64 d1, d2
1774 %vecext = extractelement <4 x i32> %x, i32 0
1775 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1776 %vecext1 = extractelement <4 x i32> %x, i32 1
1777 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1778 %vecext3 = extractelement <2 x i32> %y, i32 0
1779 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1780 %vecext5 = extractelement <2 x i32> %y, i32 1
1781 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1782 ret <4 x i32> %vecinit6
; Concatenates two 64-bit <2 x i32> arguments with a single widening
; shufflevector (mask 0,1,2,3). The visible expected lines are only register
; kill annotations for d1 and d0.
1785 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1786 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1787 ; CHECK: @ %bb.0: @ %entry
1788 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
1789 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
1792 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1793 ret <4 x i32> %vecinit6
; Concatenates lane 0 of %x with lane 0 of %y (shuffle mask 0,2).
; Expected codegen: one D-register copy, vmov.f64 d1, d2.
1796 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1797 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1798 ; CHECK: @ %bb.0: @ %entry
1799 ; CHECK-NEXT: vmov.f64 d1, d2
1802 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1803 ret <2 x i64> %vecinit2
; Inserts the single element of %x into lane 0 of a <2 x i64>, then shuffles
; in lane 0 of %y as lane 1.
; Expected codegen: a kill annotation on d0/q0 and a single vmov.f64 d1, d2.
1806 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1807 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1808 ; CHECK: @ %bb.0: @ %entry
1809 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
1810 ; CHECK-NEXT: vmov.f64 d1, d2
1813 %vecext = extractelement <1 x i64> %x, i32 0
1814 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1815 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1816 ret <2 x i64> %vecinit2
; Builds a <2 x i64> from lane 0 of the 128-bit %x and the single element of
; the 64-bit %y using extract/insert pairs.
; Expected codegen: vmov.f64 d1, d2.
1819 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1820 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1821 ; CHECK: @ %bb.0: @ %entry
1822 ; CHECK-NEXT: vmov.f64 d1, d2
1825 %vecext = extractelement <2 x i64> %x, i32 0
1826 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1827 %vecext1 = extractelement <1 x i64> %y, i32 0
1828 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1829 ret <2 x i64> %vecinit2
; Concatenates two <1 x i64> arguments into a <2 x i64> using extract/insert
; pairs. The visible expected lines are only register kill annotations for
; d1 and d0.
1832 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1833 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1834 ; CHECK: @ %bb.0: @ %entry
1835 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
1836 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
1839 %vecext = extractelement <1 x i64> %x, i32 0
1840 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1841 %vecext1 = extractelement <1 x i64> %y, i32 0
1842 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1843 ret <2 x i64> %vecinit2
; Splats a constant-zero <1 x i16> to <4 x i16> with an all-zero shuffle mask.
; Expected codegen: a zero immediate move, vmov.i32 d0, #0x0.
1847 define <4 x i16> @concat_vector_v4i16_const() {
1848 ; CHECK-LABEL: concat_vector_v4i16_const:
1850 ; CHECK-NEXT: vmov.i32 d0, #0x0
1852 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the constant <1 x i16> <1> to <4 x i16> with an all-zero shuffle
; mask. Expected codegen: a 16-bit immediate move, vmov.i16 d0, #0x1.
1856 define <4 x i16> @concat_vector_v4i16_const_one() {
1857 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1859 ; CHECK-NEXT: vmov.i16 d0, #0x1
1861 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats a constant-zero <1 x i32> to <4 x i32> with an all-zero shuffle mask.
; Expected codegen: a zero immediate move, vmov.i32 q0, #0x0.
1865 define <4 x i32> @concat_vector_v4i32_const() {
1866 ; CHECK-LABEL: concat_vector_v4i32_const:
1868 ; CHECK-NEXT: vmov.i32 q0, #0x0
1870 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats a constant-zero <1 x i8> to <8 x i8> with an all-zero shuffle mask.
; Expected codegen: a zero immediate move, vmov.i32 d0, #0x0.
1874 define <8 x i8> @concat_vector_v8i8_const() {
1875 ; CHECK-LABEL: concat_vector_v8i8_const:
1877 ; CHECK-NEXT: vmov.i32 d0, #0x0
1879 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats a constant-zero <1 x i16> to <8 x i16> with an all-zero shuffle mask.
; Expected codegen: a zero immediate move, vmov.i32 q0, #0x0.
1883 define <8 x i16> @concat_vector_v8i16_const() {
1884 ; CHECK-LABEL: concat_vector_v8i16_const:
1886 ; CHECK-NEXT: vmov.i32 q0, #0x0
1888 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the constant <1 x i16> <1> to <8 x i16> with an all-zero shuffle
; mask. Expected codegen: a 16-bit immediate move, vmov.i16 q0, #0x1.
1892 define <8 x i16> @concat_vector_v8i16_const_one() {
1893 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1895 ; CHECK-NEXT: vmov.i16 q0, #0x1
1897 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats a constant-zero <1 x i8> to <16 x i8> with an all-zero shuffle mask.
; Expected codegen: a zero immediate move, vmov.i32 q0, #0x0.
1901 define <16 x i8> @concat_vector_v16i8_const() {
1902 ; CHECK-LABEL: concat_vector_v16i8_const:
1904 ; CHECK-NEXT: vmov.i32 q0, #0x0
1906 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
; Splats the single element of a <1 x i16> argument to <4 x i16> with an
; all-zero shuffle mask. Expected codegen: vdup.16 from r0 into d0.
1910 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1911 ; CHECK-LABEL: concat_vector_v4i16:
1913 ; CHECK-NEXT: vdup.16 d0, r0
1915 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of a <1 x i32> argument to <4 x i32> with an
; all-zero shuffle mask. Expected codegen: vdup.32 from r0 into q0.
1919 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1920 ; CHECK-LABEL: concat_vector_v4i32:
1922 ; CHECK-NEXT: vdup.32 q0, r0
1924 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of a <1 x i8> argument to <8 x i8> with an
; all-zero shuffle mask. Expected codegen: vdup.8 from r0 into d0.
1928 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1929 ; CHECK-LABEL: concat_vector_v8i8:
1931 ; CHECK-NEXT: vdup.8 d0, r0
1933 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of a <1 x i16> argument to <8 x i16> with an
; all-zero shuffle mask. Expected codegen: vdup.16 from r0 into q0.
1937 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1938 ; CHECK-LABEL: concat_vector_v8i16:
1940 ; CHECK-NEXT: vdup.16 q0, r0
1942 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1946 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1947 ; CHECK-LABEL: concat_vector_v16i8:
1949 ; CHECK-NEXT: vdup.8 q0, r0
1951 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer