1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Inserts the general-purpose scalar %tmp2 into lane 15 of a <16 x i8>; checks for `mov v0.b[15], w0`.
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
7 ; CHECK-NEXT: mov v0.b[15], w0
9 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; Inserts the general-purpose scalar %tmp2 into lane 6 of an <8 x i16>; checks for `mov v0.h[6], w0`.
13 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
14 ; CHECK-LABEL: ins8hw:
16 ; CHECK-NEXT: mov v0.h[6], w0
18 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; Inserts the general-purpose scalar %tmp2 into lane 2 of a <4 x i32>; checks for `mov v0.s[2], w0`.
22 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
23 ; CHECK-LABEL: ins4sw:
25 ; CHECK-NEXT: mov v0.s[2], w0
27 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; Inserts the 64-bit scalar %tmp2 into lane 1 of a <2 x i64>; checks for `mov v0.d[1], x0` (x register source).
31 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
32 ; CHECK-LABEL: ins2dw:
34 ; CHECK-NEXT: mov v0.d[1], x0
36 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; 64-bit vector variant: inserts scalar %tmp2 into lane 5 of an <8 x i8>.
; The checks expect the lane move on v0 bracketed by kill annotations relating $d0 to $q0.
40 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
41 ; CHECK-LABEL: ins8bw:
43 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
44 ; CHECK-NEXT: mov v0.b[5], w0
45 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
47 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; 64-bit vector variant: inserts scalar %tmp2 into lane 3 of a <4 x i16>.
; The checks expect the lane move on v0 bracketed by kill annotations relating $d0 to $q0.
51 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
52 ; CHECK-LABEL: ins4hw:
54 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
55 ; CHECK-NEXT: mov v0.h[3], w0
56 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
58 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; 64-bit vector variant: inserts scalar %tmp2 into lane 1 of a <2 x i32>.
; The checks expect the lane move on v0 bracketed by kill annotations relating $d0 to $q0.
62 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
63 ; CHECK-LABEL: ins2sw:
65 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
66 ; CHECK-NEXT: mov v0.s[1], w0
67 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
69 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Copies lane 2 of %tmp1 into lane 15 of %tmp2 (both <16 x i8>).
; Checks for an element-to-element mov into v1 followed by a full-register copy of v1 into v0.
73 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
74 ; CHECK-LABEL: ins16b16:
76 ; CHECK-NEXT: mov v1.b[15], v0.b[2]
77 ; CHECK-NEXT: mov v0.16b, v1.16b
79 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
80 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of %tmp1 into lane 7 of %tmp2 (both <8 x i16>).
; Checks for an element-to-element mov into v1 followed by a full-register copy of v1 into v0.
84 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
85 ; CHECK-LABEL: ins8h8:
87 ; CHECK-NEXT: mov v1.h[7], v0.h[2]
88 ; CHECK-NEXT: mov v0.16b, v1.16b
90 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
91 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 2 of %tmp1 into lane 1 of %tmp2 (both <4 x i32>).
; Checks for an element-to-element mov into v1 followed by a full-register copy of v1 into v0.
95 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
96 ; CHECK-LABEL: ins4s4:
98 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
99 ; CHECK-NEXT: mov v0.16b, v1.16b
101 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
102 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x i64>).
; Checks for an element-to-element mov into v1 followed by a full-register copy of v1 into v0.
106 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
107 ; CHECK-LABEL: ins2d2:
109 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
110 ; CHECK-NEXT: mov v0.16b, v1.16b
112 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
113 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Floating-point counterpart of ins4s4: copies lane 2 of %tmp1 into lane 1 of %tmp2 (both <4 x float>).
; The expected instructions are the same lane mov plus register copy as in the integer case.
117 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
118 ; CHECK-LABEL: ins4f4:
120 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
121 ; CHECK-NEXT: mov v0.16b, v1.16b
123 %tmp3 = extractelement <4 x float> %tmp1, i32 2
124 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
125 ret <4 x float> %tmp4
; Floating-point counterpart of ins2d2: copies lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x double>).
; The expected instructions are the same lane mov plus register copy as in the integer case.
128 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
129 ; CHECK-LABEL: ins2df2:
131 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
132 ; CHECK-NEXT: mov v0.16b, v1.16b
134 %tmp3 = extractelement <2 x double> %tmp1, i32 0
135 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
136 ret <2 x double> %tmp4
; Copies lane 2 of the 64-bit vector %tmp1 (<8 x i8>) into lane 15 of the 128-bit vector %tmp2 (<16 x i8>).
; The checks expect a kill annotation widening $d0 to $q0 before the lane mov and register copy.
139 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
140 ; CHECK-LABEL: ins8b16:
142 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
143 ; CHECK-NEXT: mov v1.b[15], v0.b[2]
144 ; CHECK-NEXT: mov v0.16b, v1.16b
146 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
147 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Copies lane 2 of the 64-bit vector %tmp1 (<4 x i16>) into lane 7 of the 128-bit vector %tmp2 (<8 x i16>).
; The checks expect a kill annotation widening $d0 to $q0 before the lane mov and register copy.
151 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
152 ; CHECK-LABEL: ins4h8:
154 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
155 ; CHECK-NEXT: mov v1.h[7], v0.h[2]
156 ; CHECK-NEXT: mov v0.16b, v1.16b
158 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
159 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Copies lane 1 of the 64-bit vector %tmp1 (<2 x i32>) into lane 1 of the 128-bit vector %tmp2 (<4 x i32>).
; The checks expect a kill annotation widening $d0 to $q0 before the lane mov and register copy.
163 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
164 ; CHECK-LABEL: ins2s4:
166 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
167 ; CHECK-NEXT: mov v1.s[1], v0.s[1]
168 ; CHECK-NEXT: mov v0.16b, v1.16b
170 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
171 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only element of %tmp1 (<1 x i64>) into lane 1 of %tmp2 (<2 x i64>).
; The checks expect a kill annotation widening $d0 to $q0 before the lane mov and register copy.
175 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
176 ; CHECK-LABEL: ins1d2:
178 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
179 ; CHECK-NEXT: mov v1.d[1], v0.d[0]
180 ; CHECK-NEXT: mov v0.16b, v1.16b
182 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
183 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Copies lane 1 of the 64-bit vector %tmp1 (<2 x float>) into lane 1 of the 128-bit vector %tmp2 (<4 x float>).
; The checks expect a kill annotation widening $d0 to $q0 before the lane mov and register copy.
187 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
188 ; CHECK-LABEL: ins2f4:
190 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
191 ; CHECK-NEXT: mov v1.s[1], v0.s[1]
192 ; CHECK-NEXT: mov v0.16b, v1.16b
194 %tmp3 = extractelement <2 x float> %tmp1, i32 1
195 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
196 ret <4 x float> %tmp4
; Copies the only element of %tmp1 (<1 x double>) into lane 1 of %tmp2 (<2 x double>).
; Unlike the integer ins1d2 case, the checked instruction here is `zip1 v0.2d, v1.2d, v0.2d` rather than a lane mov.
199 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
200 ; CHECK-LABEL: ins1f2:
202 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
203 ; CHECK-NEXT: zip1 v0.2d, v1.2d, v0.2d
205 %tmp3 = extractelement <1 x double> %tmp1, i32 0
206 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
207 ret <2 x double> %tmp4
; Copies lane 2 of the 128-bit vector %tmp1 (<16 x i8>) into lane 7 of the 64-bit vector %tmp2 (<8 x i8>).
; The checks expect a kill annotation widening $d1 to $q1 (the destination operand) before the lane mov and register copy.
210 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
211 ; CHECK-LABEL: ins16b8:
213 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
214 ; CHECK-NEXT: mov v1.b[7], v0.b[2]
215 ; CHECK-NEXT: mov v0.16b, v1.16b
217 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
218 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
; Copies lane 2 of the 128-bit vector %tmp1 (<8 x i16>) into lane 3 of the 64-bit vector %tmp2 (<4 x i16>).
; The checks expect a kill annotation widening $d1 to $q1 (the destination operand) before the lane mov and register copy.
222 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
223 ; CHECK-LABEL: ins8h4:
225 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
226 ; CHECK-NEXT: mov v1.h[3], v0.h[2]
227 ; CHECK-NEXT: mov v0.16b, v1.16b
229 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
230 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 2 of the 128-bit vector %tmp1 (<4 x i32>) into lane 1 of the 64-bit vector %tmp2 (<2 x i32>).
; The checks expect a kill annotation widening $d1 to $q1 (the destination operand) before the lane mov and register copy.
234 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
235 ; CHECK-LABEL: ins4s2:
237 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
238 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
239 ; CHECK-NEXT: mov v0.16b, v1.16b
241 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
242 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies lane 0 of %tmp1 (<2 x i64>) into the only lane of %tmp2 (<1 x i64>).
; The checks expect a kill annotation widening $d1 to $q1 before the lane mov and register copy.
246 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
247 ; CHECK-LABEL: ins2d1:
249 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
250 ; CHECK-NEXT: mov v1.d[0], v0.d[0]
251 ; CHECK-NEXT: mov v0.16b, v1.16b
253 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
254 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 2 of the 128-bit vector %tmp1 (<4 x float>) into lane 1 of the 64-bit vector %tmp2 (<2 x float>).
; The checks expect a kill annotation widening $d1 to $q1 before the lane mov and register copy.
258 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
259 ; CHECK-LABEL: ins4f2:
261 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
262 ; CHECK-NEXT: mov v1.s[1], v0.s[2]
263 ; CHECK-NEXT: mov v0.16b, v1.16b
265 %tmp3 = extractelement <4 x float> %tmp1, i32 2
266 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
267 ret <2 x float> %tmp4
; Extracts lane 1 of %tmp1 (<2 x double>) and inserts it as the only element of %tmp2 (<1 x double>).
; The checked instruction is `dup v0.2d, v0.d[1]` on v0; no instruction touching v1 appears in the visible checks.
270 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
271 ; CHECK-LABEL: ins2f1:
273 ; CHECK-NEXT: dup v0.2d, v0.d[1]
274 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
276 %tmp3 = extractelement <2 x double> %tmp1, i32 1
277 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
278 ret <1 x double> %tmp4
; Copies lane 2 of %tmp1 into lane 4 of %tmp2, both 64-bit <8 x i8> vectors.
; The checks expect kill annotations widening both $d1 and $d0 before the lane mov and register copy.
281 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
282 ; CHECK-LABEL: ins8b8:
284 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
285 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
286 ; CHECK-NEXT: mov v1.b[4], v0.b[2]
287 ; CHECK-NEXT: mov v0.16b, v1.16b
289 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
290 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; Copies lane 2 of %tmp1 into lane 3 of %tmp2, both 64-bit <4 x i16> vectors.
; The checks expect kill annotations widening both $d1 and $d0 before the lane mov and register copy.
294 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
295 ; CHECK-LABEL: ins4h4:
297 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
298 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
299 ; CHECK-NEXT: mov v1.h[3], v0.h[2]
300 ; CHECK-NEXT: mov v0.16b, v1.16b
302 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
303 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; Copies lane 0 of %tmp1 into lane 1 of %tmp2, both 64-bit <2 x i32> vectors.
; The checks expect kill annotations widening both $d1 and $d0 before the lane mov and register copy.
307 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
308 ; CHECK-LABEL: ins2s2:
310 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
311 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
312 ; CHECK-NEXT: mov v1.s[1], v0.s[0]
313 ; CHECK-NEXT: mov v0.16b, v1.16b
315 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
316 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Copies the only element of %tmp1 into the only element of %tmp2, both <1 x i64>.
; The checks expect kill annotations widening both $d1 and $d0 before the lane mov and register copy.
320 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
321 ; CHECK-LABEL: ins1d1:
323 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
324 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
325 ; CHECK-NEXT: mov v1.d[0], v0.d[0]
326 ; CHECK-NEXT: mov v0.16b, v1.16b
328 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
329 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Copies lane 0 of %tmp1 into lane 1 of %tmp2, both 64-bit <2 x float> vectors.
; The checks expect kill annotations widening both $d1 and $d0 before the lane mov and register copy.
333 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
334 ; CHECK-LABEL: ins2f2:
336 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
337 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
338 ; CHECK-NEXT: mov v1.s[1], v0.s[0]
339 ; CHECK-NEXT: mov v0.16b, v1.16b
341 %tmp3 = extractelement <2 x float> %tmp1, i32 0
342 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
343 ret <2 x float> %tmp4
; Replaces the only element of %tmp2 with the only element of %tmp1 (both <1 x double>), so the result is all of %tmp1.
; NOTE(review): only the CHECK-LABEL is visible for this function here; confirm which instruction checks, if any, follow it.
346 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
347 ; CHECK-LABEL: ins1df1:
350 %tmp3 = extractelement <1 x double> %tmp1, i32 0
351 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
352 ret <1 x double> %tmp4
; Extracts lane 8 of a <16 x i8> and zero-extends it to i32; checks for `umov w0, v0.b[8]`.
355 define i32 @umovw16b(<16 x i8> %tmp1) {
356 ; CHECK-LABEL: umovw16b:
358 ; CHECK-NEXT: umov w0, v0.b[8]
360 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
361 %tmp4 = zext i8 %tmp3 to i32
; Extracts lane 2 of an <8 x i16> and zero-extends it to i32; checks for `umov w0, v0.h[2]`.
365 define i32 @umovw8h(<8 x i16> %tmp1) {
366 ; CHECK-LABEL: umovw8h:
368 ; CHECK-NEXT: umov w0, v0.h[2]
370 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
371 %tmp4 = zext i16 %tmp3 to i32
; Extracts lane 2 of a <4 x i32> with no extension; the check expects the `mov w0, v0.s[2]` spelling.
375 define i32 @umovw4s(<4 x i32> %tmp1) {
376 ; CHECK-LABEL: umovw4s:
378 ; CHECK-NEXT: mov w0, v0.s[2]
380 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Extracts lane 1 of a <2 x i64> with no extension; the check expects `mov x0, v0.d[1]`.
384 define i64 @umovx2d(<2 x i64> %tmp1) {
385 ; CHECK-LABEL: umovx2d:
387 ; CHECK-NEXT: mov x0, v0.d[1]
389 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; 64-bit vector variant: extracts lane 7 of an <8 x i8> and zero-extends it to i32.
; The checks expect a kill annotation widening $d0 to $q0 followed by `umov w0, v0.b[7]`.
393 define i32 @umovw8b(<8 x i8> %tmp1) {
394 ; CHECK-LABEL: umovw8b:
396 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
397 ; CHECK-NEXT: umov w0, v0.b[7]
399 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
400 %tmp4 = zext i8 %tmp3 to i32
; 64-bit vector variant: extracts lane 2 of a <4 x i16> and zero-extends it to i32.
; The checks expect a kill annotation widening $d0 to $q0 followed by `umov w0, v0.h[2]`.
404 define i32 @umovw4h(<4 x i16> %tmp1) {
405 ; CHECK-LABEL: umovw4h:
407 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
408 ; CHECK-NEXT: umov w0, v0.h[2]
410 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
411 %tmp4 = zext i16 %tmp3 to i32
; 64-bit vector variant: extracts lane 1 of a <2 x i32> with no extension.
; The checks expect a kill annotation widening $d0 to $q0 followed by `mov w0, v0.s[1]`.
415 define i32 @umovw2s(<2 x i32> %tmp1) {
416 ; CHECK-LABEL: umovw2s:
418 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
419 ; CHECK-NEXT: mov w0, v0.s[1]
421 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; Extracts the only element of a <1 x i64>; the checked instruction is `fmov x0, d0` rather than a lane mov.
425 define i64 @umovx1d(<1 x i64> %tmp1) {
426 ; CHECK-LABEL: umovx1d:
428 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
429 ; CHECK-NEXT: fmov x0, d0
431 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Extracts lane 8 of a <16 x i8>, sign-extends it to i32 and adds the result to itself.
; The checks expect `smov w8, v0.b[8]` into a temporary followed by `add w0, w8, w8`.
435 define i32 @smovw16b(<16 x i8> %tmp1) {
436 ; CHECK-LABEL: smovw16b:
438 ; CHECK-NEXT: smov w8, v0.b[8]
439 ; CHECK-NEXT: add w0, w8, w8
441 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
442 %tmp4 = sext i8 %tmp3 to i32
443 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 2 of an <8 x i16>, sign-extends it to i32 and adds the result to itself.
; The checks expect `smov w8, v0.h[2]` into a temporary followed by `add w0, w8, w8`.
447 define i32 @smovw8h(<8 x i16> %tmp1) {
448 ; CHECK-LABEL: smovw8h:
450 ; CHECK-NEXT: smov w8, v0.h[2]
451 ; CHECK-NEXT: add w0, w8, w8
453 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
454 %tmp4 = sext i16 %tmp3 to i32
455 %tmp5 = add i32 %tmp4, %tmp4
; Extracts lane 8 of a <16 x i8> and sign-extends it to i64; checks for `smov x0, v0.b[8]`.
459 define i64 @smovx16b(<16 x i8> %tmp1) {
460 ; CHECK-LABEL: smovx16b:
462 ; CHECK-NEXT: smov x0, v0.b[8]
464 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
465 %tmp4 = sext i8 %tmp3 to i64
; Extracts lane 2 of an <8 x i16> and sign-extends it to i64; checks for `smov x0, v0.h[2]`.
469 define i64 @smovx8h(<8 x i16> %tmp1) {
470 ; CHECK-LABEL: smovx8h:
472 ; CHECK-NEXT: smov x0, v0.h[2]
474 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
475 %tmp4 = sext i16 %tmp3 to i64
; Extracts lane 2 of a <4 x i32> and sign-extends it to i64; checks for `smov x0, v0.s[2]`.
479 define i64 @smovx4s(<4 x i32> %tmp1) {
480 ; CHECK-LABEL: smovx4s:
482 ; CHECK-NEXT: smov x0, v0.s[2]
484 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
485 %tmp4 = sext i32 %tmp3 to i64
; 64-bit vector variant: extracts lane 4 of an <8 x i8>, sign-extends it to i32 and adds the result to itself.
; The checks expect a kill annotation widening $d0 to $q0, then `smov w8, v0.b[4]` and `add w0, w8, w8`.
489 define i32 @smovw8b(<8 x i8> %tmp1) {
490 ; CHECK-LABEL: smovw8b:
492 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
493 ; CHECK-NEXT: smov w8, v0.b[4]
494 ; CHECK-NEXT: add w0, w8, w8
496 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
497 %tmp4 = sext i8 %tmp3 to i32
498 %tmp5 = add i32 %tmp4, %tmp4
; 64-bit vector variant: extracts lane 2 of a <4 x i16>, sign-extends it to i32 and adds the result to itself.
; The checks expect a kill annotation widening $d0 to $q0, then `smov w8, v0.h[2]` and `add w0, w8, w8`.
502 define i32 @smovw4h(<4 x i16> %tmp1) {
503 ; CHECK-LABEL: smovw4h:
505 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
506 ; CHECK-NEXT: smov w8, v0.h[2]
507 ; CHECK-NEXT: add w0, w8, w8
509 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
510 %tmp4 = sext i16 %tmp3 to i32
; Extracts lane 6 of an <8 x i8> and sign-extends it to i32.
; Despite the "smovx" name, this function returns i32 and the checked instruction writes a w register (`smov w0, v0.b[6]`).
515 define i32 @smovx8b(<8 x i8> %tmp1) {
516 ; CHECK-LABEL: smovx8b:
518 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
519 ; CHECK-NEXT: smov w0, v0.b[6]
521 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
522 %tmp4 = sext i8 %tmp3 to i32
; Extracts lane 2 of a <4 x i16> and sign-extends it to i32.
; Despite the "smovx" name, this function returns i32 and the checked instruction writes a w register (`smov w0, v0.h[2]`).
526 define i32 @smovx4h(<4 x i16> %tmp1) {
527 ; CHECK-LABEL: smovx4h:
529 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
530 ; CHECK-NEXT: smov w0, v0.h[2]
532 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
533 %tmp4 = sext i16 %tmp3 to i32
; 64-bit vector variant: extracts lane 1 of a <2 x i32> and sign-extends it to i64.
; The checks expect a kill annotation widening $d0 to $q0 followed by `smov x0, v0.s[1]`.
537 define i64 @smovx2s(<2 x i32> %tmp1) {
538 ; CHECK-LABEL: smovx2s:
540 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
541 ; CHECK-NEXT: smov x0, v0.s[1]
543 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
544 %tmp4 = sext i32 %tmp3 to i64
; Lane copy expressed as a shufflevector: the mask keeps every lane of %v1 except position 5,
; whose index 11 selects lane 3 of %v2. The checks expect a single `mov v0.b[5], v1.b[3]`
; between kill annotations for the 64-bit registers.
548 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
549 ; CHECK-LABEL: test_vcopy_lane_s8:
551 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
552 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
553 ; CHECK-NEXT: mov v0.b[5], v1.b[3]
554 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
556 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
557 ret <8 x i8> %vset_lane
; 128-bit lane copy expressed as a shufflevector: the mask keeps every lane of %v1 except position 14,
; whose index 22 selects lane 6 of %v2. The check expects `mov v0.b[14], v1.b[6]`.
560 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
561 ; CHECK-LABEL: test_vcopyq_laneq_s8:
563 ; CHECK-NEXT: mov v0.b[14], v1.b[6]
565 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
566 ret <16 x i8> %vset_lane
; "Swapped" lane copy: mask indices 8-14 take lanes 0-6 from %v2 and the final index 0 takes lane 0 of %v1,
; so %v2 is the base vector. The checks expect `mov v1.b[7], v0.b[0]` followed by a copy of v1 into v0.
569 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
570 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
572 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
573 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
574 ; CHECK-NEXT: mov v1.b[7], v0.b[0]
575 ; CHECK-NEXT: mov v0.16b, v1.16b
577 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
578 ret <8 x i8> %vset_lane
; "Swapped" 128-bit lane copy: the first mask index 15 takes lane 15 of %v1 and indices 17-31 take lanes 1-15 of %v2,
; so %v2 is the base vector. The checks expect `mov v1.b[0], v0.b[15]` followed by a copy of v1 into v0.
581 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
582 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
584 ; CHECK-NEXT: mov v1.b[0], v0.b[15]
585 ; CHECK-NEXT: mov v0.16b, v1.16b
587 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
588 ret <16 x i8> %vset_lane
; Builds an <8 x i8> by inserting the same scalar %v1 into every lane (0 through 7) of an undef vector.
; The check expects the whole chain to collapse into one `dup v0.8b, w0`.
591 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
592 ; CHECK-LABEL: test_vdup_n_u8:
594 ; CHECK-NEXT: dup v0.8b, w0
596 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
597 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
598 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
599 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
600 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
601 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
602 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
603 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
604 ret <8 x i8> %vecinit7.i
; Builds a <4 x i16> by inserting the same scalar %v1 into every lane (0 through 3) of an undef vector.
; The check expects the whole chain to collapse into one `dup v0.4h, w0`.
607 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
608 ; CHECK-LABEL: test_vdup_n_u16:
610 ; CHECK-NEXT: dup v0.4h, w0
612 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
613 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
614 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
615 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
616 ret <4 x i16> %vecinit3.i
; Builds a <2 x i32> by inserting the same scalar %v1 into both lanes of an undef vector.
; The check expects the pair of inserts to collapse into one `dup v0.2s, w0`.
619 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
620 ; CHECK-LABEL: test_vdup_n_u32:
622 ; CHECK-NEXT: dup v0.2s, w0
624 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
625 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
626 ret <2 x i32> %vecinit1.i
; Builds a <1 x i64> from the scalar %v1 with a single insertelement.
; With only one lane, the checked instruction is `fmov d0, x0` rather than a dup.
629 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
630 ; CHECK-LABEL: test_vdup_n_u64:
632 ; CHECK-NEXT: fmov d0, x0
634 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
635 ret <1 x i64> %vecinit.i
; Builds a <16 x i8> by inserting the same scalar %v1 into every lane (0 through 15) of an undef vector.
; The check expects the whole chain to collapse into one `dup v0.16b, w0`.
638 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
639 ; CHECK-LABEL: test_vdupq_n_u8:
641 ; CHECK-NEXT: dup v0.16b, w0
643 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
644 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
645 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
646 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
647 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
648 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
649 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
650 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
651 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
652 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
653 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
654 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
655 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
656 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
657 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
658 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
659 ret <16 x i8> %vecinit15.i
; Builds an <8 x i16> by inserting the same scalar %v1 into every lane (0 through 7) of an undef vector.
; The check expects the whole chain to collapse into one `dup v0.8h, w0`.
662 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
663 ; CHECK-LABEL: test_vdupq_n_u16:
665 ; CHECK-NEXT: dup v0.8h, w0
667 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
668 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
669 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
670 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
671 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
672 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
673 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
674 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
675 ret <8 x i16> %vecinit7.i
; Builds a <4 x i32> by inserting the same scalar %v1 into every lane (0 through 3) of an undef vector.
; The check expects the whole chain to collapse into one `dup v0.4s, w0`.
678 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
679 ; CHECK-LABEL: test_vdupq_n_u32:
681 ; CHECK-NEXT: dup v0.4s, w0
683 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
684 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
685 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
686 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
687 ret <4 x i32> %vecinit3.i
; Builds a <2 x i64> by inserting the same scalar %v1 into both lanes of an undef vector.
; The check expects the pair of inserts to collapse into one `dup v0.2d, x0`.
690 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
691 ; CHECK-LABEL: test_vdupq_n_u64:
693 ; CHECK-NEXT: dup v0.2d, x0
695 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
696 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
697 ret <2 x i64> %vecinit1.i
; Splat of lane 5 of a 64-bit <8 x i8> via a shufflevector whose mask is all 5s.
; The checks expect a kill annotation widening $d0 to $q0 followed by `dup v0.8b, v0.b[5]`.
700 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
701 ; CHECK-LABEL: test_vdup_lane_s8:
703 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
704 ; CHECK-NEXT: dup v0.8b, v0.b[5]
706 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
707 ret <8 x i8> %shuffle
; Splat of lane 2 of a 64-bit <4 x i16> via a shufflevector whose mask is all 2s.
; The checks expect a kill annotation widening $d0 to $q0 followed by `dup v0.4h, v0.h[2]`.
710 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
711 ; CHECK-LABEL: test_vdup_lane_s16:
713 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
714 ; CHECK-NEXT: dup v0.4h, v0.h[2]
716 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
717 ret <4 x i16> %shuffle
; Splat of lane 1 of a 64-bit <2 x i32> via a shufflevector whose mask is all 1s.
; The checks expect a kill annotation widening $d0 to $q0 followed by `dup v0.2s, v0.s[1]`.
720 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
721 ; CHECK-LABEL: test_vdup_lane_s32:
723 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
724 ; CHECK-NEXT: dup v0.2s, v0.s[1]
726 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
727 ret <2 x i32> %shuffle
; Widening splat: lane 5 of a 64-bit <8 x i8> is broadcast to all 16 lanes of a <16 x i8> result.
; The checks expect a kill annotation widening $d0 to $q0 followed by `dup v0.16b, v0.b[5]`.
730 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
731 ; CHECK-LABEL: test_vdupq_lane_s8:
733 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
734 ; CHECK-NEXT: dup v0.16b, v0.b[5]
736 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
737 ret <16 x i8> %shuffle
; Widening splat: lane 2 of a 64-bit <4 x i16> is broadcast to all 8 lanes of an <8 x i16> result.
; The checks expect a kill annotation widening $d0 to $q0 followed by `dup v0.8h, v0.h[2]`.
740 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
741 ; CHECK-LABEL: test_vdupq_lane_s16:
743 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
744 ; CHECK-NEXT: dup v0.8h, v0.h[2]
746 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
747 ret <8 x i16> %shuffle
; Widening splat: lane 1 of a 64-bit <2 x i32> is broadcast to all 4 lanes of a <4 x i32> result.
; The checks expect a kill annotation widening $d0 to $q0 followed by `dup v0.4s, v0.s[1]`.
750 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
751 ; CHECK-LABEL: test_vdupq_lane_s32:
753 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
754 ; CHECK-NEXT: dup v0.4s, v0.s[1]
756 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
757 ret <4 x i32> %shuffle
; Widening splat: the only element of a <1 x i64> is broadcast to both lanes of a <2 x i64> (zeroinitializer mask).
; The checks expect a kill annotation widening $d0 to $q0 followed by `dup v0.2d, v0.d[0]`.
760 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
761 ; CHECK-LABEL: test_vdupq_lane_s64:
763 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
764 ; CHECK-NEXT: dup v0.2d, v0.d[0]
766 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
767 ret <2 x i64> %shuffle
; Narrowing splat: lane 5 of a 128-bit <16 x i8> is broadcast into a 64-bit <8 x i8> result.
; The check expects `dup v0.8b, v0.b[5]`.
770 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
771 ; CHECK-LABEL: test_vdup_laneq_s8:
773 ; CHECK-NEXT: dup v0.8b, v0.b[5]
775 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
776 ret <8 x i8> %shuffle
; Narrowing splat: lane 2 of a 128-bit <8 x i16> is broadcast into a 64-bit <4 x i16> result.
; The check expects `dup v0.4h, v0.h[2]`.
779 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
780 ; CHECK-LABEL: test_vdup_laneq_s16:
782 ; CHECK-NEXT: dup v0.4h, v0.h[2]
784 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
785 ret <4 x i16> %shuffle
; Narrowing splat: lane 1 of a 128-bit <4 x i32> is broadcast into a 64-bit <2 x i32> result.
; The check expects `dup v0.2s, v0.s[1]`.
788 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
789 ; CHECK-LABEL: test_vdup_laneq_s32:
791 ; CHECK-NEXT: dup v0.2s, v0.s[1]
793 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
794 ret <2 x i32> %shuffle
; Same-width splat: lane 5 of a <16 x i8> is broadcast to all 16 lanes; checks for `dup v0.16b, v0.b[5]`.
797 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
798 ; CHECK-LABEL: test_vdupq_laneq_s8:
800 ; CHECK-NEXT: dup v0.16b, v0.b[5]
802 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
803 ret <16 x i8> %shuffle
; Same-width splat: lane 2 of an <8 x i16> is broadcast to all 8 lanes; checks for `dup v0.8h, v0.h[2]`.
806 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
807 ; CHECK-LABEL: test_vdupq_laneq_s16:
809 ; CHECK-NEXT: dup v0.8h, v0.h[2]
811 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
812 ret <8 x i16> %shuffle
; Same-width splat: lane 1 of a <4 x i32> is broadcast to all 4 lanes; checks for `dup v0.4s, v0.s[1]`.
815 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
816 ; CHECK-LABEL: test_vdupq_laneq_s32:
818 ; CHECK-NEXT: dup v0.4s, v0.s[1]
820 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
821 ret <4 x i32> %shuffle
; Same-width splat: lane 0 of a <2 x i64> is broadcast to both lanes (zeroinitializer mask); checks for `dup v0.2d, v0.d[0]`.
824 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
825 ; CHECK-LABEL: test_vdupq_laneq_s64:
827 ; CHECK-NEXT: dup v0.2d, v0.d[0]
829 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
830 ret <2 x i64> %shuffle
; Bitcasts a 64-bit <8 x i8> vector to a scalar i64; checks for a register-file move `fmov x0, d0`.
833 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
834 ; CHECK-LABEL: test_bitcastv8i8toi64:
836 ; CHECK-NEXT: fmov x0, d0
838 %res = bitcast <8 x i8> %in to i64
; Bitcasts a 64-bit <4 x i16> vector to a scalar i64; checks for a register-file move `fmov x0, d0`.
842 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
843 ; CHECK-LABEL: test_bitcastv4i16toi64:
845 ; CHECK-NEXT: fmov x0, d0
847 %res = bitcast <4 x i16> %in to i64
; Bitcasts a 64-bit <2 x i32> vector to a scalar i64; checks for a register-file move `fmov x0, d0`.
851 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
852 ; CHECK-LABEL: test_bitcastv2i32toi64:
854 ; CHECK-NEXT: fmov x0, d0
856 %res = bitcast <2 x i32> %in to i64
; Bitcasts a 64-bit <2 x float> vector to a scalar i64; checks for a register-file move `fmov x0, d0`.
860 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
861 ; CHECK-LABEL: test_bitcastv2f32toi64:
863 ; CHECK-NEXT: fmov x0, d0
865 %res = bitcast <2 x float> %in to i64
; Bitcasts a <1 x i64> vector to a scalar i64; checks for a register-file move `fmov x0, d0`.
869 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
870 ; CHECK-LABEL: test_bitcastv1i64toi64:
872 ; CHECK-NEXT: fmov x0, d0
874 %res = bitcast <1 x i64> %in to i64
; Bitcasts a <1 x double> vector to a scalar i64; checks for a register-file move `fmov x0, d0`.
878 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
879 ; CHECK-LABEL: test_bitcastv1f64toi64:
881 ; CHECK-NEXT: fmov x0, d0
883 %res = bitcast <1 x double> %in to i64
; Bitcasts a scalar i64 to a 64-bit <8 x i8> vector; checks for a register-file move `fmov d0, x0`.
887 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
888 ; CHECK-LABEL: test_bitcasti64tov8i8:
890 ; CHECK-NEXT: fmov d0, x0
892 %res = bitcast i64 %in to <8 x i8>
; Bitcasts a scalar i64 to a 64-bit <4 x i16> vector; checks for a register-file move `fmov d0, x0`.
896 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
897 ; CHECK-LABEL: test_bitcasti64tov4i16:
899 ; CHECK-NEXT: fmov d0, x0
901 %res = bitcast i64 %in to <4 x i16>
; Bitcasts a scalar i64 to a 64-bit <2 x i32> vector; checks for a register-file move `fmov d0, x0`.
905 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
906 ; CHECK-LABEL: test_bitcasti64tov2i32:
908 ; CHECK-NEXT: fmov d0, x0
910 %res = bitcast i64 %in to <2 x i32>
; Bitcasts a scalar i64 to a 64-bit <2 x float> vector; checks for a register-file move `fmov d0, x0`.
914 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
915 ; CHECK-LABEL: test_bitcasti64tov2f32:
917 ; CHECK-NEXT: fmov d0, x0
919 %res = bitcast i64 %in to <2 x float>
; Bitcasts a scalar i64 to a <1 x i64> vector; checks for a register-file move `fmov d0, x0`.
923 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
924 ; CHECK-LABEL: test_bitcasti64tov1i64:
926 ; CHECK-NEXT: fmov d0, x0
928 %res = bitcast i64 %in to <1 x i64>
; Bitcasts a scalar i64 to a <1 x double> vector; checks for a register-file move `fmov d0, x0`.
932 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
933 ; CHECK-LABEL: test_bitcasti64tov1f64:
935 ; CHECK-NEXT: fmov d0, x0
937 %res = bitcast i64 %in to <1 x double>
938 ret <1 x double> %res
; Negates an <8 x i8> (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; The checks expect a vector `neg`, a scalar `fcvtzs` into x8, and an `fmov` back into d0.
941 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
942 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
944 ; CHECK-NEXT: neg v0.8b, v0.8b
945 ; CHECK-NEXT: fcvtzs x8, d0
946 ; CHECK-NEXT: fmov d0, x8
948 %sub.i = sub <8 x i8> zeroinitializer, %a
949 %1 = bitcast <8 x i8> %sub.i to <1 x double>
950 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
951 ret <1 x i64> %vcvt.i
; Negates a <4 x i16> (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; The checks expect a vector `neg`, a scalar `fcvtzs` into x8, and an `fmov` back into d0.
954 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
955 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
957 ; CHECK-NEXT: neg v0.4h, v0.4h
958 ; CHECK-NEXT: fcvtzs x8, d0
959 ; CHECK-NEXT: fmov d0, x8
961 %sub.i = sub <4 x i16> zeroinitializer, %a
962 %1 = bitcast <4 x i16> %sub.i to <1 x double>
963 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
964 ret <1 x i64> %vcvt.i
; Negates a <2 x i32> (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; The checks expect a vector `neg`, a scalar `fcvtzs` into x8, and an `fmov` back into d0.
967 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
968 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
970 ; CHECK-NEXT: neg v0.2s, v0.2s
971 ; CHECK-NEXT: fcvtzs x8, d0
972 ; CHECK-NEXT: fmov d0, x8
974 %sub.i = sub <2 x i32> zeroinitializer, %a
975 %1 = bitcast <2 x i32> %sub.i to <1 x double>
976 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
977 ret <1 x i64> %vcvt.i
; Negates a <1 x i64> (0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; The checks expect the scalar form `neg d0, d0`, a `fcvtzs` into x8, and an `fmov` back into d0.
980 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
981 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
983 ; CHECK-NEXT: neg d0, d0
984 ; CHECK-NEXT: fcvtzs x8, d0
985 ; CHECK-NEXT: fmov d0, x8
987 %sub.i = sub <1 x i64> zeroinitializer, %a
988 %1 = bitcast <1 x i64> %sub.i to <1 x double>
989 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
990 ret <1 x i64> %vcvt.i
; Negates a <2 x float> (written as -0.0 - %a), bitcasts the result to <1 x double>, then converts it to <1 x i64> with fptosi.
; The checks expect a vector `fneg`, a scalar `fcvtzs` into x8, and an `fmov` back into d0.
993 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
994 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
996 ; CHECK-NEXT: fneg v0.2s, v0.2s
997 ; CHECK-NEXT: fcvtzs x8, d0
998 ; CHECK-NEXT: fmov d0, x8
1000 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
1001 %1 = bitcast <2 x float> %sub.i to <1 x double>
1002 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
1003 ret <1 x i64> %vcvt.i
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts the result to <8 x i8>, then negates it (0 - value).
; The checks expect `fmov x8, d0`, `scvtf d0, x8`, and a vector `neg v0.8b`.
1006 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
1007 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
1009 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1010 ; CHECK-NEXT: fmov x8, d0
1011 ; CHECK-NEXT: scvtf d0, x8
1012 ; CHECK-NEXT: neg v0.8b, v0.8b
1014 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1015 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
1016 %sub.i = sub <8 x i8> zeroinitializer, %1
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts the result to <4 x i16>, then negates it (0 - value).
; The checks expect `fmov x8, d0`, `scvtf d0, x8`, and a vector `neg v0.4h`.
1020 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
1021 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
1023 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1024 ; CHECK-NEXT: fmov x8, d0
1025 ; CHECK-NEXT: scvtf d0, x8
1026 ; CHECK-NEXT: neg v0.4h, v0.4h
1028 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1029 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
1030 %sub.i = sub <4 x i16> zeroinitializer, %1
1031 ret <4 x i16> %sub.i
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts the result to <2 x i32>, then negates it (0 - value).
; The checks expect `fmov x8, d0`, `scvtf d0, x8`, and a vector `neg v0.2s`.
1034 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
1035 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
1037 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1038 ; CHECK-NEXT: fmov x8, d0
1039 ; CHECK-NEXT: scvtf d0, x8
1040 ; CHECK-NEXT: neg v0.2s, v0.2s
1042 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1043 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
1044 %sub.i = sub <2 x i32> zeroinitializer, %1
1045 ret <2 x i32> %sub.i
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts the result back to <1 x i64>, then negates it (0 - value).
; The checks expect `fmov x8, d0`, `scvtf d0, x8`, and the scalar form `neg d0, d0`.
1048 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
1049 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
1051 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1052 ; CHECK-NEXT: fmov x8, d0
1053 ; CHECK-NEXT: scvtf d0, x8
1054 ; CHECK-NEXT: neg d0, d0
1056 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1057 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
1058 %sub.i = sub <1 x i64> zeroinitializer, %1
1059 ret <1 x i64> %sub.i
; Converts a <1 x i64> to <1 x double> with sitofp, bitcasts the result to <2 x float>, then negates it (-0.0 - value).
; The checks expect `fmov x8, d0`, `scvtf d0, x8`, and a vector `fneg v0.2s`.
1062 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
1063 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
1065 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1066 ; CHECK-NEXT: fmov x8, d0
1067 ; CHECK-NEXT: scvtf d0, x8
1068 ; CHECK-NEXT: fneg v0.2s, v0.2s
1070 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
1071 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
1072 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
1073 ret <2 x float> %sub.i
1076 ; Test insert element into an undef vector
; Inserts scalar %a into lane 0 of an undef <8 x i8>.
; Uses a hand-written regex check: any `fmov` from a w register to an s register is accepted.
1077 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
1078 ; CHECK-LABEL: scalar_to_vector.v8i8:
1079 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
1080 %b = insertelement <8 x i8> undef, i8 %a, i32 0
; Inserts scalar %a into lane 0 of an undef <16 x i8>.
; Uses a hand-written regex check: any `fmov` from a w register to an s register is accepted.
1084 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
1085 ; CHECK-LABEL: scalar_to_vector.v16i8:
1086 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
1087 %b = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts scalar %a into lane 0 of an undef <4 x i16>.
; Uses a hand-written regex check: any `fmov` from a w register to an s register is accepted.
1091 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
1092 ; CHECK-LABEL: scalar_to_vector.v4i16:
1093 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
1094 %b = insertelement <4 x i16> undef, i16 %a, i32 0
; Inserts scalar %a into lane 0 of an undef <8 x i16>.
; Uses a hand-written regex check: any `fmov` from a w register to an s register is accepted.
1098 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
1099 ; CHECK-LABEL: scalar_to_vector.v8i16:
1100 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
1101 %b = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts scalar %a into lane 0 of an undef <2 x i32>.
; Uses a hand-written regex check: any `fmov` from a w register to an s register is accepted.
1105 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
1106 ; CHECK-LABEL: scalar_to_vector.v2i32:
1107 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
1108 %b = insertelement <2 x i32> undef, i32 %a, i32 0
; Inserts scalar %a into lane 0 of an undef <4 x i32>.
; Uses a hand-written regex check: any `fmov` from a w register to an s register is accepted.
1112 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
1113 ; CHECK-LABEL: scalar_to_vector.v4i32:
1114 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
1115 %b = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts the 64-bit scalar %a into lane 0 of an undef <2 x i64>.
; Uses a hand-written regex check: any `fmov` from an x register to a d register is accepted.
1119 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
1120 ; CHECK-LABEL: scalar_to_vector.v2i64:
1121 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
1122 %b = insertelement <2 x i64> undef, i64 %a, i32 0
; Extracts the only element of a <1 x i8> and inserts it into every lane (0 through 7) of an <8 x i8>.
; The hand-written check expects a lane-sourced splat, `dup v0.8b, v0.b[0]`.
1126 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
1127 ; CHECK-LABEL: testDUP.v1i8:
1128 ; CHECK: dup v0.8b, v0.b[0]
1129 %b = extractelement <1 x i8> %a, i32 0
1130 %c = insertelement <8 x i8> undef, i8 %b, i32 0
1131 %d = insertelement <8 x i8> %c, i8 %b, i32 1
1132 %e = insertelement <8 x i8> %d, i8 %b, i32 2
1133 %f = insertelement <8 x i8> %e, i8 %b, i32 3
1134 %g = insertelement <8 x i8> %f, i8 %b, i32 4
1135 %h = insertelement <8 x i8> %g, i8 %b, i32 5
1136 %i = insertelement <8 x i8> %h, i8 %b, i32 6
1137 %j = insertelement <8 x i8> %i, i8 %b, i32 7
1141 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
1142 ; CHECK-LABEL: testDUP.v1i16:
1143 ; CHECK: dup v0.8h, v0.h[0]
1144 %b = extractelement <1 x i16> %a, i32 0
1145 %c = insertelement <8 x i16> undef, i16 %b, i32 0
1146 %d = insertelement <8 x i16> %c, i16 %b, i32 1
1147 %e = insertelement <8 x i16> %d, i16 %b, i32 2
1148 %f = insertelement <8 x i16> %e, i16 %b, i32 3
1149 %g = insertelement <8 x i16> %f, i16 %b, i32 4
1150 %h = insertelement <8 x i16> %g, i16 %b, i32 5
1151 %i = insertelement <8 x i16> %h, i16 %b, i32 6
1152 %j = insertelement <8 x i16> %i, i16 %b, i32 7
1156 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
1157 ; CHECK-LABEL: testDUP.v1i32:
1158 ; CHECK: dup v0.4s, v0.s[0]
1159 %b = extractelement <1 x i32> %a, i32 0
1160 %c = insertelement <4 x i32> undef, i32 %b, i32 0
1161 %d = insertelement <4 x i32> %c, i32 %b, i32 1
1162 %e = insertelement <4 x i32> %d, i32 %b, i32 2
1163 %f = insertelement <4 x i32> %e, i32 %b, i32 3
1167 define <8 x i8> @getl(<16 x i8> %x) #0 {
1168 ; CHECK-LABEL: getl:
1170 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1172 %vecext = extractelement <16 x i8> %x, i32 0
1173 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
1174 %vecext1 = extractelement <16 x i8> %x, i32 1
1175 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
1176 %vecext3 = extractelement <16 x i8> %x, i32 2
1177 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
1178 %vecext5 = extractelement <16 x i8> %x, i32 3
1179 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
1180 %vecext7 = extractelement <16 x i8> %x, i32 4
1181 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
1182 %vecext9 = extractelement <16 x i8> %x, i32 5
1183 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
1184 %vecext11 = extractelement <16 x i8> %x, i32 6
1185 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
1186 %vecext13 = extractelement <16 x i8> %x, i32 7
1187 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
1188 ret <8 x i8> %vecinit14
1191 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
1192 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
1194 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1195 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
1196 ; CHECK-NEXT: str q0, [sp, #-16]!
1197 ; CHECK-NEXT: and x8, x0, #0x7
1198 ; CHECK-NEXT: mov x9, sp
1199 ; CHECK-NEXT: bfi x9, x8, #1, #3
1200 ; CHECK-NEXT: ldr h1, [x9]
1201 ; CHECK-NEXT: mov v1.h[1], v0.h[1]
1202 ; CHECK-NEXT: mov v1.h[2], v0.h[2]
1203 ; CHECK-NEXT: mov v1.h[3], v0.h[3]
1204 ; CHECK-NEXT: mov v0.16b, v1.16b
1205 ; CHECK-NEXT: add sp, sp, #16 // =16
1207 %tmp = extractelement <8 x i16> %x, i32 %idx
1208 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
1209 %tmp3 = extractelement <8 x i16> %x, i32 1
1210 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1211 %tmp5 = extractelement <8 x i16> %x, i32 2
1212 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1213 %tmp7 = extractelement <8 x i16> %x, i32 3
1214 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
1218 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
1219 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
1221 ; CHECK-NEXT: sub sp, sp, #16 // =16
1222 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1223 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
1224 ; CHECK-NEXT: and x8, x0, #0x3
1225 ; CHECK-NEXT: add x9, sp, #8 // =8
1226 ; CHECK-NEXT: bfi x9, x8, #1, #2
1227 ; CHECK-NEXT: str h0, [x9]
1228 ; CHECK-NEXT: ldr d1, [sp, #8]
1229 ; CHECK-NEXT: mov v1.h[1], v0.h[1]
1230 ; CHECK-NEXT: mov v1.h[2], v0.h[2]
1231 ; CHECK-NEXT: mov v1.h[3], v0.h[3]
1232 ; CHECK-NEXT: mov v0.16b, v1.16b
1233 ; CHECK-NEXT: add sp, sp, #16 // =16
1235 %tmp = extractelement <8 x i16> %x, i32 0
1236 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
1237 %tmp3 = extractelement <8 x i16> %x, i32 1
1238 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
1239 %tmp5 = extractelement <8 x i16> %x, i32 2
1240 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
1241 %tmp7 = extractelement <8 x i16> %x, i32 3
1242 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
1246 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
1247 ; CHECK-LABEL: test_dup_v2i32_v4i16:
1248 ; CHECK: // %bb.0: // %entry
1249 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1250 ; CHECK-NEXT: dup v0.4h, v0.h[2]
1253 %x = extractelement <2 x i32> %a, i32 1
1254 %vget_lane = trunc i32 %x to i16
1255 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1256 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1257 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1258 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1259 ret <4 x i16> %vecinit3.i
1262 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
1263 ; CHECK-LABEL: test_dup_v4i32_v8i16:
1264 ; CHECK: // %bb.0: // %entry
1265 ; CHECK-NEXT: dup v0.8h, v0.h[6]
1268 %x = extractelement <4 x i32> %a, i32 3
1269 %vget_lane = trunc i32 %x to i16
1270 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1271 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1272 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1273 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1274 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1275 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1276 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1277 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1278 ret <8 x i16> %vecinit7.i
1281 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
1282 ; CHECK-LABEL: test_dup_v1i64_v4i16:
1283 ; CHECK: // %bb.0: // %entry
1284 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1285 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1288 %x = extractelement <1 x i64> %a, i32 0
1289 %vget_lane = trunc i64 %x to i16
1290 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1291 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1292 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1293 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1294 ret <4 x i16> %vecinit3.i
1297 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
1298 ; CHECK-LABEL: test_dup_v1i64_v2i32:
1299 ; CHECK: // %bb.0: // %entry
1300 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1301 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1304 %x = extractelement <1 x i64> %a, i32 0
1305 %vget_lane = trunc i64 %x to i32
1306 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1307 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1308 ret <2 x i32> %vecinit1.i
1311 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
1312 ; CHECK-LABEL: test_dup_v2i64_v8i16:
1313 ; CHECK: // %bb.0: // %entry
1314 ; CHECK-NEXT: dup v0.8h, v0.h[4]
1317 %x = extractelement <2 x i64> %a, i32 1
1318 %vget_lane = trunc i64 %x to i16
1319 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1320 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1321 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1322 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1323 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1324 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1325 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1326 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1327 ret <8 x i16> %vecinit7.i
1330 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1331 ; CHECK-LABEL: test_dup_v2i64_v4i32:
1332 ; CHECK: // %bb.0: // %entry
1333 ; CHECK-NEXT: dup v0.4s, v0.s[2]
1336 %x = extractelement <2 x i64> %a, i32 1
1337 %vget_lane = trunc i64 %x to i32
1338 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1339 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1340 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1341 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1342 ret <4 x i32> %vecinit3.i
1345 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1346 ; CHECK-LABEL: test_dup_v4i32_v4i16:
1347 ; CHECK: // %bb.0: // %entry
1348 ; CHECK-NEXT: dup v0.4h, v0.h[2]
1351 %x = extractelement <4 x i32> %a, i32 1
1352 %vget_lane = trunc i32 %x to i16
1353 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1354 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1355 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1356 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1357 ret <4 x i16> %vecinit3.i
1360 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1361 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1362 ; CHECK: // %bb.0: // %entry
1363 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1366 %x = extractelement <2 x i64> %a, i32 0
1367 %vget_lane = trunc i64 %x to i16
1368 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1369 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1370 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1371 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1372 ret <4 x i16> %vecinit3.i
1375 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1376 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1377 ; CHECK: // %bb.0: // %entry
1378 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1381 %x = extractelement <2 x i64> %a, i32 0
1382 %vget_lane = trunc i64 %x to i32
1383 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1384 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1385 ret <2 x i32> %vecinit1.i
1389 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1390 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1391 ; CHECK: // %bb.0: // %entry
1392 ; CHECK-NEXT: fmaxp s0, v0.2s
1395 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1396 %1 = insertelement <1 x float> undef, float %0, i32 0
1397 %2 = extractelement <1 x float> %1, i32 0
1398 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1399 ret <2 x float> %vecinit1.i
1402 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1403 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1404 ; CHECK: // %bb.0: // %entry
1405 ; CHECK-NEXT: fmaxp s0, v0.2s
1408 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1409 %1 = insertelement <1 x float> undef, float %0, i32 0
1410 %2 = extractelement <1 x float> %1, i32 0
1411 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1412 ret <4 x float> %vecinit1.i
1415 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
1417 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1418 ; CHECK-LABEL: test_concat_undef_v1i32:
1419 ; CHECK: // %bb.0: // %entry
1420 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1421 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1424 %0 = extractelement <2 x i32> %a, i32 0
1425 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1426 ret <2 x i32> %vecinit1.i
1429 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
1431 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1432 ; CHECK-LABEL: test_concat_v1i32_undef:
1433 ; CHECK: // %bb.0: // %entry
1434 ; CHECK-NEXT: fmov s0, w0
1435 ; CHECK-NEXT: sqabs s0, s0
1438 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1439 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1440 ret <2 x i32> %vecinit.i432
1443 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1444 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1445 ; CHECK: // %bb.0: // %entry
1446 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1447 ; CHECK-NEXT: dup v0.2s, v0.s[0]
1450 %0 = extractelement <2 x i32> %a, i32 0
1451 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1452 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1453 ret <2 x i32> %vecinit1.i
1456 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1457 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1458 ; CHECK: // %bb.0: // %entry
1459 ; CHECK-NEXT: fmov s1, w1
1460 ; CHECK-NEXT: fmov s0, w0
1461 ; CHECK-NEXT: sqabs s1, s1
1462 ; CHECK-NEXT: sqabs s0, s0
1463 ; CHECK-NEXT: fmov w8, s1
1464 ; CHECK-NEXT: mov v0.s[1], w8
1465 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1468 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1469 %d = insertelement <2 x i32> undef, i32 %c, i32 0
1470 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1471 %f = insertelement <2 x i32> undef, i32 %e, i32 0
1472 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
1476 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1477 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1478 ; CHECK: // %bb.0: // %entry
1479 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1482 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1483 ret <16 x i8> %vecinit30
1486 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1487 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1488 ; CHECK: // %bb.0: // %entry
1489 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1490 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1493 %vecext = extractelement <8 x i8> %x, i32 0
1494 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1495 %vecext1 = extractelement <8 x i8> %x, i32 1
1496 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1497 %vecext3 = extractelement <8 x i8> %x, i32 2
1498 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1499 %vecext5 = extractelement <8 x i8> %x, i32 3
1500 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1501 %vecext7 = extractelement <8 x i8> %x, i32 4
1502 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1503 %vecext9 = extractelement <8 x i8> %x, i32 5
1504 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1505 %vecext11 = extractelement <8 x i8> %x, i32 6
1506 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1507 %vecext13 = extractelement <8 x i8> %x, i32 7
1508 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1509 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1510 ret <16 x i8> %vecinit30
1513 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1514 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1515 ; CHECK: // %bb.0: // %entry
1516 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1517 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1520 %vecext = extractelement <16 x i8> %x, i32 0
1521 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1522 %vecext1 = extractelement <16 x i8> %x, i32 1
1523 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1524 %vecext3 = extractelement <16 x i8> %x, i32 2
1525 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1526 %vecext5 = extractelement <16 x i8> %x, i32 3
1527 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1528 %vecext7 = extractelement <16 x i8> %x, i32 4
1529 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1530 %vecext9 = extractelement <16 x i8> %x, i32 5
1531 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1532 %vecext11 = extractelement <16 x i8> %x, i32 6
1533 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1534 %vecext13 = extractelement <16 x i8> %x, i32 7
1535 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1536 %vecext15 = extractelement <8 x i8> %y, i32 0
1537 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1538 %vecext17 = extractelement <8 x i8> %y, i32 1
1539 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1540 %vecext19 = extractelement <8 x i8> %y, i32 2
1541 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1542 %vecext21 = extractelement <8 x i8> %y, i32 3
1543 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1544 %vecext23 = extractelement <8 x i8> %y, i32 4
1545 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1546 %vecext25 = extractelement <8 x i8> %y, i32 5
1547 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1548 %vecext27 = extractelement <8 x i8> %y, i32 6
1549 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1550 %vecext29 = extractelement <8 x i8> %y, i32 7
1551 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1552 ret <16 x i8> %vecinit30
1555 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1556 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1557 ; CHECK: // %bb.0: // %entry
1558 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1559 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1560 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1563 %vecext = extractelement <8 x i8> %x, i32 0
1564 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1565 %vecext1 = extractelement <8 x i8> %x, i32 1
1566 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1567 %vecext3 = extractelement <8 x i8> %x, i32 2
1568 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1569 %vecext5 = extractelement <8 x i8> %x, i32 3
1570 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1571 %vecext7 = extractelement <8 x i8> %x, i32 4
1572 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1573 %vecext9 = extractelement <8 x i8> %x, i32 5
1574 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1575 %vecext11 = extractelement <8 x i8> %x, i32 6
1576 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1577 %vecext13 = extractelement <8 x i8> %x, i32 7
1578 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1579 %vecext15 = extractelement <8 x i8> %y, i32 0
1580 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1581 %vecext17 = extractelement <8 x i8> %y, i32 1
1582 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1583 %vecext19 = extractelement <8 x i8> %y, i32 2
1584 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1585 %vecext21 = extractelement <8 x i8> %y, i32 3
1586 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1587 %vecext23 = extractelement <8 x i8> %y, i32 4
1588 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1589 %vecext25 = extractelement <8 x i8> %y, i32 5
1590 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1591 %vecext27 = extractelement <8 x i8> %y, i32 6
1592 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1593 %vecext29 = extractelement <8 x i8> %y, i32 7
1594 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1595 ret <16 x i8> %vecinit30
1598 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1599 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1600 ; CHECK: // %bb.0: // %entry
1601 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1604 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1605 ret <8 x i16> %vecinit14
1608 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1609 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1610 ; CHECK: // %bb.0: // %entry
1611 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1612 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1615 %vecext = extractelement <4 x i16> %x, i32 0
1616 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1617 %vecext1 = extractelement <4 x i16> %x, i32 1
1618 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1619 %vecext3 = extractelement <4 x i16> %x, i32 2
1620 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1621 %vecext5 = extractelement <4 x i16> %x, i32 3
1622 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1623 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1624 ret <8 x i16> %vecinit14
1627 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1628 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1629 ; CHECK: // %bb.0: // %entry
1630 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1631 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1634 %vecext = extractelement <8 x i16> %x, i32 0
1635 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1636 %vecext1 = extractelement <8 x i16> %x, i32 1
1637 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1638 %vecext3 = extractelement <8 x i16> %x, i32 2
1639 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1640 %vecext5 = extractelement <8 x i16> %x, i32 3
1641 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1642 %vecext7 = extractelement <4 x i16> %y, i32 0
1643 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1644 %vecext9 = extractelement <4 x i16> %y, i32 1
1645 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1646 %vecext11 = extractelement <4 x i16> %y, i32 2
1647 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1648 %vecext13 = extractelement <4 x i16> %y, i32 3
1649 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1650 ret <8 x i16> %vecinit14
1653 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1654 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1655 ; CHECK: // %bb.0: // %entry
1656 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1657 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1658 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1661 %vecext = extractelement <4 x i16> %x, i32 0
1662 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1663 %vecext1 = extractelement <4 x i16> %x, i32 1
1664 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1665 %vecext3 = extractelement <4 x i16> %x, i32 2
1666 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1667 %vecext5 = extractelement <4 x i16> %x, i32 3
1668 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1669 %vecext7 = extractelement <4 x i16> %y, i32 0
1670 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1671 %vecext9 = extractelement <4 x i16> %y, i32 1
1672 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1673 %vecext11 = extractelement <4 x i16> %y, i32 2
1674 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1675 %vecext13 = extractelement <4 x i16> %y, i32 3
1676 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1677 ret <8 x i16> %vecinit14
1680 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1681 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1682 ; CHECK: // %bb.0: // %entry
1683 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1686 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1687 ret <4 x i32> %vecinit6
1690 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1691 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1692 ; CHECK: // %bb.0: // %entry
1693 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1694 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1697 %vecext = extractelement <2 x i32> %x, i32 0
1698 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1699 %vecext1 = extractelement <2 x i32> %x, i32 1
1700 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1701 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1702 ret <4 x i32> %vecinit6
1705 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1706 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1707 ; CHECK: // %bb.0: // %entry
1708 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1709 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1712 %vecext = extractelement <4 x i32> %x, i32 0
1713 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1714 %vecext1 = extractelement <4 x i32> %x, i32 1
1715 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1716 %vecext3 = extractelement <2 x i32> %y, i32 0
1717 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1718 %vecext5 = extractelement <2 x i32> %y, i32 1
1719 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1720 ret <4 x i32> %vecinit6
1723 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1724 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1725 ; CHECK: // %bb.0: // %entry
1726 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1727 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1728 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1731 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1732 ret <4 x i32> %vecinit6
1735 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1736 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1737 ; CHECK: // %bb.0: // %entry
1738 ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
1741 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1742 ret <2 x i64> %vecinit2
1745 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1746 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1747 ; CHECK: // %bb.0: // %entry
1748 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1749 ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
1752 %vecext = extractelement <1 x i64> %x, i32 0
1753 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1754 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1755 ret <2 x i64> %vecinit2
1758 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1759 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1760 ; CHECK: // %bb.0: // %entry
1761 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1762 ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
1765 %vecext = extractelement <2 x i64> %x, i32 0
1766 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1767 %vecext1 = extractelement <1 x i64> %y, i32 0
1768 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1769 ret <2 x i64> %vecinit2
1772 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1773 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1774 ; CHECK: // %bb.0: // %entry
1775 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1776 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1777 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
1780 %vecext = extractelement <1 x i64> %x, i32 0
1781 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1782 %vecext1 = extractelement <1 x i64> %y, i32 0
1783 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1784 ret <2 x i64> %vecinit2
1788 define <4 x i16> @concat_vector_v4i16_const() {
1789 ; CHECK-LABEL: concat_vector_v4i16_const:
1791 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1793 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
1797 define <4 x i16> @concat_vector_v4i16_const_one() {
1798 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1800 ; CHECK-NEXT: movi v0.4h, #1
1802 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
1806 define <4 x i32> @concat_vector_v4i32_const() {
1807 ; CHECK-LABEL: concat_vector_v4i32_const:
1809 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1811 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
1815 define <8 x i8> @concat_vector_v8i8_const() {
1816 ; CHECK-LABEL: concat_vector_v8i8_const:
1818 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1820 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
1824 define <8 x i16> @concat_vector_v8i16_const() {
1825 ; CHECK-LABEL: concat_vector_v8i16_const:
1827 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1829 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
1833 define <8 x i16> @concat_vector_v8i16_const_one() {
1834 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1836 ; CHECK-NEXT: movi v0.8h, #1
1838 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
1842 define <16 x i8> @concat_vector_v16i8_const() {
1843 ; CHECK-LABEL: concat_vector_v16i8_const:
1845 ; CHECK-NEXT: movi v0.2d, #0000000000000000
1847 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
1851 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1852 ; CHECK-LABEL: concat_vector_v4i16:
1854 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1855 ; CHECK-NEXT: dup v0.4h, v0.h[0]
1857 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
1861 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1862 ; CHECK-LABEL: concat_vector_v4i32:
1864 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1865 ; CHECK-NEXT: dup v0.4s, v0.s[0]
1867 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
1871 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1872 ; CHECK-LABEL: concat_vector_v8i8:
1874 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1875 ; CHECK-NEXT: dup v0.8b, v0.b[0]
1877 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
1881 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1882 ; CHECK-LABEL: concat_vector_v8i16:
1884 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1885 ; CHECK-NEXT: dup v0.8h, v0.h[0]
1887 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1891 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1892 ; CHECK-LABEL: concat_vector_v16i8:
1894 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1895 ; CHECK-NEXT: dup v0.16b, v0.b[0]
1897 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer