1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Scalar-to-lane inserts. Each function below inserts its scalar argument
; into one fixed lane of its vector argument with `insertelement`; the
; FileCheck pattern expects an `ins` into that lane whose source operand is a
; general-purpose register.
; i8 into lane 15 of <16 x i8>; a w-register source is expected.
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
6 ; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
7 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
; i16 into lane 6 of <8 x i16>; a w-register source is expected.
11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
12 ; CHECK-LABEL: ins8hw:
13 ; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
14 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
; i32 into lane 2 of <4 x i32>; a w-register source is expected.
18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
19 ; CHECK-LABEL: ins4sw:
20 ; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
21 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
; i64 into lane 1 of <2 x i64>; this is the only case expecting an x register.
25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
26 ; CHECK-LABEL: ins2dw:
27 ; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
28 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
; i8 into lane 5 of the 64-bit vector type <8 x i8>.
32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
33 ; CHECK-LABEL: ins8bw:
34 ; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
35 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
; i16 into lane 3 of the 64-bit vector type <4 x i16>.
39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
40 ; CHECK-LABEL: ins4hw:
41 ; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
42 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
; i32 into lane 1 of the 64-bit vector type <2 x i32>.
46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
47 ; CHECK-LABEL: ins2sw:
48 ; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
49 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
; Lane-to-lane copies between two 128-bit vectors. Each function extracts one
; lane of %tmp1 and inserts it into one lane of %tmp2; the FileCheck pattern
; expects a single element-to-element `ins vD.T[dst], vS.T[src]`.
; Byte lane 2 -> byte lane 15.
53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
54 ; CHECK-LABEL: ins16b16:
55 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
56 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
57 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; Halfword lane 2 -> halfword lane 7.
61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
62 ; CHECK-LABEL: ins8h8:
63 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
64 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
65 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; Word lane 2 -> word lane 1.
69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
70 ; CHECK-LABEL: ins4s4:
71 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
72 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
73 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; Doubleword lane 0 -> doubleword lane 1.
77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
78 ; CHECK-LABEL: ins2d2:
79 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
80 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
81 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Float variant of ins4s4: the same `ins` on .s lanes is expected.
85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
86 ; CHECK-LABEL: ins4f4:
87 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
88 %tmp3 = extractelement <4 x float> %tmp1, i32 2
89 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
; Double variant of ins2d2: the same `ins` on .d lanes is expected.
93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
94 ; CHECK-LABEL: ins2df2:
95 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
96 %tmp3 = extractelement <2 x double> %tmp1, i32 0
97 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
98 ret <2 x double> %tmp4
; Lane copies from a 64-bit source vector (%tmp1) into a 128-bit destination
; vector (%tmp2). The expected instruction is still one element-to-element
; `ins`, with the source lane index taken from the narrower vector.
; <8 x i8> lane 2 -> <16 x i8> lane 15.
101 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
102 ; CHECK-LABEL: ins8b16:
103 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
104 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
105 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
; <4 x i16> lane 2 -> <8 x i16> lane 7.
109 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
110 ; CHECK-LABEL: ins4h8:
111 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
112 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
113 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
; <2 x i32> lane 1 -> <4 x i32> lane 1.
117 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
118 ; CHECK-LABEL: ins2s4:
119 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
120 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
121 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
; The only lane of <1 x i64> -> <2 x i64> lane 1.
125 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
126 ; CHECK-LABEL: ins1d2:
127 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
128 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
129 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
; Float variant of ins2s4.
133 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
134 ; CHECK-LABEL: ins2f4:
135 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
136 %tmp3 = extractelement <2 x float> %tmp1, i32 1
137 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
138 ret <4 x float> %tmp4
; Double variant of ins1d2.
141 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
142 ; CHECK-LABEL: ins1f2:
143 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
144 %tmp3 = extractelement <1 x double> %tmp1, i32 0
145 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
146 ret <2 x double> %tmp4
; Lane copies from a 128-bit source vector (%tmp1) into a 64-bit destination
; vector (%tmp2). All but the last function expect one element-to-element
; `ins`; ins2f1 expects a `dup` instead.
; <16 x i8> lane 2 -> <8 x i8> lane 7.
149 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
150 ; CHECK-LABEL: ins16b8:
151 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
152 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
153 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
; <8 x i16> lane 2 -> <4 x i16> lane 3.
157 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
158 ; CHECK-LABEL: ins8h4:
159 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
160 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
161 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; <4 x i32> lane 2 -> <2 x i32> lane 1.
165 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
166 ; CHECK-LABEL: ins4s2:
167 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
168 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
169 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; <2 x i64> lane 0 -> the only lane of <1 x i64>.
173 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
174 ; CHECK-LABEL: ins2d1:
175 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
176 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
177 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Float variant of ins4s2.
181 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
182 ; CHECK-LABEL: ins4f2:
183 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
184 %tmp3 = extractelement <4 x float> %tmp1, i32 2
185 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
186 ret <2 x float> %tmp4
; <2 x double> lane 1 -> the only lane of <1 x double>. Unlike its siblings,
; the pattern here expects a `dup` of source lane d[1] across a .2d register.
189 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
190 ; CHECK-LABEL: ins2f1:
191 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
192 %tmp3 = extractelement <2 x double> %tmp1, i32 1
193 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
194 ret <1 x double> %tmp4
; Lane copies where both the source (%tmp1) and the destination (%tmp2) are
; 64-bit vectors. All but the last function expect one element-to-element
; `ins`; ins1df1 instead asserts that no such `ins` is emitted.
; <8 x i8> lane 2 -> <8 x i8> lane 4.
197 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
198 ; CHECK-LABEL: ins8b8:
199 ; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
200 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
201 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
; <4 x i16> lane 2 -> <4 x i16> lane 3.
205 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
206 ; CHECK-LABEL: ins4h4:
207 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
208 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
209 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
; <2 x i32> lane 0 -> <2 x i32> lane 1.
213 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
214 ; CHECK-LABEL: ins2s2:
215 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
216 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
217 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
; Single-lane <1 x i64> to single-lane <1 x i64>.
221 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
222 ; CHECK-LABEL: ins1d1:
223 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
224 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
225 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
; Float variant of ins2s2.
229 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
230 ; CHECK-LABEL: ins2f2:
231 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
232 %tmp3 = extractelement <2 x float> %tmp1, i32 0
233 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
234 ret <2 x float> %tmp4
; Single-lane <1 x double> to single-lane <1 x double>. The directive is a
; negative one: no `ins` on a v register may appear in this function's output.
237 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
238 ; CHECK-LABEL: ins1df1:
239 ; CHECK-NOT: ins {{v[0-9]+}}
240 %tmp3 = extractelement <1 x double> %tmp1, i32 0
241 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
242 ret <1 x double> %tmp4
; Lane-to-GPR moves without sign extension. Each function extracts one fixed
; lane; the i8/i16 variants then zero-extend the element to i32.
; NOTE(review): FileCheck patterns match anywhere within an output line, so
; the bare `mov` patterns below would also be satisfied by a `umov` line -
; confirm that this looseness is intended for the .b/.h cases.
; Byte lane 8 of <16 x i8>, zero-extended to i32; `umov` to a w register.
245 define i32 @umovw16b(<16 x i8> %tmp1) {
246 ; CHECK-LABEL: umovw16b:
247 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
248 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
249 %tmp4 = zext i8 %tmp3 to i32
; Halfword lane 2 of <8 x i16>, zero-extended to i32; `umov` to a w register.
253 define i32 @umovw8h(<8 x i16> %tmp1) {
254 ; CHECK-LABEL: umovw8h:
255 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
256 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
257 %tmp4 = zext i16 %tmp3 to i32
; Word lane 2 of <4 x i32>; no extension is needed in the IR.
261 define i32 @umovw4s(<4 x i32> %tmp1) {
262 ; CHECK-LABEL: umovw4s:
263 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
264 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
; Doubleword lane 1 of <2 x i64>; an x-register destination is expected.
268 define i64 @umovx2d(<2 x i64> %tmp1) {
269 ; CHECK-LABEL: umovx2d:
270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
271 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; Byte lane 7 of the 64-bit vector <8 x i8>, zero-extended to i32.
275 define i32 @umovw8b(<8 x i8> %tmp1) {
276 ; CHECK-LABEL: umovw8b:
277 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
278 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
279 %tmp4 = zext i8 %tmp3 to i32
; Halfword lane 2 of the 64-bit vector <4 x i16>, zero-extended to i32.
283 define i32 @umovw4h(<4 x i16> %tmp1) {
284 ; CHECK-LABEL: umovw4h:
285 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
286 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
287 %tmp4 = zext i16 %tmp3 to i32
; Word lane 1 of the 64-bit vector <2 x i32>.
291 define i32 @umovw2s(<2 x i32> %tmp1) {
292 ; CHECK-LABEL: umovw2s:
293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
294 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
; The only lane of <1 x i64>; a plain `fmov` from a d register to an x
; register is expected rather than a lane-indexed move.
298 define i64 @umovx1d(<1 x i64> %tmp1) {
299 ; CHECK-LABEL: umovx1d:
300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
301 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
; Lane-to-GPR moves with sign extension. Each function extracts one fixed lane
; and sign-extends it; the FileCheck pattern expects a single `smov` from that
; lane. The smovw* variants additionally add the extended value to itself.
; Byte lane 8 of <16 x i8>, sign-extended to i32, then doubled.
305 define i32 @smovw16b(<16 x i8> %tmp1) {
306 ; CHECK-LABEL: smovw16b:
307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
308 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
309 %tmp4 = sext i8 %tmp3 to i32
310 %tmp5 = add i32 %tmp4, %tmp4
; Halfword lane 2 of <8 x i16>, sign-extended to i32, then doubled.
314 define i32 @smovw8h(<8 x i16> %tmp1) {
315 ; CHECK-LABEL: smovw8h:
316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
317 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
318 %tmp4 = sext i16 %tmp3 to i32
319 %tmp5 = add i32 %tmp4, %tmp4
; Byte lane 8 of <16 x i8>, sign-extended to i64; x-register destination.
323 define i64 @smovx16b(<16 x i8> %tmp1) {
324 ; CHECK-LABEL: smovx16b:
325 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
326 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
327 %tmp4 = sext i8 %tmp3 to i64
; Halfword lane 2 of <8 x i16>, sign-extended to i64; x-register destination.
331 define i64 @smovx8h(<8 x i16> %tmp1) {
332 ; CHECK-LABEL: smovx8h:
333 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
334 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
335 %tmp4 = sext i16 %tmp3 to i64
; Word lane 2 of <4 x i32>, sign-extended to i64; x-register destination.
339 define i64 @smovx4s(<4 x i32> %tmp1) {
340 ; CHECK-LABEL: smovx4s:
341 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
342 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
343 %tmp4 = sext i32 %tmp3 to i64
; Byte lane 4 of the 64-bit vector <8 x i8>, sign-extended to i32, doubled.
347 define i32 @smovw8b(<8 x i8> %tmp1) {
348 ; CHECK-LABEL: smovw8b:
349 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
350 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
351 %tmp4 = sext i8 %tmp3 to i32
352 %tmp5 = add i32 %tmp4, %tmp4
; Halfword lane 2 of the 64-bit vector <4 x i16>, sign-extended to i32,
; doubled.
356 define i32 @smovw4h(<4 x i16> %tmp1) {
357 ; CHECK-LABEL: smovw4h:
358 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
359 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
360 %tmp4 = sext i16 %tmp3 to i32
361 %tmp5 = add i32 %tmp4, %tmp4
; Byte lane 6 of <8 x i8>, sign-extended to i32. The pattern accepts either an
; x or a w destination register.
; NOTE(review): the name says "x" but the IR extends to i32 and the function
; returns i32 - confirm whether an i64 extension was intended.
365 define i32 @smovx8b(<8 x i8> %tmp1) {
366 ; CHECK-LABEL: smovx8b:
367 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
368 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
369 %tmp4 = sext i8 %tmp3 to i32
; Halfword lane 2 of <4 x i16>, sign-extended to i32. The pattern accepts
; either an x or a w destination register.
; NOTE(review): same naming/type mismatch as smovx8b - confirm intent.
373 define i32 @smovx4h(<4 x i16> %tmp1) {
374 ; CHECK-LABEL: smovx4h:
375 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
376 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
377 %tmp4 = sext i16 %tmp3 to i32
; Word lane 1 of the 64-bit vector <2 x i32>, sign-extended to i64.
381 define i64 @smovx2s(<2 x i32> %tmp1) {
382 ; CHECK-LABEL: smovx2s:
383 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
384 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
385 %tmp4 = sext i32 %tmp3 to i64
; Single-lane copies expressed as a two-operand shufflevector. In each mask
; exactly one result position takes its element from the "other" operand; the
; FileCheck pattern expects that to become one element-to-element `ins`.
; Mask is the identity on %v1 except position 5, which selects index 11,
; i.e. lane 3 of %v2 (indices 8..15 address the second operand).
389 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
390 ; CHECK-LABEL: test_vcopy_lane_s8:
391 ; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
392 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
393 ret <8 x i8> %vset_lane
; Mask is the identity on %v1 except position 14, which selects index 22,
; i.e. lane 6 of %v2 (indices 16..31 address the second operand).
396 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
397 ; CHECK-LABEL: test_vcopyq_laneq_s8:
398 ; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
399 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
400 ret <16 x i8> %vset_lane
; "Swap" form: positions 0..6 come from the same lanes of %v2 (indices 8..14)
; and only position 7 comes from %v1, lane 0.
403 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
404 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
405 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
406 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
407 ret <8 x i8> %vset_lane
; "Swap" form: positions 1..15 come from the same lanes of %v2 (indices
; 17..31) and only position 0 comes from %v1, lane 15.
410 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
411 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
412 ; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
413 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
414 ret <16 x i8> %vset_lane
; Scalar broadcast into 64-bit vectors. Each function builds its result by
; inserting the same scalar argument into every lane, starting from undef; the
; FileCheck pattern expects the whole chain to become one `dup` from a
; general-purpose register (or an `fmov` for the single-lane i64 case).
; i8 broadcast to all 8 lanes of <8 x i8>.
417 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
418 ; CHECK-LABEL: test_vdup_n_u8:
419 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
420 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
421 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
422 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
423 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
424 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
425 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
426 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
427 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
428 ret <8 x i8> %vecinit7.i
; i16 broadcast to all 4 lanes of <4 x i16>.
431 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
432 ; CHECK-LABEL: test_vdup_n_u16:
433 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
434 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
435 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
436 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
437 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
438 ret <4 x i16> %vecinit3.i
; i32 broadcast to both lanes of <2 x i32>.
441 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
442 ; CHECK-LABEL: test_vdup_n_u32:
443 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
444 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
445 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
446 ret <2 x i32> %vecinit1.i
; i64 placed in the only lane of <1 x i64>; a plain `fmov` from an x register
; to a d register is expected instead of a `dup`.
449 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
450 ; CHECK-LABEL: test_vdup_n_u64:
451 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
452 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
453 ret <1 x i64> %vecinit.i
; Scalar broadcast into 128-bit vectors. Each function inserts the same scalar
; argument into every lane, starting from undef; the FileCheck pattern expects
; the whole chain to become one `dup` from a general-purpose register.
; i8 broadcast to all 16 lanes of <16 x i8>.
456 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
457 ; CHECK-LABEL: test_vdupq_n_u8:
458 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
459 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
460 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
461 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
462 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
463 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
464 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
465 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
466 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
467 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
468 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
469 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
470 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
471 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
472 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
473 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
474 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
475 ret <16 x i8> %vecinit15.i
; i16 broadcast to all 8 lanes of <8 x i16>.
478 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
479 ; CHECK-LABEL: test_vdupq_n_u16:
480 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
481 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
482 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
483 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
484 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
485 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
486 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
487 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
488 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
489 ret <8 x i16> %vecinit7.i
; i32 broadcast to all 4 lanes of <4 x i32>.
492 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
493 ; CHECK-LABEL: test_vdupq_n_u32:
494 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
495 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
496 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
497 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
498 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
499 ret <4 x i32> %vecinit3.i
; i64 broadcast to both lanes of <2 x i64>; the source is an x register.
502 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
503 ; CHECK-LABEL: test_vdupq_n_u64:
504 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
505 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
506 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
507 ret <2 x i64> %vecinit1.i
; Lane broadcast, 64-bit source to 64-bit result. Each function uses a
; shufflevector whose mask repeats one source lane index in every position;
; the FileCheck pattern expects one `dup` from that lane.
; Lane 5 of <8 x i8> repeated in all 8 result lanes.
510 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
511 ; CHECK-LABEL: test_vdup_lane_s8:
512 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
513 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
514 ret <8 x i8> %shuffle
; Lane 2 of <4 x i16> repeated in all 4 result lanes.
517 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
518 ; CHECK-LABEL: test_vdup_lane_s16:
519 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
520 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
521 ret <4 x i16> %shuffle
; Lane 1 of <2 x i32> repeated in both result lanes.
524 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
525 ; CHECK-LABEL: test_vdup_lane_s32:
526 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
527 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
528 ret <2 x i32> %shuffle
; Lane broadcast from a 64-bit source to a 128-bit result: the shufflevector
; mask repeats lane 5 of the <8 x i8> argument in all 16 result positions.
; The pattern names the `dup` mnemonic explicitly, as test_vdupq_laneq_s8
; does, so that an unrelated instruction with the same operand shapes cannot
; satisfy it.
531 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
532 ; CHECK-LABEL: test_vdupq_lane_s8:
533 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
534 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
535 ret <16 x i8> %shuffle
; Lane broadcast from a 64-bit source to a 128-bit result: the shufflevector
; mask repeats lane 2 of the <4 x i16> argument in all 8 result positions.
; The pattern names the `dup` mnemonic explicitly so that an unrelated
; instruction with the same operand shapes cannot satisfy it.
538 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
539 ; CHECK-LABEL: test_vdupq_lane_s16:
540 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
541 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
542 ret <8 x i16> %shuffle
; Lane broadcast from a 64-bit source to a 128-bit result: the shufflevector
; mask repeats lane 1 of the <2 x i32> argument in all 4 result positions.
; The pattern names the `dup` mnemonic explicitly so that an unrelated
; instruction with the same operand shapes cannot satisfy it.
545 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
546 ; CHECK-LABEL: test_vdupq_lane_s32:
547 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
548 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
549 ret <4 x i32> %shuffle
; Lane broadcast from a 64-bit source to a 128-bit result: the all-zero
; shufflevector mask repeats the only lane of the <1 x i64> argument in both
; result positions. The pattern names the `dup` mnemonic explicitly so that an
; unrelated instruction with the same operand shapes cannot satisfy it.
552 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
553 ; CHECK-LABEL: test_vdupq_lane_s64:
554 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
555 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
556 ret <2 x i64> %shuffle
; Lane broadcast, 128-bit source to 64-bit result. Each function uses a
; shufflevector whose mask repeats one source lane index in every position of
; a narrower result; the FileCheck pattern expects one `dup` from that lane.
; Lane 5 of <16 x i8> repeated in all 8 lanes of the <8 x i8> result.
559 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
560 ; CHECK-LABEL: test_vdup_laneq_s8:
561 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
562 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
563 ret <8 x i8> %shuffle
; Lane 2 of <8 x i16> repeated in all 4 lanes of the <4 x i16> result.
566 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
567 ; CHECK-LABEL: test_vdup_laneq_s16:
568 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
569 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
570 ret <4 x i16> %shuffle
; Lane 1 of <4 x i32> repeated in both lanes of the <2 x i32> result.
573 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
574 ; CHECK-LABEL: test_vdup_laneq_s32:
575 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
576 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
577 ret <2 x i32> %shuffle
; Lane broadcast, 128-bit source to 128-bit result: the shufflevector mask
; repeats lane 5 of the <16 x i8> argument in all 16 result positions, and the
; FileCheck pattern expects one `dup` from that lane.
580 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
581 ; CHECK-LABEL: test_vdupq_laneq_s8:
582 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
583 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
584 ret <16 x i8> %shuffle
; Lane broadcast, 128-bit source to 128-bit result: the shufflevector mask
; repeats lane 2 of the <8 x i16> argument in all 8 result positions. The
; pattern names the `dup` mnemonic explicitly, as the s8/s32/s64 variants of
; this test do, so that an unrelated instruction with the same operand shapes
; cannot satisfy it.
587 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
588 ; CHECK-LABEL: test_vdupq_laneq_s16:
589 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
590 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
591 ret <8 x i16> %shuffle
; Lane broadcast, 128-bit source to 128-bit result, for 32- and 64-bit lanes.
; Each shufflevector mask repeats one source lane in every result position and
; the FileCheck pattern expects one `dup` from that lane.
; Lane 1 of <4 x i32> repeated in all 4 result lanes.
594 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
595 ; CHECK-LABEL: test_vdupq_laneq_s32:
596 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
597 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
598 ret <4 x i32> %shuffle
; Lane 0 of <2 x i64> repeated in both result lanes (all-zero mask).
601 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
602 ; CHECK-LABEL: test_vdupq_laneq_s64:
603 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
604 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
605 ret <2 x i64> %shuffle
; Bitcasts from a 64-bit vector argument to a scalar i64. Every variant
; expects the same lowering: one `fmov` from a d register to an x register.
; <8 x i8> -> i64.
608 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
609 ; CHECK-LABEL: test_bitcastv8i8toi64:
610 %res = bitcast <8 x i8> %in to i64
611 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; <4 x i16> -> i64.
615 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
616 ; CHECK-LABEL: test_bitcastv4i16toi64:
617 %res = bitcast <4 x i16> %in to i64
618 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; <2 x i32> -> i64.
622 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
623 ; CHECK-LABEL: test_bitcastv2i32toi64:
624 %res = bitcast <2 x i32> %in to i64
625 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; <2 x float> -> i64.
629 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
630 ; CHECK-LABEL: test_bitcastv2f32toi64:
631 %res = bitcast <2 x float> %in to i64
632 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; <1 x i64> -> i64.
636 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
637 ; CHECK-LABEL: test_bitcastv1i64toi64:
638 %res = bitcast <1 x i64> %in to i64
639 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; <1 x double> -> i64.
643 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
644 ; CHECK-LABEL: test_bitcastv1f64toi64:
645 %res = bitcast <1 x double> %in to i64
646 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
; Bitcasts from a scalar i64 argument to a 64-bit vector. Every variant
; expects the same lowering: one `fmov` from an x register to a d register.
; i64 -> <8 x i8>.
650 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
651 ; CHECK-LABEL: test_bitcasti64tov8i8:
652 %res = bitcast i64 %in to <8 x i8>
653 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; i64 -> <4 x i16>.
657 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
658 ; CHECK-LABEL: test_bitcasti64tov4i16:
659 %res = bitcast i64 %in to <4 x i16>
660 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; i64 -> <2 x i32>.
664 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
665 ; CHECK-LABEL: test_bitcasti64tov2i32:
666 %res = bitcast i64 %in to <2 x i32>
667 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; i64 -> <2 x float>.
671 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
672 ; CHECK-LABEL: test_bitcasti64tov2f32:
673 %res = bitcast i64 %in to <2 x float>
674 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; i64 -> <1 x i64>.
678 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
679 ; CHECK-LABEL: test_bitcasti64tov1i64:
680 %res = bitcast i64 %in to <1 x i64>
681 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
; i64 -> <1 x double>.
685 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
686 ; CHECK-LABEL: test_bitcasti64tov1f64:
687 %res = bitcast i64 %in to <1 x double>
688 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
689 ret <1 x double> %res
; Bitcasts of a 64-bit vector to <1 x double>, placed between two operations.
; Each function negates its argument (integer `sub` from zero, or `fsub` from
; -0.0 for floats), bitcasts the result to <1 x double>, and converts that to
; <1 x i64> with `fptosi`. The first directive expects the negate; the second
; requires `fcvtzs` on the very next output line, so no instruction may be
; emitted for the bitcast itself.
; <8 x i8> source.
692 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
693 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
694 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
695 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
696 %sub.i = sub <8 x i8> zeroinitializer, %a
697 %1 = bitcast <8 x i8> %sub.i to <1 x double>
698 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
699 ret <1 x i64> %vcvt.i
; <4 x i16> source.
702 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
703 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
704 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
705 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
706 %sub.i = sub <4 x i16> zeroinitializer, %a
707 %1 = bitcast <4 x i16> %sub.i to <1 x double>
708 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
709 ret <1 x i64> %vcvt.i
; <2 x i32> source.
712 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
713 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
714 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
715 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
716 %sub.i = sub <2 x i32> zeroinitializer, %a
717 %1 = bitcast <2 x i32> %sub.i to <1 x double>
718 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
719 ret <1 x i64> %vcvt.i
; <1 x i64> source; the negate is expected on scalar d registers.
722 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
723 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
724 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
725 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
726 %sub.i = sub <1 x i64> zeroinitializer, %a
727 %1 = bitcast <1 x i64> %sub.i to <1 x double>
728 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
729 ret <1 x i64> %vcvt.i
; <2 x float> source; the negate is an `fsub` from -0.0, expected as `fneg`.
732 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
733 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
734 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
735 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
736 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
737 %1 = bitcast <2 x float> %sub.i to <1 x double>
738 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
739 ret <1 x i64> %vcvt.i
; Bitcasts from <1 x double> to another 64-bit vector, placed between two
; operations. Each function converts its <1 x i64> argument to <1 x double>
; with `sitofp`, bitcasts that to the target type, and negates the result.
; The first directive expects `scvtf`; the second requires the negate on the
; very next output line, so no instruction may be emitted for the bitcast.
; <8 x i8> target.
742 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
743 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
744 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
745 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
746 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
747 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
748 %sub.i = sub <8 x i8> zeroinitializer, %1
; <4 x i16> target.
752 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
753 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
754 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
755 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
756 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
757 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
758 %sub.i = sub <4 x i16> zeroinitializer, %1
; <2 x i32> target.
762 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
763 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
764 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
765 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
766 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
767 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
768 %sub.i = sub <2 x i32> zeroinitializer, %1
; <1 x i64> target; the negate is expected on scalar d registers.
772 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
773 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
774 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
775 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
776 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
777 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
778 %sub.i = sub <1 x i64> zeroinitializer, %1
; <2 x float> target; the negate is an `fsub` from -0.0, expected as `fneg`.
782 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
783 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
784 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
785 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
786 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
787 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
788 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
789 ret <2 x float> %sub.i
792 ; Test insert element into an undef vector
; Each function inserts its scalar argument into lane 0 of an otherwise undef
; vector. The FileCheck pattern expects a plain `fmov` from a general-purpose
; register into an s register (i8/i16/i32 elements) or a d register (i64).
; i8 into lane 0 of <8 x i8>.
793 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
794 ; CHECK-LABEL: scalar_to_vector.v8i8:
795 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
796 %b = insertelement <8 x i8> undef, i8 %a, i32 0
; i8 into lane 0 of <16 x i8>.
800 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
801 ; CHECK-LABEL: scalar_to_vector.v16i8:
802 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
803 %b = insertelement <16 x i8> undef, i8 %a, i32 0
; i16 into lane 0 of <4 x i16>.
807 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
808 ; CHECK-LABEL: scalar_to_vector.v4i16:
809 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
810 %b = insertelement <4 x i16> undef, i16 %a, i32 0
; i16 into lane 0 of <8 x i16>.
814 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
815 ; CHECK-LABEL: scalar_to_vector.v8i16:
816 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
817 %b = insertelement <8 x i16> undef, i16 %a, i32 0
; i32 into lane 0 of <2 x i32>.
821 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
822 ; CHECK-LABEL: scalar_to_vector.v2i32:
823 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
824 %b = insertelement <2 x i32> undef, i32 %a, i32 0
; i32 into lane 0 of <4 x i32>.
828 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
829 ; CHECK-LABEL: scalar_to_vector.v4i32:
830 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
831 %b = insertelement <4 x i32> undef, i32 %a, i32 0
; i64 into lane 0 of <2 x i64>; the x-to-d form of `fmov` is expected.
835 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
836 ; CHECK-LABEL: scalar_to_vector.v2i64:
837 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
838 %b = insertelement <2 x i64> undef, i64 %a, i32 0
; Broadcast of a single-element vector argument. Each function extracts the
; only element of a <1 x T> argument and inserts it into every lane of a wider
; vector, starting from undef. The FileCheck patterns expect one `dup` from
; lane 0 and, unlike most of this file, name register v0 literally for both
; operands.
; <1 x i8> element into all 8 lanes of <8 x i8>.
842 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
843 ; CHECK-LABEL: testDUP.v1i8:
844 ; CHECK: dup v0.8b, v0.b[0]
845 %b = extractelement <1 x i8> %a, i32 0
846 %c = insertelement <8 x i8> undef, i8 %b, i32 0
847 %d = insertelement <8 x i8> %c, i8 %b, i32 1
848 %e = insertelement <8 x i8> %d, i8 %b, i32 2
849 %f = insertelement <8 x i8> %e, i8 %b, i32 3
850 %g = insertelement <8 x i8> %f, i8 %b, i32 4
851 %h = insertelement <8 x i8> %g, i8 %b, i32 5
852 %i = insertelement <8 x i8> %h, i8 %b, i32 6
853 %j = insertelement <8 x i8> %i, i8 %b, i32 7
; <1 x i16> element into all 8 lanes of <8 x i16>.
857 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
858 ; CHECK-LABEL: testDUP.v1i16:
859 ; CHECK: dup v0.8h, v0.h[0]
860 %b = extractelement <1 x i16> %a, i32 0
861 %c = insertelement <8 x i16> undef, i16 %b, i32 0
862 %d = insertelement <8 x i16> %c, i16 %b, i32 1
863 %e = insertelement <8 x i16> %d, i16 %b, i32 2
864 %f = insertelement <8 x i16> %e, i16 %b, i32 3
865 %g = insertelement <8 x i16> %f, i16 %b, i32 4
866 %h = insertelement <8 x i16> %g, i16 %b, i32 5
867 %i = insertelement <8 x i16> %h, i16 %b, i32 6
868 %j = insertelement <8 x i16> %i, i16 %b, i32 7
; <1 x i32> element into all 4 lanes of <4 x i32>.
872 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
873 ; CHECK-LABEL: testDUP.v1i32:
874 ; CHECK: dup v0.4s, v0.s[0]
875 %b = extractelement <1 x i32> %a, i32 0
876 %c = insertelement <4 x i32> undef, i32 %b, i32 0
877 %d = insertelement <4 x i32> %c, i32 %b, i32 1
878 %e = insertelement <4 x i32> %d, i32 %b, i32 2
879 %f = insertelement <4 x i32> %e, i32 %b, i32 3
; Rebuilds the low half of a 128-bit vector lane by lane: lanes 0..7 of the
; <16 x i8> argument are extracted one at a time and inserted into the same
; lane of an <8 x i8> that starts as undef; the fully populated vector is
; returned.
; NOTE(review): no FileCheck directives for this function are visible in this
; excerpt - confirm what output it is expected to produce.
883 define <8 x i8> @getl(<16 x i8> %x) #0 {
886 %vecext = extractelement <16 x i8> %x, i32 0
887 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
888 %vecext1 = extractelement <16 x i8> %x, i32 1
889 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
890 %vecext3 = extractelement <16 x i8> %x, i32 2
891 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
892 %vecext5 = extractelement <16 x i8> %x, i32 3
893 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
894 %vecext7 = extractelement <16 x i8> %x, i32 4
895 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
896 %vecext9 = extractelement <16 x i8> %x, i32 5
897 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
898 %vecext11 = extractelement <16 x i8> %x, i32 6
899 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
900 %vecext13 = extractelement <16 x i8> %x, i32 7
901 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
902 ret <8 x i8> %vecinit14
; Variable-index extract mixed with constant-index lane copies. Lane 0 of the
; <4 x i16> result is read from the <8 x i16> argument at the runtime index
; %idx; lanes 1..3 are copied from the same-numbered lanes of the argument.
; The patterns expect the index to be masked with #0x7 (8 source lanes),
; merged into an address register with `bfi`, and the element loaded from
; that address into an h register; the three constant lanes are then expected
; as `ins` from v0 into the loaded register. The pointer and result registers
; are captured in FileCheck variables rather than named literally.
905 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
907 ; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7
908 ; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3
909 ; CHECK-DAG: ldr h[[R:[0-9]+]], {{\[}}[[PTR]]{{\]}}
910 ; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
911 ; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
912 ; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
913 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
914 %tmp = extractelement <8 x i16> %x, i32 %idx
915 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
916 %tmp3 = extractelement <8 x i16> %x, i32 1
917 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
918 %tmp5 = extractelement <8 x i16> %x, i32 2
919 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
920 %tmp7 = extractelement <8 x i16> %x, i32 3
921 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Builds a <4 x i16> from the <8 x i16> argument %x where lane 0 of %x is
; written at the runtime index %idx and lanes 1-3 of %x are then written at
; constant indices 1-3.
; The expectations below look for the index being masked with #0x3 and merged
; into a pointer register with bfi, a single-lane st1 of v0.h[0] through that
; same pointer, a d-register reload, and three lane-to-lane ins instructions.
; The pointer register is captured in a pattern variable instead of being
; spelled as a fixed register, so the test does not depend on which scratch
; register the allocator happens to pick (same idiom as the varidx_extract
; test in this file).
925 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
926 ; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3
927 ; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #2
928 ; CHECK: st1 { v0.h }[0], {{\[}}[[PTR]]{{\]}}
929 ; CHECK-DAG: ldr d[[R:[0-9]+]]
930 ; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
931 ; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
932 ; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
933 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
934 %tmp = extractelement <8 x i16> %x, i32 0
935 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
936 %tmp3 = extractelement <8 x i16> %x, i32 1
937 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
938 %tmp5 = extractelement <8 x i16> %x, i32 2
939 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
940 %tmp7 = extractelement <8 x i16> %x, i32 3
941 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Lane 1 of the <2 x i32> argument is truncated to i16 and written to all
; four lanes of the result. The expected code is a single dup from 16-bit
; lane 2 of v0.
945 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
946 ; CHECK-LABEL: test_dup_v2i32_v4i16:
947 ; CHECK: dup v0.4h, v0.h[2]
949 %x = extractelement <2 x i32> %a, i32 1
950 %vget_lane = trunc i32 %x to i16
951 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
952 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
953 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
954 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
955 ret <4 x i16> %vecinit3.i
; Lane 3 of the <4 x i32> argument is truncated to i16 and written to all
; eight lanes of the result. The expected code is a single dup from 16-bit
; lane 6 of v0.
958 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
959 ; CHECK-LABEL: test_dup_v4i32_v8i16:
960 ; CHECK: dup v0.8h, v0.h[6]
962 %x = extractelement <4 x i32> %a, i32 3
963 %vget_lane = trunc i32 %x to i16
964 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
965 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
966 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
967 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
968 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
969 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
970 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
971 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
972 ret <8 x i16> %vecinit7.i
; The single lane of the <1 x i64> argument is truncated to i16 and written
; to all four lanes of the result. The expected code is a single dup from
; 16-bit lane 0 of v0.
975 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
976 ; CHECK-LABEL: test_dup_v1i64_v4i16:
977 ; CHECK: dup v0.4h, v0.h[0]
979 %x = extractelement <1 x i64> %a, i32 0
980 %vget_lane = trunc i64 %x to i16
981 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
982 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
983 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
984 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
985 ret <4 x i16> %vecinit3.i
; The single lane of the <1 x i64> argument is truncated to i32 and written
; to both lanes of the result. The expected code is a single dup from 32-bit
; lane 0 of v0.
988 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
989 ; CHECK-LABEL: test_dup_v1i64_v2i32:
990 ; CHECK: dup v0.2s, v0.s[0]
992 %x = extractelement <1 x i64> %a, i32 0
993 %vget_lane = trunc i64 %x to i32
994 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
995 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
996 ret <2 x i32> %vecinit1.i
; Lane 1 of the <2 x i64> argument is truncated to i16 and written to all
; eight lanes of the result. The expected code is a single dup from 16-bit
; lane 4 of v0.
999 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
1000 ; CHECK-LABEL: test_dup_v2i64_v8i16:
1001 ; CHECK: dup v0.8h, v0.h[4]
1003 %x = extractelement <2 x i64> %a, i32 1
1004 %vget_lane = trunc i64 %x to i16
1005 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1006 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1007 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1008 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1009 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1010 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1011 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1012 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1013 ret <8 x i16> %vecinit7.i
; Lane 1 of the <2 x i64> argument is truncated to i32 and written to all
; four lanes of the result. The expected code is a single dup from 32-bit
; lane 2 of v0.
1016 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1017 ; CHECK-LABEL: test_dup_v2i64_v4i32:
1018 ; CHECK: dup v0.4s, v0.s[2]
1020 %x = extractelement <2 x i64> %a, i32 1
1021 %vget_lane = trunc i64 %x to i32
1022 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1023 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1024 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1025 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1026 ret <4 x i32> %vecinit3.i
; Lane 1 of the <4 x i32> argument is truncated to i16 and written to all
; four lanes of a 64-bit result. The expected code is a single dup from
; 16-bit lane 2 of v0.
1029 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1030 ; CHECK-LABEL: test_dup_v4i32_v4i16:
1031 ; CHECK: dup v0.4h, v0.h[2]
1033 %x = extractelement <4 x i32> %a, i32 1
1034 %vget_lane = trunc i32 %x to i16
1035 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1036 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1037 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1038 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1039 ret <4 x i16> %vecinit3.i
; Lane 0 of the <2 x i64> argument is truncated to i16 and written to all
; four lanes of a 64-bit result. The expected code is a single dup from
; 16-bit lane 0 of v0.
1042 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1043 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1044 ; CHECK: dup v0.4h, v0.h[0]
1046 %x = extractelement <2 x i64> %a, i32 0
1047 %vget_lane = trunc i64 %x to i16
1048 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1049 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1050 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1051 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1052 ret <4 x i16> %vecinit3.i
; Lane 0 of the <2 x i64> argument is truncated to i32 and written to both
; lanes of a 64-bit result. The expected code is a single dup from 32-bit
; lane 0 of v0.
1055 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1056 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1057 ; CHECK: dup v0.2s, v0.s[0]
1059 %x = extractelement <2 x i64> %a, i32 0
1060 %vget_lane = trunc i64 %x to i32
1061 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1062 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1063 ret <2 x i32> %vecinit1.i
; Reduces %a with the fmaxv intrinsic, round-trips the scalar through a
; <1 x float> insert/extract pair, and places it in lane 0 of an otherwise
; undef <2 x float>. The expected reduction instruction is a pairwise fmaxp
; producing an s register.
1067 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1068 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1069 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1072 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1073 %1 = insertelement <1 x float> undef, float %0, i32 0
1074 %2 = extractelement <1 x float> %1, i32 0
1075 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1076 ret <2 x float> %vecinit1.i
; Same shape as the v2f32 variant, but the reduced scalar is placed in lane 0
; of an otherwise undef <4 x float>. The expected reduction instruction is a
; pairwise fmaxp producing an s register.
1079 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1080 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1081 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1084 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1085 %1 = insertelement <1 x float> undef, float %0, i32 0
1086 %2 = extractelement <1 x float> %1, i32 0
1087 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1088 ret <4 x float> %vecinit1.i
; Intrinsic taking a <2 x float> and returning a float; called by the
; test_scalar_to_vector_f32_to_* functions in this file.
1091 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
; Lane 0 of %a is placed in lane 1 of an otherwise undef <2 x i32>, leaving
; lane 0 of the result undefined. The expected code is a dup of 32-bit lane 0
; across a .2s vector.
1093 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1094 ; CHECK-LABEL: test_concat_undef_v1i32:
1095 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
1097 %0 = extractelement <2 x i32> %a, i32 0
1098 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1099 ret <2 x i32> %vecinit1.i
; Scalar i32 -> i32 intrinsic; called by test_concat_v1i32_undef and
; test_concat_diff_v1i32_v1i32 in this file.
1102 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
; Applies the sqabs intrinsic to the scalar %a and places the result in
; lane 0 of an otherwise undef <2 x i32>. The expected code performs sqabs
; on s registers.
1104 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1105 ; CHECK-LABEL: test_concat_v1i32_undef:
1106 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1109 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1110 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1111 ret <2 x i32> %vecinit.i432
; Lane 0 of %a is written to both lanes of the result. The expected code is
; a dup of 32-bit lane 0 across a .2s vector.
1114 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1115 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1116 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
1118 %0 = extractelement <2 x i32> %a, i32 0
1119 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1120 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1121 ret <2 x i32> %vecinit1.i
; Applies sqabs to each scalar argument, puts each result in lane 0 of its
; own undef <2 x i32>, then combines them with a shufflevector whose mask
; <0, 2> selects lane 0 of the first vector and lane 0 of the second.
; The expected code is two sqabs instructions on s registers followed by an
; ins into 32-bit lane 1 from a w register.
1124 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1125 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1126 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1127 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1128 ; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
1130 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1131 %d = insertelement <2 x i32> undef, i32 %c, i32 0
1132 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1133 %f = insertelement <2 x i32> undef, i32 %e, i32 0
1134 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
; Result is lanes 0-7 of %x followed by lanes 0-7 of %y (shuffle mask indices
; 0-7 and 16-23). The expected code is one ins copying 64-bit lane 0 of one
; register into 64-bit lane 1 of another.
1138 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1139 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1140 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1142 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1143 ret <16 x i8> %vecinit30
; Copies all eight lanes of the <8 x i8> argument %x into lanes 0-7 of a
; <16 x i8> with extract/insert pairs, then uses a shufflevector (mask
; indices 0-7 and 16-23) to append lanes 0-7 of %y as the upper half.
; The expected code is one ins from 64-bit lane 0 into 64-bit lane 1.
1146 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1147 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1148 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1150 %vecext = extractelement <8 x i8> %x, i32 0
1151 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1152 %vecext1 = extractelement <8 x i8> %x, i32 1
1153 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1154 %vecext3 = extractelement <8 x i8> %x, i32 2
1155 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1156 %vecext5 = extractelement <8 x i8> %x, i32 3
1157 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1158 %vecext7 = extractelement <8 x i8> %x, i32 4
1159 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1160 %vecext9 = extractelement <8 x i8> %x, i32 5
1161 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1162 %vecext11 = extractelement <8 x i8> %x, i32 6
1163 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1164 %vecext13 = extractelement <8 x i8> %x, i32 7
1165 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1166 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1167 ret <16 x i8> %vecinit30
; Builds the result lane by lane: lanes 0-7 come from lanes 0-7 of the
; <16 x i8> argument %x, lanes 8-15 come from lanes 0-7 of the <8 x i8>
; argument %y. The expected code is one ins from 64-bit lane 0 into 64-bit
; lane 1.
1170 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1171 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1172 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1174 %vecext = extractelement <16 x i8> %x, i32 0
1175 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1176 %vecext1 = extractelement <16 x i8> %x, i32 1
1177 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1178 %vecext3 = extractelement <16 x i8> %x, i32 2
1179 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1180 %vecext5 = extractelement <16 x i8> %x, i32 3
1181 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1182 %vecext7 = extractelement <16 x i8> %x, i32 4
1183 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1184 %vecext9 = extractelement <16 x i8> %x, i32 5
1185 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1186 %vecext11 = extractelement <16 x i8> %x, i32 6
1187 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1188 %vecext13 = extractelement <16 x i8> %x, i32 7
1189 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1190 %vecext15 = extractelement <8 x i8> %y, i32 0
1191 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1192 %vecext17 = extractelement <8 x i8> %y, i32 1
1193 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1194 %vecext19 = extractelement <8 x i8> %y, i32 2
1195 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1196 %vecext21 = extractelement <8 x i8> %y, i32 3
1197 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1198 %vecext23 = extractelement <8 x i8> %y, i32 4
1199 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1200 %vecext25 = extractelement <8 x i8> %y, i32 5
1201 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1202 %vecext27 = extractelement <8 x i8> %y, i32 6
1203 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1204 %vecext29 = extractelement <8 x i8> %y, i32 7
1205 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1206 ret <16 x i8> %vecinit30
; Builds the result lane by lane from two <8 x i8> arguments: lanes 0-7 are
; the eight lanes of %x and lanes 8-15 are the eight lanes of %y.
; The expected code is one ins from 64-bit lane 0 into 64-bit lane 1.
1209 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1210 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1211 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1213 %vecext = extractelement <8 x i8> %x, i32 0
1214 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1215 %vecext1 = extractelement <8 x i8> %x, i32 1
1216 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1217 %vecext3 = extractelement <8 x i8> %x, i32 2
1218 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1219 %vecext5 = extractelement <8 x i8> %x, i32 3
1220 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1221 %vecext7 = extractelement <8 x i8> %x, i32 4
1222 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1223 %vecext9 = extractelement <8 x i8> %x, i32 5
1224 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1225 %vecext11 = extractelement <8 x i8> %x, i32 6
1226 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1227 %vecext13 = extractelement <8 x i8> %x, i32 7
1228 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1229 %vecext15 = extractelement <8 x i8> %y, i32 0
1230 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1231 %vecext17 = extractelement <8 x i8> %y, i32 1
1232 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1233 %vecext19 = extractelement <8 x i8> %y, i32 2
1234 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1235 %vecext21 = extractelement <8 x i8> %y, i32 3
1236 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1237 %vecext23 = extractelement <8 x i8> %y, i32 4
1238 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1239 %vecext25 = extractelement <8 x i8> %y, i32 5
1240 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1241 %vecext27 = extractelement <8 x i8> %y, i32 6
1242 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1243 %vecext29 = extractelement <8 x i8> %y, i32 7
1244 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1245 ret <16 x i8> %vecinit30
; Result is lanes 0-3 of %x followed by lanes 0-3 of %y (shuffle mask indices
; 0-3 and 8-11). The expected code is one ins from 64-bit lane 0 into 64-bit
; lane 1.
1248 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1249 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1250 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1252 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1253 ret <8 x i16> %vecinit14
; Copies the four lanes of the <4 x i16> argument %x into lanes 0-3 of an
; <8 x i16> with extract/insert pairs, then uses a shufflevector (mask
; indices 0-3 and 8-11) to append lanes 0-3 of %y as the upper half.
; The expected code is one ins from 64-bit lane 0 into 64-bit lane 1.
1256 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1257 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1258 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1260 %vecext = extractelement <4 x i16> %x, i32 0
1261 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1262 %vecext1 = extractelement <4 x i16> %x, i32 1
1263 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1264 %vecext3 = extractelement <4 x i16> %x, i32 2
1265 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1266 %vecext5 = extractelement <4 x i16> %x, i32 3
1267 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1268 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1269 ret <8 x i16> %vecinit14
; Builds the result lane by lane: lanes 0-3 come from lanes 0-3 of the
; <8 x i16> argument %x, lanes 4-7 come from the four lanes of the <4 x i16>
; argument %y. The expected code is one ins from 64-bit lane 0 into 64-bit
; lane 1.
1272 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1273 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1274 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1276 %vecext = extractelement <8 x i16> %x, i32 0
1277 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1278 %vecext1 = extractelement <8 x i16> %x, i32 1
1279 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1280 %vecext3 = extractelement <8 x i16> %x, i32 2
1281 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1282 %vecext5 = extractelement <8 x i16> %x, i32 3
1283 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1284 %vecext7 = extractelement <4 x i16> %y, i32 0
1285 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1286 %vecext9 = extractelement <4 x i16> %y, i32 1
1287 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1288 %vecext11 = extractelement <4 x i16> %y, i32 2
1289 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1290 %vecext13 = extractelement <4 x i16> %y, i32 3
1291 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1292 ret <8 x i16> %vecinit14
; Builds the result lane by lane from two <4 x i16> arguments: lanes 0-3 are
; the four lanes of %x and lanes 4-7 are the four lanes of %y.
; The expected code is one ins from 64-bit lane 0 into 64-bit lane 1.
1295 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1296 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1297 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1299 %vecext = extractelement <4 x i16> %x, i32 0
1300 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1301 %vecext1 = extractelement <4 x i16> %x, i32 1
1302 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1303 %vecext3 = extractelement <4 x i16> %x, i32 2
1304 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1305 %vecext5 = extractelement <4 x i16> %x, i32 3
1306 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1307 %vecext7 = extractelement <4 x i16> %y, i32 0
1308 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1309 %vecext9 = extractelement <4 x i16> %y, i32 1
1310 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1311 %vecext11 = extractelement <4 x i16> %y, i32 2
1312 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1313 %vecext13 = extractelement <4 x i16> %y, i32 3
1314 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1315 ret <8 x i16> %vecinit14
; Result is lanes 0-1 of %x followed by lanes 0-1 of %y (shuffle mask
; <0, 1, 4, 5>). The expected code is one ins from 64-bit lane 0 into 64-bit
; lane 1.
1318 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1319 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1320 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1322 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1323 ret <4 x i32> %vecinit6
; Copies both lanes of the <2 x i32> argument %x into lanes 0-1 of a
; <4 x i32> with extract/insert pairs, then uses a shufflevector (mask
; <0, 1, 4, 5>) to append lanes 0-1 of %y as the upper half.
; The expected code is one ins from 64-bit lane 0 into 64-bit lane 1.
1326 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1327 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1328 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1330 %vecext = extractelement <2 x i32> %x, i32 0
1331 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1332 %vecext1 = extractelement <2 x i32> %x, i32 1
1333 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1334 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1335 ret <4 x i32> %vecinit6
; Builds the result lane by lane: lanes 0-1 come from lanes 0-1 of the
; <4 x i32> argument %x, lanes 2-3 come from the two lanes of the <2 x i32>
; argument %y. The expected code is one ins from 64-bit lane 0 into 64-bit
; lane 1.
1338 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1339 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1340 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1342 %vecext = extractelement <4 x i32> %x, i32 0
1343 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1344 %vecext1 = extractelement <4 x i32> %x, i32 1
1345 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1346 %vecext3 = extractelement <2 x i32> %y, i32 0
1347 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1348 %vecext5 = extractelement <2 x i32> %y, i32 1
1349 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1350 ret <4 x i32> %vecinit6
; Concatenates two <2 x i32> arguments into a <4 x i32> with a single
; widening shufflevector (mask <0, 1, 2, 3>). The expected code is one ins
; from 64-bit lane 0 into 64-bit lane 1.
1353 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1354 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1355 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1357 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1358 ret <4 x i32> %vecinit6
; Result is lane 0 of %x followed by lane 0 of %y (shuffle mask <0, 2>).
; The expected code is a zip1 on .2d vectors.
1361 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1362 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1363 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1365 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1366 ret <2 x i64> %vecinit2
; Places the single lane of the <1 x i64> argument %x in lane 0 of a
; <2 x i64>, then uses a shufflevector (mask <0, 2>) to take lane 0 of %y as
; the upper lane. The expected code is a zip1 on .2d vectors.
1369 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1370 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1371 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1373 %vecext = extractelement <1 x i64> %x, i32 0
1374 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1375 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1376 ret <2 x i64> %vecinit2
; Builds the result from lane 0 of the <2 x i64> argument %x (lane 0) and
; the single lane of the <1 x i64> argument %y (lane 1) with extract/insert
; pairs. The expected code is a zip1 on .2d vectors.
1379 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1380 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1381 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1383 %vecext = extractelement <2 x i64> %x, i32 0
1384 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1385 %vecext1 = extractelement <1 x i64> %y, i32 0
1386 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1387 ret <2 x i64> %vecinit2
; Builds the result from two <1 x i64> arguments: the lane of %x becomes
; lane 0 and the lane of %y becomes lane 1. The expected code is one ins
; from 64-bit lane 0 into 64-bit lane 1.
1390 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1391 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1392 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1394 %vecext = extractelement <1 x i64> %x, i32 0
1395 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1396 %vecext1 = extractelement <1 x i64> %y, i32 0
1397 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1398 ret <2 x i64> %vecinit2
; Splats the single element of a constant-zero <1 x i16> into four lanes via
; a shufflevector with an all-zero mask. The expected code is a movi of #0
; into a d register.
1402 define <4 x i16> @concat_vector_v4i16_const() {
1403 ; CHECK-LABEL: concat_vector_v4i16_const:
1404 ; CHECK: movi {{d[0-9]+}}, #0
1405 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of the constant <1 x i16> <1> into four lanes
; via a shufflevector with an all-zero mask. The expected code is a movi of
; #1 into a .4h vector.
1409 define <4 x i16> @concat_vector_v4i16_const_one() {
1410 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1411 ; CHECK: movi {{v[0-9]+}}.4h, #1
1412 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i32> into four lanes via
; a shufflevector with an all-zero mask. The expected code is a movi of #0
; into a .2d vector.
1416 define <4 x i32> @concat_vector_v4i32_const() {
1417 ; CHECK-LABEL: concat_vector_v4i32_const:
1418 ; CHECK: movi {{v[0-9]+}}.2d, #0
1419 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i8> into eight lanes via
; a shufflevector with an all-zero mask. The expected code is a movi of #0
; into a d register.
1423 define <8 x i8> @concat_vector_v8i8_const() {
1424 ; CHECK-LABEL: concat_vector_v8i8_const:
1425 ; CHECK: movi {{d[0-9]+}}, #0
1426 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i16> into eight lanes
; via a shufflevector with an all-zero mask. The expected code is a movi of
; #0 into a .2d vector.
1430 define <8 x i16> @concat_vector_v8i16_const() {
1431 ; CHECK-LABEL: concat_vector_v8i16_const:
1432 ; CHECK: movi {{v[0-9]+}}.2d, #0
1433 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the single element of the constant <1 x i16> <1> into eight lanes
; via a shufflevector with an all-zero mask. The expected code is a movi of
; #1 into a .8h vector.
1437 define <8 x i16> @concat_vector_v8i16_const_one() {
1438 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1439 ; CHECK: movi {{v[0-9]+}}.8h, #1
1440 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i8> into sixteen lanes
; via a shufflevector with an all-zero mask. The expected code is a movi of
; #0 into a .2d vector.
1444 define <16 x i8> @concat_vector_v16i8_const() {
1445 ; CHECK-LABEL: concat_vector_v16i8_const:
1446 ; CHECK: movi {{v[0-9]+}}.2d, #0
1447 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
; Splats the single element of the <1 x i16> argument into four lanes via a
; shufflevector with an all-zero mask. The expected code is a dup of 16-bit
; lane 0 of v0 into v0.4h.
1451 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1452 ; CHECK-LABEL: concat_vector_v4i16:
1453 ; CHECK: dup v0.4h, v0.h[0]
1454 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of the <1 x i32> argument into four lanes via a
; shufflevector with an all-zero mask. The expected code is a dup of 32-bit
; lane 0 of v0 into v0.4s.
1458 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1459 ; CHECK-LABEL: concat_vector_v4i32:
1460 ; CHECK: dup v0.4s, v0.s[0]
1461 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of the <1 x i8> argument into eight lanes via a
; shufflevector with an all-zero mask. The expected code is a dup of 8-bit
; lane 0 of v0 into v0.8b.
1465 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1466 ; CHECK-LABEL: concat_vector_v8i8:
1467 ; CHECK: dup v0.8b, v0.b[0]
1468 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of the <1 x i16> argument into eight lanes via a
; shufflevector with an all-zero mask. The expected code is a dup of 16-bit
; lane 0 of v0 into v0.8h.
1472 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1473 ; CHECK-LABEL: concat_vector_v8i16:
1474 ; CHECK: dup v0.8h, v0.h[0]
1475 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1479 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1480 ; CHECK-LABEL: concat_vector_v16i8:
1481 ; CHECK: dup v0.16b, v0.b[0]
1482 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer