; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) #0
declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) #0
declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>) #0
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>) #0
declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>) #0
; uaddlv over a zero <8 x i16>, inserted at lane 0 of a zero <2 x i32>, then uitofp'd and stored.
define void @insert_vec_v2i32_uaddlv_from_v8i16(ptr %0) {
; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v8i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    uaddlv.8h s0, v0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    ucvtf.2s v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
  %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
  %2 = uitofp <2 x i32> %1 to <2 x float>
  store <2 x float> %2, ptr %0, align 8
  ret void
}
; uaddlv over a zero <8 x i16>, inserted at lane 0 of a zero <4 x i32>, then uitofp'd and stored.
define void @insert_vec_v4i32_uaddlv_from_v8i16(ptr %0) {
; CHECK-LABEL: insert_vec_v4i32_uaddlv_from_v8i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    uaddlv.8h s1, v0
; CHECK-NEXT:    mov.s v0[0], v1[0]
; CHECK-NEXT:    ucvtf.4s v0, v0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
  %1 = insertelement <4 x i32> zeroinitializer, i32 %vaddlv, i64 0
  %2 = uitofp <4 x i32> %1 to <4 x float>
  store <4 x float> %2, ptr %0, align 8
  ret void
}
; uaddlv over a zero <8 x i16>, inserted at lane 0 of a zero <16 x i32>, then uitofp'd and stored.
define void @insert_vec_v16i32_uaddlv_from_v8i16(ptr %0) {
; CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v8i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v2, #0000000000000000
; CHECK-NEXT:    uaddlv.8h s1, v0
; CHECK-NEXT:    stp q0, q0, [x0, #32]
; CHECK-NEXT:    mov.s v2[0], v1[0]
; CHECK-NEXT:    ucvtf.4s v1, v2
; CHECK-NEXT:    stp q1, q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
  %1 = insertelement <16 x i32> zeroinitializer, i32 %vaddlv, i64 0
  %2 = uitofp <16 x i32> %1 to <16 x float>
  store <16 x float> %2, ptr %0, align 8
  ret void
}
; Odd-sized case: uaddlv over a zero <8 x i16>, inserted at lane 0 of a zero <23 x i32>, uitofp'd and stored.
define void @insert_vec_v23i32_uaddlv_from_v8i16(ptr %0) {
; CHECK-LABEL: insert_vec_v23i32_uaddlv_from_v8i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v2, #0000000000000000
; CHECK-NEXT:    add x8, x0, #88
; CHECK-NEXT:    uaddlv.8h s1, v0
; CHECK-NEXT:    stp q0, q0, [x0, #16]
; CHECK-NEXT:    stp q0, q0, [x0, #48]
; CHECK-NEXT:    st1.s { v0 }[2], [x8]
; CHECK-NEXT:    str d0, [x0, #80]
; CHECK-NEXT:    mov.s v2[0], v1[0]
; CHECK-NEXT:    ucvtf.4s v1, v2
; CHECK-NEXT:    str q1, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
  %1 = insertelement <23 x i32> zeroinitializer, i32 %vaddlv, i64 0
  %2 = uitofp <23 x i32> %1 to <23 x float>
  store <23 x float> %2, ptr %0, align 8
  ret void
}
; uaddlv over a zero <16 x i8>, inserted at lane 0 of a zero <2 x i32>, then uitofp'd and stored.
define void @insert_vec_v2i32_uaddlv_from_v16i8(ptr %0) {
; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v16i8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    uaddlv.16b h0, v0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    ucvtf.2s v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> zeroinitializer)
  %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
  %2 = uitofp <2 x i32> %1 to <2 x float>
  store <2 x float> %2, ptr %0, align 8
  ret void
}
; uaddlv over a zero <8 x i8>, inserted at lane 0 of a zero <2 x i32>, then uitofp'd and stored.
define void @insert_vec_v2i32_uaddlv_from_v8i8(ptr %0) {
; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v8i8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    uaddlv.8b h1, v0
; CHECK-NEXT:    mov.s v0[0], v1[0]
; CHECK-NEXT:    ucvtf.2s v0, v0
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> zeroinitializer)
  %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
  %2 = uitofp <2 x i32> %1 to <2 x float>
  store <2 x float> %2, ptr %0, align 8
  ret void
}
; uaddlv over a zero <4 x i16>, inserted at lane 0 of a zero <2 x i32>, then uitofp'd and stored.
define void @insert_vec_v2i32_uaddlv_from_v4i16(ptr %0) {
; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v4i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    uaddlv.4h s1, v0
; CHECK-NEXT:    mov.s v0[0], v1[0]
; CHECK-NEXT:    ucvtf.2s v0, v0
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> zeroinitializer)
  %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
  %2 = uitofp <2 x i32> %1 to <2 x float>
  store <2 x float> %2, ptr %0, align 8
  ret void
}
; i64 uaddlv over a zero <4 x i32>, inserted at lane 0 of a zero <6 x i64>, uitofp'd to <6 x float> and stored.
define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v2, #0000000000000000
; CHECK-NEXT:    uaddlv.4s d1, v0
; CHECK-NEXT:    str d2, [x0, #16]
; CHECK-NEXT:    mov.d v0[0], v1[0]
; CHECK-NEXT:    ucvtf.2d v0, v0
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = insertelement <6 x i64> zeroinitializer, i64 %vaddlv, i64 0
  %2 = uitofp <6 x i64> %1 to <6 x float>
  store <6 x float> %2, ptr %0, align 8
  ret void
}
; i64 uaddlv over a zero <4 x i32>, inserted at lane 0 of a zero <2 x i64>, uitofp'd to <2 x float> and stored.
define void @insert_vec_v2i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v2i64_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    uaddlv.4s d1, v0
; CHECK-NEXT:    mov.d v0[0], v1[0]
; CHECK-NEXT:    ucvtf.2d v0, v0
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = insertelement <2 x i64> zeroinitializer, i64 %vaddlv, i64 0
  %2 = uitofp <2 x i64> %1 to <2 x float>
  store <2 x float> %2, ptr %0, align 8
  ret void
}
; Odd-sized case: i64 uaddlv inserted at lane 0 of a zero <5 x i64>, uitofp'd to <5 x float> and stored.
define void @insert_vec_v5i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v5i64_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    str wzr, [x0, #16]
; CHECK-NEXT:    uaddlv.4s d1, v0
; CHECK-NEXT:    mov.d v0[0], v1[0]
; CHECK-NEXT:    ucvtf.2d v0, v0
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = insertelement <5 x i64> zeroinitializer, i64 %vaddlv, i64 0
  %2 = uitofp <5 x i64> %1 to <5 x float>
  store <5 x float> %2, ptr %0, align 8
  ret void
}
; uaddlv result truncated to i16, inserted at lane 0 of a zero <8 x i16>, uitofp'd to <8 x float> and stored.
define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
; CHECK-LABEL: insert_vec_v8i16_uaddlv_from_v8i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
; CHECK-NEXT:    uaddlv.8h s0, v0
; CHECK-NEXT:    mov.h v1[0], v0[0]
; CHECK-NEXT:    ushll.4s v1, v1, #0
; CHECK-NEXT:    ucvtf.4s v1, v1
; CHECK-NEXT:    str q1, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
  %1 = trunc i32 %vaddlv to i16
  %2 = insertelement <8 x i16> zeroinitializer, i16 %1, i64 0
  %3 = uitofp <8 x i16> %2 to <8 x float>
  store <8 x float> %3, ptr %0, align 8
  ret void
}
; Odd-sized case: truncated uaddlv inserted at lane 0 of a zero <3 x i16>, uitofp'd to <3 x float> and stored.
define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
; CHECK-LABEL: insert_vec_v3i16_uaddlv_from_v8i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    uaddlv.8h s0, v0
; CHECK-NEXT:    mov.h v1[0], v0[0]
; CHECK-NEXT:    ushll.4s v1, v1, #0
; CHECK-NEXT:    ucvtf.4s v1, v1
; CHECK-NEXT:    st1.s { v1 }[2], [x8]
; CHECK-NEXT:    str d1, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
  %1 = trunc i32 %vaddlv to i16
  %2 = insertelement <3 x i16> zeroinitializer, i16 %1, i64 0
  %3 = uitofp <3 x i16> %2 to <3 x float>
  store <3 x float> %3, ptr %0, align 8
  ret void
}
; uaddlv result zero-extended to i64, inserted at lane 0 of a zero <16 x i64>, uitofp'd to <16 x float> and stored.
define void @insert_vec_v16i64_uaddlv_from_v4i16(ptr %0) {
; CHECK-LABEL: insert_vec_v16i64_uaddlv_from_v4i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v2, #0000000000000000
; CHECK-NEXT:    uaddlv.4h s1, v0
; CHECK-NEXT:    stp q0, q0, [x0, #32]
; CHECK-NEXT:    mov.s v2[0], v1[0]
; CHECK-NEXT:    ucvtf.2d v1, v2
; CHECK-NEXT:    fcvtn v1.2s, v1.2d
; CHECK-NEXT:    stp q1, q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> zeroinitializer)
  %1 = zext i32 %vaddlv to i64
  %2 = insertelement <16 x i64> zeroinitializer, i64 %1, i64 0
  %3 = uitofp <16 x i64> %2 to <16 x float>
  store <16 x float> %3, ptr %0, align 8
  ret void
}
; uaddlv result truncated to i8, inserted at lane 0 of a zero <16 x i8>, uitofp'd to <16 x float> and stored.
define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
; CHECK-LABEL: insert_vec_v16i8_uaddlv_from_v8i8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v2, #0000000000000000
; CHECK-NEXT:    uaddlv.8b h1, v0
; CHECK-NEXT:    stp q0, q0, [x0, #32]
; CHECK-NEXT:    mov.h v2[0], v1[0]
; CHECK-NEXT:    bic.4h v2, #255, lsl #8
; CHECK-NEXT:    ushll.4s v2, v2, #0
; CHECK-NEXT:    ucvtf.4s v2, v2
; CHECK-NEXT:    stp q2, q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> zeroinitializer)
  %1 = trunc i32 %vaddlv to i8
  %2 = insertelement <16 x i8> zeroinitializer, i8 %1, i64 0
  %3 = uitofp <16 x i8> %2 to <16 x float>
  store <16 x float> %3, ptr %0, align 8
  ret void
}
; uaddlv result truncated to i8, inserted at lane 0 of a zero <8 x i8>, uitofp'd to <8 x float> and stored.
; NOTE(review): the bic immediate was "#7" in the reviewed copy; the i8 zero-extend requires clearing the
; whole high byte of each halfword, and the identical pattern in the sibling tests checks "#255, lsl #8".
define void @insert_vec_v8i8_uaddlv_from_v8i8(ptr %0) {
; CHECK-LABEL: insert_vec_v8i8_uaddlv_from_v8i8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
; CHECK-NEXT:    uaddlv.8b h1, v0
; CHECK-NEXT:    mov.h v0[0], v1[0]
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    ucvtf.4s v0, v0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> zeroinitializer)
  %1 = trunc i32 %vaddlv to i8
  %2 = insertelement <8 x i8> zeroinitializer, i8 %1, i64 0
  %3 = uitofp <8 x i8> %2 to <8 x float>
  store <8 x float> %3, ptr %0, align 8
  ret void
}
; uaddlv result truncated to i16, inserted at lane 0 of a zero <12 x i16>, uitofp'd to <12 x float> and stored.
define void @insert_vec_v12i16_uaddlv_from_v4i16(ptr %0) {
; CHECK-LABEL: insert_vec_v12i16_uaddlv_from_v4i16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
; CHECK-NEXT:    stp xzr, xzr, [x0, #32]
; CHECK-NEXT:    uaddlv.4h s1, v0
; CHECK-NEXT:    mov.h v0[0], v1[0]
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    ucvtf.4s v0, v0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> zeroinitializer)
  %1 = trunc i32 %vaddlv to i16
  %2 = insertelement <12 x i16> zeroinitializer, i16 %1, i64 0
  %3 = uitofp <12 x i16> %2 to <12 x float>
  store <12 x float> %3, ptr %0, align 8
  ret void
}
; i64 uaddlv truncated to i32, inserted at lane 0 of a zero <8 x i32>, uitofp'd to <8 x float> and stored.
define void @insert_vec_v8i32_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v8i32_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
; CHECK-NEXT:    uaddlv.4s d1, v0
; CHECK-NEXT:    mov.s v0[0], v1[0]
; CHECK-NEXT:    ucvtf.4s v0, v0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = trunc i64 %vaddlv to i32
  %2 = insertelement <8 x i32> zeroinitializer, i32 %1, i64 0
  %3 = uitofp <8 x i32> %2 to <8 x float>
  store <8 x float> %3, ptr %0, align 8
  ret void
}
; i64 uaddlv truncated to i32, inserted at lane 0 of a zero <16 x i32>, uitofp'd to <16 x float> and stored.
define void @insert_vec_v16i32_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v2, #0000000000000000
; CHECK-NEXT:    uaddlv.4s d1, v0
; CHECK-NEXT:    stp q0, q0, [x0, #32]
; CHECK-NEXT:    mov.s v2[0], v1[0]
; CHECK-NEXT:    ucvtf.4s v1, v2
; CHECK-NEXT:    stp q1, q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = trunc i64 %vaddlv to i32
  %2 = insertelement <16 x i32> zeroinitializer, i32 %1, i64 0
  %3 = uitofp <16 x i32> %2 to <16 x float>
  store <16 x float> %3, ptr %0, align 8
  ret void
}
; i64 uaddlv truncated to i16, inserted at lane 0 of a zero <4 x i16>, uitofp'd to <4 x float> and stored.
define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v4i16_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    uaddlv.4s d0, v0
; CHECK-NEXT:    mov.h v1[0], v0[0]
; CHECK-NEXT:    ushll.4s v0, v1, #0
; CHECK-NEXT:    ucvtf.4s v0, v0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = trunc i64 %vaddlv to i16
  %2 = insertelement <4 x i16> zeroinitializer, i16 %1, i64 0
  %3 = uitofp <4 x i16> %2 to <4 x float>
  store <4 x float> %3, ptr %0, align 8
  ret void
}
; i64 uaddlv truncated to i16, inserted at lane 0 of a zero <16 x i16>, uitofp'd to <16 x float> and stored.
define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v16i16_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    uaddlv.4s d0, v0
; CHECK-NEXT:    mov.h v1[0], v0[0]
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    ushll.4s v1, v1, #0
; CHECK-NEXT:    stp q0, q0, [x0, #32]
; CHECK-NEXT:    ucvtf.4s v1, v1
; CHECK-NEXT:    stp q1, q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = trunc i64 %vaddlv to i16
  %2 = insertelement <16 x i16> zeroinitializer, i16 %1, i64 0
  %3 = uitofp <16 x i16> %2 to <16 x float>
  store <16 x float> %3, ptr %0, align 8
  ret void
}
; i64 uaddlv truncated to i8, inserted at lane 0 of a zero <8 x i8>, uitofp'd to <8 x float> and stored.
define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v8i8_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
; CHECK-NEXT:    uaddlv.4s d0, v0
; CHECK-NEXT:    mov.h v1[0], v0[0]
; CHECK-NEXT:    bic.4h v1, #255, lsl #8
; CHECK-NEXT:    ushll.4s v0, v1, #0
; CHECK-NEXT:    ucvtf.4s v0, v0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = trunc i64 %vaddlv to i8
  %2 = insertelement <8 x i8> zeroinitializer, i8 %1, i64 0
  %3 = uitofp <8 x i8> %2 to <8 x float>
  store <8 x float> %3, ptr %0, align 8
  ret void
}
; i64 uaddlv truncated to i8, inserted at lane 0 of a zero <16 x i8>, uitofp'd to <16 x float> and stored.
define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v16i8_uaddlv_from_v4i32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    uaddlv.4s d0, v0
; CHECK-NEXT:    mov.h v1[0], v0[0]
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    bic.4h v1, #255, lsl #8
; CHECK-NEXT:    stp q0, q0, [x0, #32]
; CHECK-NEXT:    ushll.4s v1, v1, #0
; CHECK-NEXT:    ucvtf.4s v1, v1
; CHECK-NEXT:    stp q1, q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
  %1 = trunc i64 %vaddlv to i8
  %2 = insertelement <16 x i8> zeroinitializer, i8 %1, i64 0
  %3 = uitofp <16 x i8> %2 to <16 x float>
  store <16 x float> %3, ptr %0, align 8
  ret void
}
; Non-zero-index variant: uaddlv result inserted at lane 2 of a zero <4 x i32>, then uitofp'd and stored.
; NOTE(review): this function's trailing lines ('ret'/'ret void'/'}') fall outside the visible excerpt,
; and the lines below still carry spurious line-number prefixes from text extraction; left byte-identical
; rather than reconstructed, since the missing tail may exist past the end of this excerpt.
472 define void @insert_vec_v2i32_uaddlv_from_v8i16_nz_index(ptr %0) {
473 ; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v8i16_nz_index:
474 ; CHECK: ; %bb.0: ; %entry
475 ; CHECK-NEXT: movi.2d v0, #0000000000000000
476 ; CHECK-NEXT: uaddlv.8h s1, v0
477 ; CHECK-NEXT: mov.s v0[2], v1[0]
478 ; CHECK-NEXT: ucvtf.4s v0, v0
479 ; CHECK-NEXT: str q0, [x0]
483 %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
484 %1 = insertelement <4 x i32> zeroinitializer, i32 %vaddlv, i64 2
485 %2 = uitofp <4 x i32> %1 to <4 x float>
486 store <4 x float> %2, ptr %0, align 8