1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
; Interleave the lanes of %src1 and %src2 (mask 0,4,1,5,2,6,3,7: %src1 lanes
; land in the even result positions, %src2 lanes in the odd ones), truncate
; every i32 lane to i16 and store the <8 x i16> result to %dest.
; Expected output: a single vmovnt.i32 q0, q1 followed by a vstrw.32 of q0.
5 define arm_aapcs_vfpcc void @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
6 ; CHECK-LABEL: vmovn32_trunc1:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vmovnt.i32 q0, q1
9 ; CHECK-NEXT: vstrw.32 q0, [r0]
12 %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
13 %out = trunc <8 x i32> %strided.vec to <8 x i16>
14 store <8 x i16> %out, ptr %dest, align 8
; Same as vmovn32_trunc1 but with the interleave order swapped (mask
; 4,0,5,1,6,2,7,3: %src2 lanes in the even result positions, %src1 lanes in
; the odd ones) before the i32 -> i16 truncate and store.
; Expected output: a single vmovnt.i32 q1, q0 followed by a vstrw.32 of q1.
18 define arm_aapcs_vfpcc void @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
19 ; CHECK-LABEL: vmovn32_trunc2:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vmovnt.i32 q1, q0
22 ; CHECK-NEXT: vstrw.32 q1, [r0]
25 %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
26 %out = trunc <8 x i32> %strided.vec to <8 x i16>
27 store <8 x i16> %out, ptr %dest, align 8
; Single-source variant of vmovn32_trunc1: the two interleaved halves both
; come from the one <8 x i32> argument (mask 0,4,1,5,2,6,3,7 only uses indices
; below 8; the second shuffle operand is undef). Lanes 0-3 of %src1 go to the
; even result positions, lanes 4-7 to the odd ones, then the vector is
; truncated to <8 x i16> and stored.
; Expected output: a single vmovnt.i32 q0, q1 followed by a vstrw.32 of q0.
31 define arm_aapcs_vfpcc void @vmovn32_trunc1_onesrc(<8 x i32> %src1, ptr %dest) {
32 ; CHECK-LABEL: vmovn32_trunc1_onesrc:
33 ; CHECK: @ %bb.0: @ %entry
34 ; CHECK-NEXT: vmovnt.i32 q0, q1
35 ; CHECK-NEXT: vstrw.32 q0, [r0]
38 %strided.vec = shufflevector <8 x i32> %src1, <8 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
39 %out = trunc <8 x i32> %strided.vec to <8 x i16>
40 store <8 x i16> %out, ptr %dest, align 8
; Single-source variant of vmovn32_trunc2: mask 4,0,5,1,6,2,7,3 places lanes
; 4-7 of %src1 in the even result positions and lanes 0-3 in the odd ones
; (second shuffle operand is undef), followed by the i32 -> i16 truncate and
; the store to %dest.
; Expected output: a single vmovnt.i32 q1, q0 followed by a vstrw.32 of q1.
44 define arm_aapcs_vfpcc void @vmovn32_trunc2_onesrc(<8 x i32> %src1, ptr %dest) {
45 ; CHECK-LABEL: vmovn32_trunc2_onesrc:
46 ; CHECK: @ %bb.0: @ %entry
47 ; CHECK-NEXT: vmovnt.i32 q1, q0
48 ; CHECK-NEXT: vstrw.32 q1, [r0]
51 %strided.vec = shufflevector <8 x i32> %src1, <8 x i32> undef, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
52 %out = trunc <8 x i32> %strided.vec to <8 x i16>
53 store <8 x i16> %out, ptr %dest, align 8
; 16-bit counterpart of vmovn32_trunc1: interleave the eight i16 lanes of
; %src1 (even result positions) with those of %src2 (odd result positions),
; truncate every lane to i8 and store the <16 x i8> result to %dest.
; Expected output: a single vmovnt.i16 q0, q1 followed by a vstrw.32 of q0.
57 define arm_aapcs_vfpcc void @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
58 ; CHECK-LABEL: vmovn16_trunc1:
59 ; CHECK: @ %bb.0: @ %entry
60 ; CHECK-NEXT: vmovnt.i16 q0, q1
61 ; CHECK-NEXT: vstrw.32 q0, [r0]
64 %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
65 %out = trunc <16 x i16> %strided.vec to <16 x i8>
66 store <16 x i8> %out, ptr %dest, align 8
; 16-bit counterpart of vmovn32_trunc2: the interleave starts with %src2
; (mask 8,0,9,1,...: %src2 lanes in the even result positions, %src1 lanes in
; the odd ones), then every i16 lane is truncated to i8 and stored.
; Expected output: a single vmovnt.i16 q1, q0 followed by a vstrw.32 of q1.
70 define arm_aapcs_vfpcc void @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
71 ; CHECK-LABEL: vmovn16_trunc2:
72 ; CHECK: @ %bb.0: @ %entry
73 ; CHECK-NEXT: vmovnt.i16 q1, q0
74 ; CHECK-NEXT: vstrw.32 q1, [r0]
77 %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
78 %out = trunc <16 x i16> %strided.vec to <16 x i8>
79 store <16 x i8> %out, ptr %dest, align 8
; Single-source variant of vmovn16_trunc1: lanes 0-7 of the <16 x i16>
; argument go to the even result positions and lanes 8-15 to the odd ones
; (second shuffle operand is undef), then every lane is truncated to i8 and
; the <16 x i8> result is stored to %dest.
; Expected output: a single vmovnt.i16 q0, q1 followed by a vstrw.32 of q0.
83 define arm_aapcs_vfpcc void @vmovn16_trunc1_onesrc(<16 x i16> %src1, ptr %dest) {
84 ; CHECK-LABEL: vmovn16_trunc1_onesrc:
85 ; CHECK: @ %bb.0: @ %entry
86 ; CHECK-NEXT: vmovnt.i16 q0, q1
87 ; CHECK-NEXT: vstrw.32 q0, [r0]
90 %strided.vec = shufflevector <16 x i16> %src1, <16 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
91 %out = trunc <16 x i16> %strided.vec to <16 x i8>
92 store <16 x i8> %out, ptr %dest, align 8
; Single-source variant of vmovn16_trunc2: lanes 8-15 of the <16 x i16>
; argument go to the even result positions and lanes 0-7 to the odd ones
; (second shuffle operand is undef), then every lane is truncated to i8 and
; the <16 x i8> result is stored to %dest.
; Expected output: a single vmovnt.i16 q1, q0 followed by a vstrw.32 of q1.
96 define arm_aapcs_vfpcc void @vmovn16_trunc2_onesrc(<16 x i16> %src1, ptr %dest) {
97 ; CHECK-LABEL: vmovn16_trunc2_onesrc:
98 ; CHECK: @ %bb.0: @ %entry
99 ; CHECK-NEXT: vmovnt.i16 q1, q0
100 ; CHECK-NEXT: vstrw.32 q1, [r0]
103 %strided.vec = shufflevector <16 x i16> %src1, <16 x i16> undef, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
104 %out = trunc <16 x i16> %strided.vec to <16 x i8>
105 store <16 x i8> %out, ptr %dest, align 8
; <2 x i64> shuffle with mask 0,2: result lane 0 is %src1[0] and result lane 1
; is %src2[0]; the result is stored to %dest without any truncate.
; Expected output contains no vmovn: two S-register copies
; (vmov.f32 s2, s4 and vmov.f32 s3, s5) followed by a vstrw.32 of q0.
110 define arm_aapcs_vfpcc void @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
111 ; CHECK-LABEL: vmovn64_t1:
112 ; CHECK: @ %bb.0: @ %entry
113 ; CHECK-NEXT: vmov.f32 s2, s4
114 ; CHECK-NEXT: vmov.f32 s3, s5
115 ; CHECK-NEXT: vstrw.32 q0, [r0]
118 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
119 store <2 x i64> %out, ptr %dest, align 8
; <2 x i64> shuffle with mask 2,0: result lane 0 is %src2[0] and result lane 1
; is %src1[0]; the result is stored to %dest.
; Expected output: vmov.f32 s6, s0 and vmov.f32 s7, s1 followed by a
; vstrw.32 of q1.
123 define arm_aapcs_vfpcc void @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
124 ; CHECK-LABEL: vmovn64_t2:
125 ; CHECK: @ %bb.0: @ %entry
126 ; CHECK-NEXT: vmov.f32 s6, s0
127 ; CHECK-NEXT: vmov.f32 s7, s1
128 ; CHECK-NEXT: vstrw.32 q1, [r0]
131 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
132 store <2 x i64> %out, ptr %dest, align 8
; <2 x i64> shuffle with mask 0,3: result lane 0 is %src1[0] and result lane 1
; is %src2[1]; the result is stored to %dest.
; Expected output: vmov.f32 s2, s6 and vmov.f32 s3, s7 followed by a
; vstrw.32 of q0.
136 define arm_aapcs_vfpcc void @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
137 ; CHECK-LABEL: vmovn64_b1:
138 ; CHECK: @ %bb.0: @ %entry
139 ; CHECK-NEXT: vmov.f32 s2, s6
140 ; CHECK-NEXT: vmov.f32 s3, s7
141 ; CHECK-NEXT: vstrw.32 q0, [r0]
144 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
145 store <2 x i64> %out, ptr %dest, align 8
; <2 x i64> shuffle with mask 3,0: result lane 0 is %src2[1] and result lane 1
; is %src1[0]; the result is stored to %dest.
; Expected output: four vmov.f32 S-register copies (both result lanes have to
; be moved) followed by a vstrw.32 of q1.
149 define arm_aapcs_vfpcc void @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
150 ; CHECK-LABEL: vmovn64_b2:
151 ; CHECK: @ %bb.0: @ %entry
152 ; CHECK-NEXT: vmov.f32 s4, s6
153 ; CHECK-NEXT: vmov.f32 s6, s0
154 ; CHECK-NEXT: vmov.f32 s5, s7
155 ; CHECK-NEXT: vmov.f32 s7, s1
156 ; CHECK-NEXT: vstrw.32 q1, [r0]
159 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
160 store <2 x i64> %out, ptr %dest, align 8
; <2 x i64> shuffle with mask 1,2: result lane 0 is %src1[1] and result lane 1
; is %src2[0]; the result is stored to %dest.
; Expected output: four vmov.f32 S-register copies (both result lanes have to
; be moved) followed by a vstrw.32 of q0.
164 define arm_aapcs_vfpcc void @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
165 ; CHECK-LABEL: vmovn64_b3:
166 ; CHECK: @ %bb.0: @ %entry
167 ; CHECK-NEXT: vmov.f32 s0, s2
168 ; CHECK-NEXT: vmov.f32 s2, s4
169 ; CHECK-NEXT: vmov.f32 s1, s3
170 ; CHECK-NEXT: vmov.f32 s3, s5
171 ; CHECK-NEXT: vstrw.32 q0, [r0]
174 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
175 store <2 x i64> %out, ptr %dest, align 8
; <2 x i64> shuffle with mask 2,1: result lane 0 is %src2[0] and result lane 1
; is %src1[1]; the result is stored to %dest.
; Expected output: vmov.f32 s6, s2 and vmov.f32 s7, s3 followed by a
; vstrw.32 of q1.
179 define arm_aapcs_vfpcc void @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
180 ; CHECK-LABEL: vmovn64_b4:
181 ; CHECK: @ %bb.0: @ %entry
182 ; CHECK-NEXT: vmov.f32 s6, s2
183 ; CHECK-NEXT: vmov.f32 s7, s3
184 ; CHECK-NEXT: vstrw.32 q1, [r0]
187 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
188 store <2 x i64> %out, ptr %dest, align 8
; <4 x i32> shuffle with mask 0,4,2,6: the even lanes of %src1 stay in the
; even result positions and the even lanes of %src2 fill the odd positions;
; the result is stored to %dest.
; Expected output: vmov.f32 s1, s4 and vmov.f32 s3, s6 followed by a
; vstrw.32 of q0 (no vmovn is expected for 32-bit lanes).
194 define arm_aapcs_vfpcc void @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
195 ; CHECK-LABEL: vmovn32_t1:
196 ; CHECK: @ %bb.0: @ %entry
197 ; CHECK-NEXT: vmov.f32 s1, s4
198 ; CHECK-NEXT: vmov.f32 s3, s6
199 ; CHECK-NEXT: vstrw.32 q0, [r0]
202 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
203 store <4 x i32> %out, ptr %dest, align 8
; <4 x i32> shuffle with mask 4,0,6,2: the even lanes of %src2 stay in the
; even result positions and the even lanes of %src1 fill the odd positions;
; the result is stored to %dest.
; Expected output: vmov.f32 s5, s0 and vmov.f32 s7, s2 followed by a
; vstrw.32 of q1.
207 define arm_aapcs_vfpcc void @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
208 ; CHECK-LABEL: vmovn32_t2:
209 ; CHECK: @ %bb.0: @ %entry
210 ; CHECK-NEXT: vmov.f32 s5, s0
211 ; CHECK-NEXT: vmov.f32 s7, s2
212 ; CHECK-NEXT: vstrw.32 q1, [r0]
215 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
216 store <4 x i32> %out, ptr %dest, align 8
; <4 x i32> shuffle with mask 0,5,2,7: the even lanes of %src1 stay in the
; even result positions and the odd lanes of %src2 stay in the odd positions;
; the result is stored to %dest.
; Expected output: vmov.f32 s1, s5 and vmov.f32 s3, s7 followed by a
; vstrw.32 of q0.
220 define arm_aapcs_vfpcc void @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
221 ; CHECK-LABEL: vmovn32_b1:
222 ; CHECK: @ %bb.0: @ %entry
223 ; CHECK-NEXT: vmov.f32 s1, s5
224 ; CHECK-NEXT: vmov.f32 s3, s7
225 ; CHECK-NEXT: vstrw.32 q0, [r0]
228 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
229 store <4 x i32> %out, ptr %dest, align 8
; <4 x i32> shuffle with mask 5,0,7,2: the odd lanes of %src2 move to the even
; result positions and the even lanes of %src1 move to the odd positions, so
; no lane stays where it started; the result is stored to %dest.
; Expected output: four vmov.f32 S-register copies followed by a vstrw.32
; of q1.
233 define arm_aapcs_vfpcc void @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
234 ; CHECK-LABEL: vmovn32_b2:
235 ; CHECK: @ %bb.0: @ %entry
236 ; CHECK-NEXT: vmov.f32 s4, s5
237 ; CHECK-NEXT: vmov.f32 s6, s7
238 ; CHECK-NEXT: vmov.f32 s5, s0
239 ; CHECK-NEXT: vmov.f32 s7, s2
240 ; CHECK-NEXT: vstrw.32 q1, [r0]
243 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
244 store <4 x i32> %out, ptr %dest, align 8
; <4 x i32> shuffle with mask 1,4,3,6: the odd lanes of %src1 move to the even
; result positions and the even lanes of %src2 move to the odd positions, so
; no lane stays where it started; the result is stored to %dest.
; Expected output: four vmov.f32 S-register copies followed by a vstrw.32
; of q0.
248 define arm_aapcs_vfpcc void @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
249 ; CHECK-LABEL: vmovn32_b3:
250 ; CHECK: @ %bb.0: @ %entry
251 ; CHECK-NEXT: vmov.f32 s0, s1
252 ; CHECK-NEXT: vmov.f32 s2, s3
253 ; CHECK-NEXT: vmov.f32 s1, s4
254 ; CHECK-NEXT: vmov.f32 s3, s6
255 ; CHECK-NEXT: vstrw.32 q0, [r0]
258 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
259 store <4 x i32> %out, ptr %dest, align 8
; <4 x i32> shuffle with mask 4,1,6,3: the even lanes of %src2 stay in the
; even result positions and the odd lanes of %src1 stay in the odd positions;
; the result is stored to %dest.
; Expected output: vmov.f32 s5, s1 and vmov.f32 s7, s3 followed by a
; vstrw.32 of q1.
263 define arm_aapcs_vfpcc void @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
264 ; CHECK-LABEL: vmovn32_b4:
265 ; CHECK: @ %bb.0: @ %entry
266 ; CHECK-NEXT: vmov.f32 s5, s1
267 ; CHECK-NEXT: vmov.f32 s7, s3
268 ; CHECK-NEXT: vstrw.32 q1, [r0]
271 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
272 store <4 x i32> %out, ptr %dest, align 8
; <8 x i16> shuffle with mask 0,8,2,10,4,12,6,14: the even lanes of %src1 stay
; in the even result positions and the even lanes of %src2 fill the odd
; positions; the result is stored to %dest (no explicit trunc in the IR).
; Expected output: a single vmovnt.i32 q0, q1 followed by a vstrw.32 of q0.
279 define arm_aapcs_vfpcc void @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
280 ; CHECK-LABEL: vmovn16_t1:
281 ; CHECK: @ %bb.0: @ %entry
282 ; CHECK-NEXT: vmovnt.i32 q0, q1
283 ; CHECK-NEXT: vstrw.32 q0, [r0]
286 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
287 store <8 x i16> %out, ptr %dest, align 8
; <8 x i16> shuffle with mask 8,0,10,2,12,4,14,6: the even lanes of %src2 stay
; in the even result positions and the even lanes of %src1 fill the odd
; positions; the result is stored to %dest.
; Expected output: a single vmovnt.i32 q1, q0 followed by a vstrw.32 of q1.
291 define arm_aapcs_vfpcc void @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
292 ; CHECK-LABEL: vmovn16_t2:
293 ; CHECK: @ %bb.0: @ %entry
294 ; CHECK-NEXT: vmovnt.i32 q1, q0
295 ; CHECK-NEXT: vstrw.32 q1, [r0]
298 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
299 store <8 x i16> %out, ptr %dest, align 8
; <8 x i16> shuffle with mask 0,9,2,11,4,13,6,15: the even lanes of %src1 stay
; in the even result positions and the odd lanes of %src2 stay in the odd
; positions; the result is stored to %dest.
; Expected output: a single vmovnb.i32 q1, q0 followed by a vstrw.32 of q1.
303 define arm_aapcs_vfpcc void @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
304 ; CHECK-LABEL: vmovn16_b1:
305 ; CHECK: @ %bb.0: @ %entry
306 ; CHECK-NEXT: vmovnb.i32 q1, q0
307 ; CHECK-NEXT: vstrw.32 q1, [r0]
310 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
311 store <8 x i16> %out, ptr %dest, align 8
; <8 x i16> shuffle with mask 9,0,11,2,13,4,15,6: the odd lanes of %src2 move
; to the even result positions and the even lanes of %src1 move to the odd
; positions; the result is stored to %dest.
; The expected output has no vmovn and differs between the two llc runs:
;  - plain MVE run: the result is built lane by lane in q2, each lane read
;    with vmov.u16 into r1 and inserted with vmov.16;
;  - MVE + fullfp16 run: four vmovx.f16 followed by four vins.f16, result
;    left in q1.
315 define arm_aapcs_vfpcc void @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
316 ; CHECK-MVE-LABEL: vmovn16_b2:
317 ; CHECK-MVE: @ %bb.0: @ %entry
318 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[1]
319 ; CHECK-MVE-NEXT: vmov.16 q2[0], r1
320 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[0]
321 ; CHECK-MVE-NEXT: vmov.16 q2[1], r1
322 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[3]
323 ; CHECK-MVE-NEXT: vmov.16 q2[2], r1
324 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[2]
325 ; CHECK-MVE-NEXT: vmov.16 q2[3], r1
326 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[5]
327 ; CHECK-MVE-NEXT: vmov.16 q2[4], r1
328 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[4]
329 ; CHECK-MVE-NEXT: vmov.16 q2[5], r1
330 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[7]
331 ; CHECK-MVE-NEXT: vmov.16 q2[6], r1
332 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[6]
333 ; CHECK-MVE-NEXT: vmov.16 q2[7], r1
334 ; CHECK-MVE-NEXT: vstrw.32 q2, [r0]
335 ; CHECK-MVE-NEXT: bx lr
337 ; CHECK-MVEFP-LABEL: vmovn16_b2:
338 ; CHECK-MVEFP: @ %bb.0: @ %entry
339 ; CHECK-MVEFP-NEXT: vmovx.f16 s4, s4
340 ; CHECK-MVEFP-NEXT: vmovx.f16 s5, s5
341 ; CHECK-MVEFP-NEXT: vmovx.f16 s6, s6
342 ; CHECK-MVEFP-NEXT: vmovx.f16 s7, s7
343 ; CHECK-MVEFP-NEXT: vins.f16 s4, s0
344 ; CHECK-MVEFP-NEXT: vins.f16 s5, s1
345 ; CHECK-MVEFP-NEXT: vins.f16 s6, s2
346 ; CHECK-MVEFP-NEXT: vins.f16 s7, s3
347 ; CHECK-MVEFP-NEXT: vstrw.32 q1, [r0]
348 ; CHECK-MVEFP-NEXT: bx lr
350 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
351 store <8 x i16> %out, ptr %dest, align 8
; <8 x i16> shuffle with mask 1,8,3,10,5,12,7,14: the odd lanes of %src1 move
; to the even result positions and the even lanes of %src2 move to the odd
; positions; the result is stored to %dest.
; The expected output has no vmovn and differs between the two llc runs:
;  - plain MVE run: the result is built lane by lane in q2, each lane read
;    with vmov.u16 into r1 and inserted with vmov.16;
;  - MVE + fullfp16 run: four vmovx.f16 followed by four vins.f16, result
;    left in q0.
355 define arm_aapcs_vfpcc void @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
356 ; CHECK-MVE-LABEL: vmovn16_b3:
357 ; CHECK-MVE: @ %bb.0: @ %entry
358 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
359 ; CHECK-MVE-NEXT: vmov.16 q2[0], r1
360 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[0]
361 ; CHECK-MVE-NEXT: vmov.16 q2[1], r1
362 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[3]
363 ; CHECK-MVE-NEXT: vmov.16 q2[2], r1
364 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[2]
365 ; CHECK-MVE-NEXT: vmov.16 q2[3], r1
366 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[5]
367 ; CHECK-MVE-NEXT: vmov.16 q2[4], r1
368 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[4]
369 ; CHECK-MVE-NEXT: vmov.16 q2[5], r1
370 ; CHECK-MVE-NEXT: vmov.u16 r1, q0[7]
371 ; CHECK-MVE-NEXT: vmov.16 q2[6], r1
372 ; CHECK-MVE-NEXT: vmov.u16 r1, q1[6]
373 ; CHECK-MVE-NEXT: vmov.16 q2[7], r1
374 ; CHECK-MVE-NEXT: vstrw.32 q2, [r0]
375 ; CHECK-MVE-NEXT: bx lr
377 ; CHECK-MVEFP-LABEL: vmovn16_b3:
378 ; CHECK-MVEFP: @ %bb.0: @ %entry
379 ; CHECK-MVEFP-NEXT: vmovx.f16 s0, s0
380 ; CHECK-MVEFP-NEXT: vmovx.f16 s1, s1
381 ; CHECK-MVEFP-NEXT: vmovx.f16 s2, s2
382 ; CHECK-MVEFP-NEXT: vmovx.f16 s3, s3
383 ; CHECK-MVEFP-NEXT: vins.f16 s0, s4
384 ; CHECK-MVEFP-NEXT: vins.f16 s1, s5
385 ; CHECK-MVEFP-NEXT: vins.f16 s2, s6
386 ; CHECK-MVEFP-NEXT: vins.f16 s3, s7
387 ; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
388 ; CHECK-MVEFP-NEXT: bx lr
390 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
391 store <8 x i16> %out, ptr %dest, align 8
; <8 x i16> shuffle with mask 8,1,10,3,12,5,14,7: the even lanes of %src2 stay
; in the even result positions and the odd lanes of %src1 stay in the odd
; positions; the result is stored to %dest.
; Expected output: a single vmovnb.i32 q0, q1 followed by a vstrw.32 of q0.
395 define arm_aapcs_vfpcc void @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
396 ; CHECK-LABEL: vmovn16_b4:
397 ; CHECK: @ %bb.0: @ %entry
398 ; CHECK-NEXT: vmovnb.i32 q0, q1
399 ; CHECK-NEXT: vstrw.32 q0, [r0]
402 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
403 store <8 x i16> %out, ptr %dest, align 8
; <16 x i8> shuffle with mask 0,16,2,18,...,14,30: the even lanes of %src1
; stay in the even result positions and the even lanes of %src2 fill the odd
; positions; the result is stored to %dest.
; Expected output: a single vmovnt.i16 q0, q1 followed by a vstrw.32 of q0.
; NOTE(review): the 8-bit tests use b1/b2 for the patterns that the 16-bit
; tests call t1/t2 (and vice versa) - naming only, no effect on the checks.
408 define arm_aapcs_vfpcc void @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
409 ; CHECK-LABEL: vmovn8_b1:
410 ; CHECK: @ %bb.0: @ %entry
411 ; CHECK-NEXT: vmovnt.i16 q0, q1
412 ; CHECK-NEXT: vstrw.32 q0, [r0]
415 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
416 store <16 x i8> %out, ptr %dest, align 8
; <16 x i8> shuffle with mask 16,0,18,2,...,30,14: the even lanes of %src2
; stay in the even result positions and the even lanes of %src1 fill the odd
; positions; the result is stored to %dest.
; Expected output: a single vmovnt.i16 q1, q0 followed by a vstrw.32 of q1.
420 define arm_aapcs_vfpcc void @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
421 ; CHECK-LABEL: vmovn8_b2:
422 ; CHECK: @ %bb.0: @ %entry
423 ; CHECK-NEXT: vmovnt.i16 q1, q0
424 ; CHECK-NEXT: vstrw.32 q1, [r0]
427 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
428 store <16 x i8> %out, ptr %dest, align 8
; <16 x i8> shuffle with mask 0,17,2,19,...,14,31: the even lanes of %src1
; stay in the even result positions and the odd lanes of %src2 stay in the
; odd positions; the result is stored to %dest.
; Expected output: a single vmovnb.i16 q1, q0 followed by a vstrw.32 of q1.
432 define arm_aapcs_vfpcc void @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
433 ; CHECK-LABEL: vmovn8_t1:
434 ; CHECK: @ %bb.0: @ %entry
435 ; CHECK-NEXT: vmovnb.i16 q1, q0
436 ; CHECK-NEXT: vstrw.32 q1, [r0]
439 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
440 store <16 x i8> %out, ptr %dest, align 8
; <16 x i8> shuffle with mask 17,0,19,2,...,31,14: the odd lanes of %src2 move
; to the even result positions and the even lanes of %src1 move to the odd
; positions; the result is stored to %dest.
; Expected output has no vmovn: for both llc runs the result is built lane by
; lane in q2 (vmov.u8 into r1, then vmov.8 into the destination lane) and
; stored with vstrw.32.
444 define arm_aapcs_vfpcc void @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
445 ; CHECK-LABEL: vmovn8_t2:
446 ; CHECK: @ %bb.0: @ %entry
447 ; CHECK-NEXT: vmov.u8 r1, q1[1]
448 ; CHECK-NEXT: vmov.8 q2[0], r1
449 ; CHECK-NEXT: vmov.u8 r1, q0[0]
450 ; CHECK-NEXT: vmov.8 q2[1], r1
451 ; CHECK-NEXT: vmov.u8 r1, q1[3]
452 ; CHECK-NEXT: vmov.8 q2[2], r1
453 ; CHECK-NEXT: vmov.u8 r1, q0[2]
454 ; CHECK-NEXT: vmov.8 q2[3], r1
455 ; CHECK-NEXT: vmov.u8 r1, q1[5]
456 ; CHECK-NEXT: vmov.8 q2[4], r1
457 ; CHECK-NEXT: vmov.u8 r1, q0[4]
458 ; CHECK-NEXT: vmov.8 q2[5], r1
459 ; CHECK-NEXT: vmov.u8 r1, q1[7]
460 ; CHECK-NEXT: vmov.8 q2[6], r1
461 ; CHECK-NEXT: vmov.u8 r1, q0[6]
462 ; CHECK-NEXT: vmov.8 q2[7], r1
463 ; CHECK-NEXT: vmov.u8 r1, q1[9]
464 ; CHECK-NEXT: vmov.8 q2[8], r1
465 ; CHECK-NEXT: vmov.u8 r1, q0[8]
466 ; CHECK-NEXT: vmov.8 q2[9], r1
467 ; CHECK-NEXT: vmov.u8 r1, q1[11]
468 ; CHECK-NEXT: vmov.8 q2[10], r1
469 ; CHECK-NEXT: vmov.u8 r1, q0[10]
470 ; CHECK-NEXT: vmov.8 q2[11], r1
471 ; CHECK-NEXT: vmov.u8 r1, q1[13]
472 ; CHECK-NEXT: vmov.8 q2[12], r1
473 ; CHECK-NEXT: vmov.u8 r1, q0[12]
474 ; CHECK-NEXT: vmov.8 q2[13], r1
475 ; CHECK-NEXT: vmov.u8 r1, q1[15]
476 ; CHECK-NEXT: vmov.8 q2[14], r1
477 ; CHECK-NEXT: vmov.u8 r1, q0[14]
478 ; CHECK-NEXT: vmov.8 q2[15], r1
479 ; CHECK-NEXT: vstrw.32 q2, [r0]
482 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
483 store <16 x i8> %out, ptr %dest, align 8
; <16 x i8> shuffle with mask 1,16,3,18,...,15,30: the odd lanes of %src1 move
; to the even result positions and the even lanes of %src2 move to the odd
; positions; the result is stored to %dest.
; Expected output has no vmovn: for both llc runs the result is built lane by
; lane in q2 (vmov.u8 into r1, then vmov.8 into the destination lane) and
; stored with vstrw.32.
487 define arm_aapcs_vfpcc void @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
488 ; CHECK-LABEL: vmovn8_t3:
489 ; CHECK: @ %bb.0: @ %entry
490 ; CHECK-NEXT: vmov.u8 r1, q0[1]
491 ; CHECK-NEXT: vmov.8 q2[0], r1
492 ; CHECK-NEXT: vmov.u8 r1, q1[0]
493 ; CHECK-NEXT: vmov.8 q2[1], r1
494 ; CHECK-NEXT: vmov.u8 r1, q0[3]
495 ; CHECK-NEXT: vmov.8 q2[2], r1
496 ; CHECK-NEXT: vmov.u8 r1, q1[2]
497 ; CHECK-NEXT: vmov.8 q2[3], r1
498 ; CHECK-NEXT: vmov.u8 r1, q0[5]
499 ; CHECK-NEXT: vmov.8 q2[4], r1
500 ; CHECK-NEXT: vmov.u8 r1, q1[4]
501 ; CHECK-NEXT: vmov.8 q2[5], r1
502 ; CHECK-NEXT: vmov.u8 r1, q0[7]
503 ; CHECK-NEXT: vmov.8 q2[6], r1
504 ; CHECK-NEXT: vmov.u8 r1, q1[6]
505 ; CHECK-NEXT: vmov.8 q2[7], r1
506 ; CHECK-NEXT: vmov.u8 r1, q0[9]
507 ; CHECK-NEXT: vmov.8 q2[8], r1
508 ; CHECK-NEXT: vmov.u8 r1, q1[8]
509 ; CHECK-NEXT: vmov.8 q2[9], r1
510 ; CHECK-NEXT: vmov.u8 r1, q0[11]
511 ; CHECK-NEXT: vmov.8 q2[10], r1
512 ; CHECK-NEXT: vmov.u8 r1, q1[10]
513 ; CHECK-NEXT: vmov.8 q2[11], r1
514 ; CHECK-NEXT: vmov.u8 r1, q0[13]
515 ; CHECK-NEXT: vmov.8 q2[12], r1
516 ; CHECK-NEXT: vmov.u8 r1, q1[12]
517 ; CHECK-NEXT: vmov.8 q2[13], r1
518 ; CHECK-NEXT: vmov.u8 r1, q0[15]
519 ; CHECK-NEXT: vmov.8 q2[14], r1
520 ; CHECK-NEXT: vmov.u8 r1, q1[14]
521 ; CHECK-NEXT: vmov.8 q2[15], r1
522 ; CHECK-NEXT: vstrw.32 q2, [r0]
525 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
526 store <16 x i8> %out, ptr %dest, align 8
; <16 x i8> shuffle with mask 16,1,18,3,...,30,15: the even lanes of %src2
; stay in the even result positions and the odd lanes of %src1 stay in the
; odd positions; the result is stored to %dest.
; Expected output: a single vmovnb.i16 q0, q1 followed by a vstrw.32 of q0.
530 define arm_aapcs_vfpcc void @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
531 ; CHECK-LABEL: vmovn8_t4:
532 ; CHECK: @ %bb.0: @ %entry
533 ; CHECK-NEXT: vmovnb.i16 q0, q1
534 ; CHECK-NEXT: vstrw.32 q0, [r0]
537 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
538 store <16 x i8> %out, ptr %dest, align 8