1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Interleave %src1 and %src2 lane by lane (mask 0,4,1,5,2,6,3,7: %src1 lanes
; land in the even result positions, %src2 lanes in the odd ones), then
; truncate the <8 x i32> result to <8 x i16>.
; The assertions expect this to be selected as vmovnt.i32 q0, q1.
4 define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2) {
5 ; CHECK-LABEL: vmovn32_trunc1:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vmovnt.i32 q0, q1
10 %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
11 %out = trunc <8 x i32> %strided.vec to <8 x i16>
; Same interleave-then-truncate shape as vmovn32_trunc1 but with the operand
; roles swapped (mask 4,0,5,1,6,2,7,3: %src2 lanes in the even result
; positions, %src1 lanes in the odd ones).
; The assertions expect vmovnt.i32 q1, q0 followed by a vmov q0, q1 copy.
15 define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2) {
16 ; CHECK-LABEL: vmovn32_trunc2:
17 ; CHECK: @ %bb.0: @ %entry
18 ; CHECK-NEXT: vmovnt.i32 q1, q0
19 ; CHECK-NEXT: vmov q0, q1
22 %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
23 %out = trunc <8 x i32> %strided.vec to <8 x i16>
; Interleave two <8 x i16> inputs lane by lane (mask 0,8,1,9,...,7,15:
; %src1 lanes in the even result positions, %src2 lanes in the odd ones),
; then truncate the <16 x i16> result to <16 x i8>.
; The assertions expect this to be selected as vmovnt.i16 q0, q1.
27 define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2) {
28 ; CHECK-LABEL: vmovn16_trunc1:
29 ; CHECK: @ %bb.0: @ %entry
30 ; CHECK-NEXT: vmovnt.i16 q0, q1
33 %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
34 %out = trunc <16 x i16> %strided.vec to <16 x i8>
; Same interleave-then-truncate shape as vmovn16_trunc1 but with the operand
; roles swapped (mask 8,0,9,1,...,15,7: %src2 lanes in the even result
; positions, %src1 lanes in the odd ones).
; The assertions expect vmovnt.i16 q1, q0 followed by a vmov q0, q1 copy.
38 define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2) {
39 ; CHECK-LABEL: vmovn16_trunc2:
40 ; CHECK: @ %bb.0: @ %entry
41 ; CHECK-NEXT: vmovnt.i16 q1, q0
42 ; CHECK-NEXT: vmov q0, q1
45 %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
46 %out = trunc <16 x i16> %strided.vec to <16 x i8>
; 64-bit element shuffle with mask <0, 2>: result lane 0 is %src1 lane 0 and
; result lane 1 is %src2 lane 0.
; The assertions expect two vmov.f32 moves (s4 -> s2, s5 -> s3) rather than
; a vmovn instruction.
51 define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
52 ; CHECK-LABEL: vmovn64_t1:
53 ; CHECK: @ %bb.0: @ %entry
54 ; CHECK-NEXT: vmov.f32 s2, s4
55 ; CHECK-NEXT: vmov.f32 s3, s5
58 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
; 64-bit element shuffle with mask <2, 0>: result lane 0 is %src2 lane 0 and
; result lane 1 is %src1 lane 0 (vmovn64_t1 with the operands swapped).
; The assertions expect two vmov.f32 moves into s6/s7 and then a
; vmov q0, q1 copy.
62 define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
63 ; CHECK-LABEL: vmovn64_t2:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: vmov.f32 s6, s0
66 ; CHECK-NEXT: vmov.f32 s7, s1
67 ; CHECK-NEXT: vmov q0, q1
70 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
; 64-bit element shuffle with mask <0, 3>: result lane 0 is %src1 lane 0 and
; result lane 1 is %src2 lane 1.
; The assertions expect two vmov.f32 moves (s6 -> s2, s7 -> s3).
74 define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
75 ; CHECK-LABEL: vmovn64_b1:
76 ; CHECK: @ %bb.0: @ %entry
77 ; CHECK-NEXT: vmov.f32 s2, s6
78 ; CHECK-NEXT: vmov.f32 s3, s7
81 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
; 64-bit element shuffle with mask <3, 0>: result lane 0 is %src2 lane 1 and
; result lane 1 is %src1 lane 0.
; The assertions expect four vmov.f32 moves that assemble the value in
; s4..s7, followed by a vmov q0, q1 copy.
85 define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
86 ; CHECK-LABEL: vmovn64_b2:
87 ; CHECK: @ %bb.0: @ %entry
88 ; CHECK-NEXT: vmov.f32 s4, s6
89 ; CHECK-NEXT: vmov.f32 s5, s7
90 ; CHECK-NEXT: vmov.f32 s6, s0
91 ; CHECK-NEXT: vmov.f32 s7, s1
92 ; CHECK-NEXT: vmov q0, q1
95 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
; 64-bit element shuffle with mask <1, 2>: result lane 0 is %src1 lane 1 and
; result lane 1 is %src2 lane 0.
; The assertions expect four vmov.f32 moves writing s0..s3 in place.
99 define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
100 ; CHECK-LABEL: vmovn64_b3:
101 ; CHECK: @ %bb.0: @ %entry
102 ; CHECK-NEXT: vmov.f32 s0, s2
103 ; CHECK-NEXT: vmov.f32 s1, s3
104 ; CHECK-NEXT: vmov.f32 s2, s4
105 ; CHECK-NEXT: vmov.f32 s3, s5
108 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
; 64-bit element shuffle with mask <2, 1>: result lane 0 is %src2 lane 0 and
; result lane 1 is %src1 lane 1.
; The assertions expect two vmov.f32 moves into s6/s7 and then a
; vmov q0, q1 copy.
112 define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
113 ; CHECK-LABEL: vmovn64_b4:
114 ; CHECK: @ %bb.0: @ %entry
115 ; CHECK-NEXT: vmov.f32 s6, s2
116 ; CHECK-NEXT: vmov.f32 s7, s3
117 ; CHECK-NEXT: vmov q0, q1
120 %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
; 32-bit element shuffle with mask <0, 4, 2, 6>: the even lanes of %src1 stay
; in the even result positions and the even lanes of %src2 fill the odd ones.
; The assertions expect two vmov.f32 moves (s4 -> s1, s6 -> s3).
126 define arm_aapcs_vfpcc <4 x i32> @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2) {
127 ; CHECK-LABEL: vmovn32_t1:
128 ; CHECK: @ %bb.0: @ %entry
129 ; CHECK-NEXT: vmov.f32 s1, s4
130 ; CHECK-NEXT: vmov.f32 s3, s6
133 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; 32-bit element shuffle with mask <4, 0, 6, 2>: the even lanes of %src2 stay
; in the even result positions and the even lanes of %src1 fill the odd ones
; (vmovn32_t1 with the operands swapped).
; The assertions expect two vmov.f32 moves into s5/s7 and then a
; vmov q0, q1 copy.
137 define arm_aapcs_vfpcc <4 x i32> @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2) {
138 ; CHECK-LABEL: vmovn32_t2:
139 ; CHECK: @ %bb.0: @ %entry
140 ; CHECK-NEXT: vmov.f32 s5, s0
141 ; CHECK-NEXT: vmov.f32 s7, s2
142 ; CHECK-NEXT: vmov q0, q1
145 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
; 32-bit element shuffle with mask <0, 5, 2, 7>: the even lanes of %src1 stay
; in the even result positions and the odd lanes of %src2 stay in the odd ones.
; The assertions expect two vmov.f32 moves (s5 -> s1, s7 -> s3).
149 define arm_aapcs_vfpcc <4 x i32> @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2) {
150 ; CHECK-LABEL: vmovn32_b1:
151 ; CHECK: @ %bb.0: @ %entry
152 ; CHECK-NEXT: vmov.f32 s1, s5
153 ; CHECK-NEXT: vmov.f32 s3, s7
156 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; 32-bit element shuffle with mask <5, 0, 7, 2>: the odd lanes of %src2 move
; to the even result positions and the even lanes of %src1 move to the odd
; ones, so no lane stays where it was.
; The assertions expect four vmov.f32 moves into s8..s11 followed by a
; vmov q0, q2 copy.
160 define arm_aapcs_vfpcc <4 x i32> @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2) {
161 ; CHECK-LABEL: vmovn32_b2:
162 ; CHECK: @ %bb.0: @ %entry
163 ; CHECK-NEXT: vmov.f32 s8, s5
164 ; CHECK-NEXT: vmov.f32 s9, s0
165 ; CHECK-NEXT: vmov.f32 s10, s7
166 ; CHECK-NEXT: vmov.f32 s11, s2
167 ; CHECK-NEXT: vmov q0, q2
170 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
; 32-bit element shuffle with mask <1, 4, 3, 6>: the odd lanes of %src1 move
; to the even result positions and the even lanes of %src2 move to the odd
; ones, so no lane stays where it was.
; The assertions expect four vmov.f32 moves into s8..s11 followed by a
; vmov q0, q2 copy.
174 define arm_aapcs_vfpcc <4 x i32> @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2) {
175 ; CHECK-LABEL: vmovn32_b3:
176 ; CHECK: @ %bb.0: @ %entry
177 ; CHECK-NEXT: vmov.f32 s8, s1
178 ; CHECK-NEXT: vmov.f32 s9, s4
179 ; CHECK-NEXT: vmov.f32 s10, s3
180 ; CHECK-NEXT: vmov.f32 s11, s6
181 ; CHECK-NEXT: vmov q0, q2
184 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
; 32-bit element shuffle with mask <4, 1, 6, 3>: the even lanes of %src2 stay
; in the even result positions and the odd lanes of %src1 stay in the odd ones.
; The assertions expect two vmov.f32 moves into s5/s7 and then a
; vmov q0, q1 copy.
188 define arm_aapcs_vfpcc <4 x i32> @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2) {
189 ; CHECK-LABEL: vmovn32_b4:
190 ; CHECK: @ %bb.0: @ %entry
191 ; CHECK-NEXT: vmov.f32 s5, s1
192 ; CHECK-NEXT: vmov.f32 s7, s3
193 ; CHECK-NEXT: vmov q0, q1
196 %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
; 16-bit element shuffle with mask <0, 8, 2, 10, 4, 12, 6, 14>: the even lanes
; of %src1 stay in the even result positions and the even lanes of %src2 fill
; the odd ones.
; The assertions expect this to be selected as vmovnt.i32 q0, q1.
203 define arm_aapcs_vfpcc <8 x i16> @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2) {
204 ; CHECK-LABEL: vmovn16_t1:
205 ; CHECK: @ %bb.0: @ %entry
206 ; CHECK-NEXT: vmovnt.i32 q0, q1
209 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; 16-bit element shuffle with mask <8, 0, 10, 2, 12, 4, 14, 6>: the even lanes
; of %src2 stay in the even result positions and the even lanes of %src1 fill
; the odd ones (vmovn16_t1 with the operands swapped).
; The assertions expect vmovnt.i32 q1, q0 followed by a vmov q0, q1 copy.
213 define arm_aapcs_vfpcc <8 x i16> @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2) {
214 ; CHECK-LABEL: vmovn16_t2:
215 ; CHECK: @ %bb.0: @ %entry
216 ; CHECK-NEXT: vmovnt.i32 q1, q0
217 ; CHECK-NEXT: vmov q0, q1
220 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
; 16-bit element shuffle with mask <0, 9, 2, 11, 4, 13, 6, 15>: the even lanes
; of %src1 stay in the even result positions and the odd lanes of %src2 stay
; in the odd ones.
; The assertions expect vmovnb.i32 q1, q0 followed by a vmov q0, q1 copy.
224 define arm_aapcs_vfpcc <8 x i16> @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2) {
225 ; CHECK-LABEL: vmovn16_b1:
226 ; CHECK: @ %bb.0: @ %entry
227 ; CHECK-NEXT: vmovnb.i32 q1, q0
228 ; CHECK-NEXT: vmov q0, q1
231 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
; 16-bit element shuffle with mask <9, 0, 11, 2, 13, 4, 15, 6>: the odd lanes
; of %src2 move to the even result positions and the even lanes of %src1 move
; to the odd ones, so every lane changes position.
; The assertions expect no vmovn here: %src1 is first copied to q2, then the
; result is built in q0 one lane at a time with vmov.u16 (extract to r0) /
; vmov.16 (insert from r0) pairs, alternating between q1 and q2 as the source.
235 define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) {
236 ; CHECK-LABEL: vmovn16_b2:
237 ; CHECK: @ %bb.0: @ %entry
238 ; CHECK-NEXT: vmov q2, q0
239 ; CHECK-NEXT: vmov.u16 r0, q1[1]
240 ; CHECK-NEXT: vmov.16 q0[0], r0
241 ; CHECK-NEXT: vmov.u16 r0, q2[0]
242 ; CHECK-NEXT: vmov.16 q0[1], r0
243 ; CHECK-NEXT: vmov.u16 r0, q1[3]
244 ; CHECK-NEXT: vmov.16 q0[2], r0
245 ; CHECK-NEXT: vmov.u16 r0, q2[2]
246 ; CHECK-NEXT: vmov.16 q0[3], r0
247 ; CHECK-NEXT: vmov.u16 r0, q1[5]
248 ; CHECK-NEXT: vmov.16 q0[4], r0
249 ; CHECK-NEXT: vmov.u16 r0, q2[4]
250 ; CHECK-NEXT: vmov.16 q0[5], r0
251 ; CHECK-NEXT: vmov.u16 r0, q1[7]
252 ; CHECK-NEXT: vmov.16 q0[6], r0
253 ; CHECK-NEXT: vmov.u16 r0, q2[6]
254 ; CHECK-NEXT: vmov.16 q0[7], r0
257 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
; 16-bit element shuffle with mask <1, 8, 3, 10, 5, 12, 7, 14>: the odd lanes
; of %src1 move to the even result positions and the even lanes of %src2 move
; to the odd ones, so every lane changes position.
; The assertions expect no vmovn here: %src1 is copied to q2 and the result is
; built in q0 one lane at a time with vmov.u16 (extract to r0) / vmov.16
; (insert from r0) pairs, alternating between q2 and q1 as the source.
261 define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) {
262 ; CHECK-LABEL: vmovn16_b3:
263 ; CHECK: @ %bb.0: @ %entry
264 ; CHECK-NEXT: vmov.u16 r0, q0[1]
265 ; CHECK-NEXT: vmov q2, q0
266 ; CHECK-NEXT: vmov.16 q0[0], r0
267 ; CHECK-NEXT: vmov.u16 r0, q1[0]
268 ; CHECK-NEXT: vmov.16 q0[1], r0
269 ; CHECK-NEXT: vmov.u16 r0, q2[3]
270 ; CHECK-NEXT: vmov.16 q0[2], r0
271 ; CHECK-NEXT: vmov.u16 r0, q1[2]
272 ; CHECK-NEXT: vmov.16 q0[3], r0
273 ; CHECK-NEXT: vmov.u16 r0, q2[5]
274 ; CHECK-NEXT: vmov.16 q0[4], r0
275 ; CHECK-NEXT: vmov.u16 r0, q1[4]
276 ; CHECK-NEXT: vmov.16 q0[5], r0
277 ; CHECK-NEXT: vmov.u16 r0, q2[7]
278 ; CHECK-NEXT: vmov.16 q0[6], r0
279 ; CHECK-NEXT: vmov.u16 r0, q1[6]
280 ; CHECK-NEXT: vmov.16 q0[7], r0
283 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
; 16-bit element shuffle with mask <8, 1, 10, 3, 12, 5, 14, 7>: the even lanes
; of %src2 stay in the even result positions and the odd lanes of %src1 stay
; in the odd ones.
; The assertions expect this to be selected as vmovnb.i32 q0, q1.
287 define arm_aapcs_vfpcc <8 x i16> @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2) {
288 ; CHECK-LABEL: vmovn16_b4:
289 ; CHECK: @ %bb.0: @ %entry
290 ; CHECK-NEXT: vmovnb.i32 q0, q1
293 %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
; 8-bit element shuffle with mask <0, 16, 2, 18, ..., 14, 30>: the even lanes
; of %src1 stay in the even result positions and the even lanes of %src2 fill
; the odd ones.
; The assertions expect this to be selected as vmovnt.i16 q0, q1.
; NOTE(review): this is the same mask shape as vmovn16_t1 and it selects the
; "t" form of the instruction, so the _b suffix in this name looks swapped
; relative to the vmovn16_* tests -- confirm before relying on the name.
298 define arm_aapcs_vfpcc <16 x i8> @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2) {
299 ; CHECK-LABEL: vmovn8_b1:
300 ; CHECK: @ %bb.0: @ %entry
301 ; CHECK-NEXT: vmovnt.i16 q0, q1
304 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
; 8-bit element shuffle with mask <16, 0, 18, 2, ..., 30, 14>: the even lanes
; of %src2 stay in the even result positions and the even lanes of %src1 fill
; the odd ones.
; The assertions expect vmovnt.i16 q1, q0 followed by a vmov q0, q1 copy.
; NOTE(review): same mask shape as vmovn16_t2; the _b suffix looks swapped
; relative to the vmovn16_* naming -- confirm.
308 define arm_aapcs_vfpcc <16 x i8> @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2) {
309 ; CHECK-LABEL: vmovn8_b2:
310 ; CHECK: @ %bb.0: @ %entry
311 ; CHECK-NEXT: vmovnt.i16 q1, q0
312 ; CHECK-NEXT: vmov q0, q1
315 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
; 8-bit element shuffle with mask <0, 17, 2, 19, ..., 14, 31>: the even lanes
; of %src1 stay in the even result positions and the odd lanes of %src2 stay
; in the odd ones.
; The assertions expect vmovnb.i16 q1, q0 followed by a vmov q0, q1 copy.
; NOTE(review): same mask shape as vmovn16_b1; the _t suffix looks swapped
; relative to the vmovn16_* naming -- confirm.
319 define arm_aapcs_vfpcc <16 x i8> @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2) {
320 ; CHECK-LABEL: vmovn8_t1:
321 ; CHECK: @ %bb.0: @ %entry
322 ; CHECK-NEXT: vmovnb.i16 q1, q0
323 ; CHECK-NEXT: vmov q0, q1
326 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
; 8-bit element shuffle with mask <17, 0, 19, 2, ..., 31, 14>: the odd lanes
; of %src2 move to the even result positions and the even lanes of %src1 move
; to the odd ones, so every lane changes position.
; The assertions expect no vmovn here: %src1 is first copied to q2, then the
; result is built in q0 one byte lane at a time with vmov.u8 (extract to r0) /
; vmov.8 (insert from r0) pairs, alternating between q1 and q2 as the source.
330 define arm_aapcs_vfpcc <16 x i8> @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2) {
331 ; CHECK-LABEL: vmovn8_t2:
332 ; CHECK: @ %bb.0: @ %entry
333 ; CHECK-NEXT: vmov q2, q0
334 ; CHECK-NEXT: vmov.u8 r0, q1[1]
335 ; CHECK-NEXT: vmov.8 q0[0], r0
336 ; CHECK-NEXT: vmov.u8 r0, q2[0]
337 ; CHECK-NEXT: vmov.8 q0[1], r0
338 ; CHECK-NEXT: vmov.u8 r0, q1[3]
339 ; CHECK-NEXT: vmov.8 q0[2], r0
340 ; CHECK-NEXT: vmov.u8 r0, q2[2]
341 ; CHECK-NEXT: vmov.8 q0[3], r0
342 ; CHECK-NEXT: vmov.u8 r0, q1[5]
343 ; CHECK-NEXT: vmov.8 q0[4], r0
344 ; CHECK-NEXT: vmov.u8 r0, q2[4]
345 ; CHECK-NEXT: vmov.8 q0[5], r0
346 ; CHECK-NEXT: vmov.u8 r0, q1[7]
347 ; CHECK-NEXT: vmov.8 q0[6], r0
348 ; CHECK-NEXT: vmov.u8 r0, q2[6]
349 ; CHECK-NEXT: vmov.8 q0[7], r0
350 ; CHECK-NEXT: vmov.u8 r0, q1[9]
351 ; CHECK-NEXT: vmov.8 q0[8], r0
352 ; CHECK-NEXT: vmov.u8 r0, q2[8]
353 ; CHECK-NEXT: vmov.8 q0[9], r0
354 ; CHECK-NEXT: vmov.u8 r0, q1[11]
355 ; CHECK-NEXT: vmov.8 q0[10], r0
356 ; CHECK-NEXT: vmov.u8 r0, q2[10]
357 ; CHECK-NEXT: vmov.8 q0[11], r0
358 ; CHECK-NEXT: vmov.u8 r0, q1[13]
359 ; CHECK-NEXT: vmov.8 q0[12], r0
360 ; CHECK-NEXT: vmov.u8 r0, q2[12]
361 ; CHECK-NEXT: vmov.8 q0[13], r0
362 ; CHECK-NEXT: vmov.u8 r0, q1[15]
363 ; CHECK-NEXT: vmov.8 q0[14], r0
364 ; CHECK-NEXT: vmov.u8 r0, q2[14]
365 ; CHECK-NEXT: vmov.8 q0[15], r0
368 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
; 8-bit element shuffle with mask <1, 16, 3, 18, ..., 15, 30>: the odd lanes
; of %src1 move to the even result positions and the even lanes of %src2 move
; to the odd ones, so every lane changes position.
; The assertions expect no vmovn here: %src1 is copied to q2 and the result is
; built in q0 one byte lane at a time with vmov.u8 (extract to r0) / vmov.8
; (insert from r0) pairs, alternating between q2 and q1 as the source.
372 define arm_aapcs_vfpcc <16 x i8> @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2) {
373 ; CHECK-LABEL: vmovn8_t3:
374 ; CHECK: @ %bb.0: @ %entry
375 ; CHECK-NEXT: vmov.u8 r0, q0[1]
376 ; CHECK-NEXT: vmov q2, q0
377 ; CHECK-NEXT: vmov.8 q0[0], r0
378 ; CHECK-NEXT: vmov.u8 r0, q1[0]
379 ; CHECK-NEXT: vmov.8 q0[1], r0
380 ; CHECK-NEXT: vmov.u8 r0, q2[3]
381 ; CHECK-NEXT: vmov.8 q0[2], r0
382 ; CHECK-NEXT: vmov.u8 r0, q1[2]
383 ; CHECK-NEXT: vmov.8 q0[3], r0
384 ; CHECK-NEXT: vmov.u8 r0, q2[5]
385 ; CHECK-NEXT: vmov.8 q0[4], r0
386 ; CHECK-NEXT: vmov.u8 r0, q1[4]
387 ; CHECK-NEXT: vmov.8 q0[5], r0
388 ; CHECK-NEXT: vmov.u8 r0, q2[7]
389 ; CHECK-NEXT: vmov.8 q0[6], r0
390 ; CHECK-NEXT: vmov.u8 r0, q1[6]
391 ; CHECK-NEXT: vmov.8 q0[7], r0
392 ; CHECK-NEXT: vmov.u8 r0, q2[9]
393 ; CHECK-NEXT: vmov.8 q0[8], r0
394 ; CHECK-NEXT: vmov.u8 r0, q1[8]
395 ; CHECK-NEXT: vmov.8 q0[9], r0
396 ; CHECK-NEXT: vmov.u8 r0, q2[11]
397 ; CHECK-NEXT: vmov.8 q0[10], r0
398 ; CHECK-NEXT: vmov.u8 r0, q1[10]
399 ; CHECK-NEXT: vmov.8 q0[11], r0
400 ; CHECK-NEXT: vmov.u8 r0, q2[13]
401 ; CHECK-NEXT: vmov.8 q0[12], r0
402 ; CHECK-NEXT: vmov.u8 r0, q1[12]
403 ; CHECK-NEXT: vmov.8 q0[13], r0
404 ; CHECK-NEXT: vmov.u8 r0, q2[15]
405 ; CHECK-NEXT: vmov.8 q0[14], r0
406 ; CHECK-NEXT: vmov.u8 r0, q1[14]
407 ; CHECK-NEXT: vmov.8 q0[15], r0
410 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
; 8-bit element shuffle with mask <16, 1, 18, 3, ..., 30, 15>: the even lanes
; of %src2 stay in the even result positions and the odd lanes of %src1 stay
; in the odd ones.
; The assertions expect this to be selected as vmovnb.i16 q0, q1.
; NOTE(review): same mask shape as vmovn16_b4; the _t suffix looks swapped
; relative to the vmovn16_* naming -- confirm.
414 define arm_aapcs_vfpcc <16 x i8> @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2) {
415 ; CHECK-LABEL: vmovn8_t4:
416 ; CHECK: @ %bb.0: @ %entry
417 ; CHECK-NEXT: vmovnb.i16 q0, q1
420 %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>