1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
3 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; Full lane reversal of a <4 x i32> (mask <3, 2, 1, 0>). The assertions expect
; four per-lane vmov.f32 copies into q1 followed by a q1 -> q0 move.
5 define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
6 ; CHECK-LABEL: shuffle1_i32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vmov.f32 s4, s3
9 ; CHECK-NEXT: vmov.f32 s5, s2
10 ; CHECK-NEXT: vmov.f32 s6, s1
11 ; CHECK-NEXT: vmov.f32 s7, s0
12 ; CHECK-NEXT: vmov q0, q1
15 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of a <4 x i32> (mask <0, 1, 2, 3>); the visible assertions
; list no lane-moving instructions.
19 define arm_aapcs_vfpcc <4 x i32> @shuffle2_i32(<4 x i32> %src) {
20 ; CHECK-LABEL: shuffle2_i32:
21 ; CHECK: @ %bb.0: @ %entry
24 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Swaps lanes 0 and 3 of a <4 x i32> and keeps lanes 1 and 2 (mask <3, 1, 2, 0>).
; Expected lowering is per-lane vmov.f32 copies into q1, then q1 -> q0.
28 define arm_aapcs_vfpcc <4 x i32> @shuffle3_i32(<4 x i32> %src) {
29 ; CHECK-LABEL: shuffle3_i32:
30 ; CHECK: @ %bb.0: @ %entry
31 ; CHECK-NEXT: vmov.f32 s4, s3
32 ; CHECK-NEXT: vmov.f32 s5, s1
33 ; CHECK-NEXT: vmov.f32 s6, s2
34 ; CHECK-NEXT: vmov.f32 s7, s0
35 ; CHECK-NEXT: vmov q0, q1
38 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
; Swaps the two i32 lanes inside each 64-bit half (mask <1, 0, 3, 2>).
; Expected to select a single vrev64.32 into q1, then copy q1 -> q0.
42 define arm_aapcs_vfpcc <4 x i32> @shuffle5_i32(<4 x i32> %src) {
43 ; CHECK-LABEL: shuffle5_i32:
44 ; CHECK: @ %bb.0: @ %entry
45 ; CHECK-NEXT: vrev64.32 q1, q0
46 ; CHECK-NEXT: vmov q0, q1
49 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; Mostly-undef mask <undef, undef, undef, 3>: only result lane 3 is defined and
; it reads source lane 3; the visible assertions list no lane-moving instructions.
53 define arm_aapcs_vfpcc <4 x i32> @shuffle6_i32(<4 x i32> %src) {
54 ; CHECK-LABEL: shuffle6_i32:
55 ; CHECK: @ %bb.0: @ %entry
58 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 3>
; Full lane reversal of an <8 x i16> (mask <7, 6, ..., 0>). The assertions
; expect the source to be copied to q1 and each lane to be moved through r0
; with a vmov.u16 extract / vmov.16 insert pair.
62 define arm_aapcs_vfpcc <8 x i16> @shuffle1_i16(<8 x i16> %src) {
63 ; CHECK-LABEL: shuffle1_i16:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: vmov q1, q0
66 ; CHECK-NEXT: vmov.u16 r0, q0[7]
67 ; CHECK-NEXT: vmov.16 q0[0], r0
68 ; CHECK-NEXT: vmov.u16 r0, q1[6]
69 ; CHECK-NEXT: vmov.16 q0[1], r0
70 ; CHECK-NEXT: vmov.u16 r0, q1[5]
71 ; CHECK-NEXT: vmov.16 q0[2], r0
72 ; CHECK-NEXT: vmov.u16 r0, q1[4]
73 ; CHECK-NEXT: vmov.16 q0[3], r0
74 ; CHECK-NEXT: vmov.u16 r0, q1[3]
75 ; CHECK-NEXT: vmov.16 q0[4], r0
76 ; CHECK-NEXT: vmov.u16 r0, q1[2]
77 ; CHECK-NEXT: vmov.16 q0[5], r0
78 ; CHECK-NEXT: vmov.u16 r0, q1[1]
79 ; CHECK-NEXT: vmov.16 q0[6], r0
80 ; CHECK-NEXT: vmov.u16 r0, q1[0]
81 ; CHECK-NEXT: vmov.16 q0[7], r0
84 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of an <8 x i16> (mask <0, 1, ..., 7>); the visible
; assertions list no lane-moving instructions.
88 define arm_aapcs_vfpcc <8 x i16> @shuffle2_i16(<8 x i16> %src) {
89 ; CHECK-LABEL: shuffle2_i16:
90 ; CHECK: @ %bb.0: @ %entry
93 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Irregular permutation of an <8 x i16> (mask <4, 5, 7, 6, 3, 1, 2, 0>).
; The assertions expect a lane-by-lane rebuild: copy the source to q1, then
; one vmov.u16 extract and one vmov.16 insert through r0 per result lane.
97 define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
98 ; CHECK-LABEL: shuffle3_i16:
99 ; CHECK: @ %bb.0: @ %entry
100 ; CHECK-NEXT: vmov q1, q0
101 ; CHECK-NEXT: vmov.u16 r0, q0[4]
102 ; CHECK-NEXT: vmov.16 q0[0], r0
103 ; CHECK-NEXT: vmov.u16 r0, q1[5]
104 ; CHECK-NEXT: vmov.16 q0[1], r0
105 ; CHECK-NEXT: vmov.u16 r0, q1[7]
106 ; CHECK-NEXT: vmov.16 q0[2], r0
107 ; CHECK-NEXT: vmov.u16 r0, q1[6]
108 ; CHECK-NEXT: vmov.16 q0[3], r0
109 ; CHECK-NEXT: vmov.u16 r0, q1[3]
110 ; CHECK-NEXT: vmov.16 q0[4], r0
111 ; CHECK-NEXT: vmov.u16 r0, q1[1]
112 ; CHECK-NEXT: vmov.16 q0[5], r0
113 ; CHECK-NEXT: vmov.u16 r0, q1[2]
114 ; CHECK-NEXT: vmov.16 q0[6], r0
115 ; CHECK-NEXT: vmov.u16 r0, q1[0]
116 ; CHECK-NEXT: vmov.16 q0[7], r0
119 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
; Reverses the four i16 lanes inside each 64-bit half (mask <3, 2, 1, 0, 7, 6, 5, 4>).
; Expected to select a single vrev64.16 into q1, then copy q1 -> q0.
123 define arm_aapcs_vfpcc <8 x i16> @shuffle5_i16(<8 x i16> %src) {
124 ; CHECK-LABEL: shuffle5_i16:
125 ; CHECK: @ %bb.0: @ %entry
126 ; CHECK-NEXT: vrev64.16 q1, q0
127 ; CHECK-NEXT: vmov q0, q1
130 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; Swaps each adjacent pair of i16 lanes (mask <1, 0, 3, 2, 5, 4, 7, 6>).
; Expected to select an in-place vrev32.16 on q0.
134 define arm_aapcs_vfpcc <8 x i16> @shuffle6_i16(<8 x i16> %src) {
135 ; CHECK-LABEL: shuffle6_i16:
136 ; CHECK: @ %bb.0: @ %entry
137 ; CHECK-NEXT: vrev32.16 q0, q0
140 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Full lane reversal of a <16 x i8> (mask <15, 14, ..., 0>). The assertions
; expect the source to be copied to q1 and each byte to be moved through r0
; with a vmov.u8 extract / vmov.8 insert pair.
144 define arm_aapcs_vfpcc <16 x i8> @shuffle1_i8(<16 x i8> %src) {
145 ; CHECK-LABEL: shuffle1_i8:
146 ; CHECK: @ %bb.0: @ %entry
147 ; CHECK-NEXT: vmov q1, q0
148 ; CHECK-NEXT: vmov.u8 r0, q0[15]
149 ; CHECK-NEXT: vmov.8 q0[0], r0
150 ; CHECK-NEXT: vmov.u8 r0, q1[14]
151 ; CHECK-NEXT: vmov.8 q0[1], r0
152 ; CHECK-NEXT: vmov.u8 r0, q1[13]
153 ; CHECK-NEXT: vmov.8 q0[2], r0
154 ; CHECK-NEXT: vmov.u8 r0, q1[12]
155 ; CHECK-NEXT: vmov.8 q0[3], r0
156 ; CHECK-NEXT: vmov.u8 r0, q1[11]
157 ; CHECK-NEXT: vmov.8 q0[4], r0
158 ; CHECK-NEXT: vmov.u8 r0, q1[10]
159 ; CHECK-NEXT: vmov.8 q0[5], r0
160 ; CHECK-NEXT: vmov.u8 r0, q1[9]
161 ; CHECK-NEXT: vmov.8 q0[6], r0
162 ; CHECK-NEXT: vmov.u8 r0, q1[8]
163 ; CHECK-NEXT: vmov.8 q0[7], r0
164 ; CHECK-NEXT: vmov.u8 r0, q1[7]
165 ; CHECK-NEXT: vmov.8 q0[8], r0
166 ; CHECK-NEXT: vmov.u8 r0, q1[6]
167 ; CHECK-NEXT: vmov.8 q0[9], r0
168 ; CHECK-NEXT: vmov.u8 r0, q1[5]
169 ; CHECK-NEXT: vmov.8 q0[10], r0
170 ; CHECK-NEXT: vmov.u8 r0, q1[4]
171 ; CHECK-NEXT: vmov.8 q0[11], r0
172 ; CHECK-NEXT: vmov.u8 r0, q1[3]
173 ; CHECK-NEXT: vmov.8 q0[12], r0
174 ; CHECK-NEXT: vmov.u8 r0, q1[2]
175 ; CHECK-NEXT: vmov.8 q0[13], r0
176 ; CHECK-NEXT: vmov.u8 r0, q1[1]
177 ; CHECK-NEXT: vmov.8 q0[14], r0
178 ; CHECK-NEXT: vmov.u8 r0, q1[0]
179 ; CHECK-NEXT: vmov.8 q0[15], r0
182 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of a <16 x i8> (mask <0, 1, ..., 15>); the visible
; assertions list no lane-moving instructions.
186 define arm_aapcs_vfpcc <16 x i8> @shuffle2_i8(<16 x i8> %src) {
187 ; CHECK-LABEL: shuffle2_i8:
188 ; CHECK: @ %bb.0: @ %entry
191 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Irregular permutation of a <16 x i8>
; (mask <4, 5, 15, 7, 14, 9, 6, 3, 10, 12, 1, 13, 2, 8, 0, 11>).
; The assertions expect a lane-by-lane rebuild: copy the source to q1, then
; one vmov.u8 extract and one vmov.8 insert through r0 per result lane.
195 define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
196 ; CHECK-LABEL: shuffle3_i8:
197 ; CHECK: @ %bb.0: @ %entry
198 ; CHECK-NEXT: vmov q1, q0
199 ; CHECK-NEXT: vmov.u8 r0, q0[4]
200 ; CHECK-NEXT: vmov.8 q0[0], r0
201 ; CHECK-NEXT: vmov.u8 r0, q1[5]
202 ; CHECK-NEXT: vmov.8 q0[1], r0
203 ; CHECK-NEXT: vmov.u8 r0, q1[15]
204 ; CHECK-NEXT: vmov.8 q0[2], r0
205 ; CHECK-NEXT: vmov.u8 r0, q1[7]
206 ; CHECK-NEXT: vmov.8 q0[3], r0
207 ; CHECK-NEXT: vmov.u8 r0, q1[14]
208 ; CHECK-NEXT: vmov.8 q0[4], r0
209 ; CHECK-NEXT: vmov.u8 r0, q1[9]
210 ; CHECK-NEXT: vmov.8 q0[5], r0
211 ; CHECK-NEXT: vmov.u8 r0, q1[6]
212 ; CHECK-NEXT: vmov.8 q0[6], r0
213 ; CHECK-NEXT: vmov.u8 r0, q1[3]
214 ; CHECK-NEXT: vmov.8 q0[7], r0
215 ; CHECK-NEXT: vmov.u8 r0, q1[10]
216 ; CHECK-NEXT: vmov.8 q0[8], r0
217 ; CHECK-NEXT: vmov.u8 r0, q1[12]
218 ; CHECK-NEXT: vmov.8 q0[9], r0
219 ; CHECK-NEXT: vmov.u8 r0, q1[1]
220 ; CHECK-NEXT: vmov.8 q0[10], r0
221 ; CHECK-NEXT: vmov.u8 r0, q1[13]
222 ; CHECK-NEXT: vmov.8 q0[11], r0
223 ; CHECK-NEXT: vmov.u8 r0, q1[2]
224 ; CHECK-NEXT: vmov.8 q0[12], r0
225 ; CHECK-NEXT: vmov.u8 r0, q1[8]
226 ; CHECK-NEXT: vmov.8 q0[13], r0
227 ; CHECK-NEXT: vmov.u8 r0, q1[0]
228 ; CHECK-NEXT: vmov.8 q0[14], r0
229 ; CHECK-NEXT: vmov.u8 r0, q1[11]
230 ; CHECK-NEXT: vmov.8 q0[15], r0
233 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
; Reverses the eight bytes inside each 64-bit half (mask <7..0, 15..8>).
; Expected to select a single vrev64.8 into q1, then copy q1 -> q0.
237 define arm_aapcs_vfpcc <16 x i8> @shuffle5_i8(<16 x i8> %src) {
238 ; CHECK-LABEL: shuffle5_i8:
239 ; CHECK: @ %bb.0: @ %entry
240 ; CHECK-NEXT: vrev64.8 q1, q0
241 ; CHECK-NEXT: vmov q0, q1
244 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; Reverses the four bytes inside each 32-bit group (mask <3..0, 7..4, 11..8, 15..12>).
; Expected to select an in-place vrev32.8 on q0.
248 define arm_aapcs_vfpcc <16 x i8> @shuffle6_i8(<16 x i8> %src) {
249 ; CHECK-LABEL: shuffle6_i8:
250 ; CHECK: @ %bb.0: @ %entry
251 ; CHECK-NEXT: vrev32.8 q0, q0
254 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; Swaps each adjacent pair of bytes (mask <1, 0, 3, 2, ..., 15, 14>).
; Expected to select an in-place vrev16.8 on q0.
258 define arm_aapcs_vfpcc <16 x i8> @shuffle7_i8(<16 x i8> %src) {
259 ; CHECK-LABEL: shuffle7_i8:
260 ; CHECK: @ %bb.0: @ %entry
261 ; CHECK-NEXT: vrev16.8 q0, q0
264 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; Identity shuffle of a <2 x i64> (mask <0, 1>); the visible assertions list
; no lane-moving instructions.
268 define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
269 ; CHECK-LABEL: shuffle1_i64:
270 ; CHECK: @ %bb.0: @ %entry
273 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
; Swaps the two i64 lanes (mask <1, 0>). The assertions expect the swap to be
; done as four 32-bit vmov.f32 copies into q1 followed by a q1 -> q0 move.
277 define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
278 ; CHECK-LABEL: shuffle2_i64:
279 ; CHECK: @ %bb.0: @ %entry
280 ; CHECK-NEXT: vmov.f32 s4, s2
281 ; CHECK-NEXT: vmov.f32 s5, s3
282 ; CHECK-NEXT: vmov.f32 s6, s0
283 ; CHECK-NEXT: vmov.f32 s7, s1
284 ; CHECK-NEXT: vmov q0, q1
287 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; Partially-undef mask <undef, 1> on a <2 x i64>: only result lane 1 is defined
; and it reads source lane 1; the visible assertions list no lane-moving instructions.
291 define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
292 ; CHECK-LABEL: shuffle3_i64:
293 ; CHECK: @ %bb.0: @ %entry
296 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
; Full lane reversal of a <4 x float> (mask <3, 2, 1, 0>). The assertions
; expect four per-lane vmov.f32 copies into q1 followed by a q1 -> q0 move.
300 define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
301 ; CHECK-LABEL: shuffle1_f32:
302 ; CHECK: @ %bb.0: @ %entry
303 ; CHECK-NEXT: vmov.f32 s4, s3
304 ; CHECK-NEXT: vmov.f32 s5, s2
305 ; CHECK-NEXT: vmov.f32 s6, s1
306 ; CHECK-NEXT: vmov.f32 s7, s0
307 ; CHECK-NEXT: vmov q0, q1
310 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of a <4 x float> (mask <0, 1, 2, 3>); the visible
; assertions list no lane-moving instructions.
314 define arm_aapcs_vfpcc <4 x float> @shuffle2_f32(<4 x float> %src) {
315 ; CHECK-LABEL: shuffle2_f32:
316 ; CHECK: @ %bb.0: @ %entry
319 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Swaps lanes 0 and 3 of a <4 x float> and keeps lanes 1 and 2 (mask <3, 1, 2, 0>).
; Expected lowering is per-lane vmov.f32 copies into q1, then q1 -> q0.
323 define arm_aapcs_vfpcc <4 x float> @shuffle3_f32(<4 x float> %src) {
324 ; CHECK-LABEL: shuffle3_f32:
325 ; CHECK: @ %bb.0: @ %entry
326 ; CHECK-NEXT: vmov.f32 s4, s3
327 ; CHECK-NEXT: vmov.f32 s5, s1
328 ; CHECK-NEXT: vmov.f32 s6, s2
329 ; CHECK-NEXT: vmov.f32 s7, s0
330 ; CHECK-NEXT: vmov q0, q1
333 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
; Swaps the two float lanes inside each 64-bit half (mask <1, 0, 3, 2>).
; Expected to select a single vrev64.32 into q1, then copy q1 -> q0.
337 define arm_aapcs_vfpcc <4 x float> @shuffle5_f32(<4 x float> %src) {
338 ; CHECK-LABEL: shuffle5_f32:
339 ; CHECK: @ %bb.0: @ %entry
340 ; CHECK-NEXT: vrev64.32 q1, q0
341 ; CHECK-NEXT: vmov q0, q1
344 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; Full lane reversal of an <8 x half> (mask <7, 6, ..., 0>). The assertions
; expect a lane-by-lane rebuild in q1: vmovx.f16 / vmov move each half through
; r0 or r1, vmov.16 inserts it, and q1 is finally copied to q0.
348 define arm_aapcs_vfpcc <8 x half> @shuffle1_f16(<8 x half> %src) {
349 ; CHECK-LABEL: shuffle1_f16:
350 ; CHECK: @ %bb.0: @ %entry
351 ; CHECK-NEXT: vmovx.f16 s4, s3
352 ; CHECK-NEXT: vmov r0, s3
353 ; CHECK-NEXT: vmov r1, s4
354 ; CHECK-NEXT: vmovx.f16 s8, s2
355 ; CHECK-NEXT: vmov.16 q1[0], r1
356 ; CHECK-NEXT: vmov.16 q1[1], r0
357 ; CHECK-NEXT: vmov r0, s8
358 ; CHECK-NEXT: vmov.16 q1[2], r0
359 ; CHECK-NEXT: vmov r0, s2
360 ; CHECK-NEXT: vmovx.f16 s8, s1
361 ; CHECK-NEXT: vmov.16 q1[3], r0
362 ; CHECK-NEXT: vmov r0, s8
363 ; CHECK-NEXT: vmovx.f16 s8, s0
364 ; CHECK-NEXT: vmov.16 q1[4], r0
365 ; CHECK-NEXT: vmov r0, s1
366 ; CHECK-NEXT: vmov.16 q1[5], r0
367 ; CHECK-NEXT: vmov r0, s8
368 ; CHECK-NEXT: vmov.16 q1[6], r0
369 ; CHECK-NEXT: vmov r0, s0
370 ; CHECK-NEXT: vmov.16 q1[7], r0
371 ; CHECK-NEXT: vmov q0, q1
374 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of an <8 x half> (mask <0, 1, ..., 7>); the visible
; assertions list no lane-moving instructions.
378 define arm_aapcs_vfpcc <8 x half> @shuffle2_f16(<8 x half> %src) {
379 ; CHECK-LABEL: shuffle2_f16:
380 ; CHECK: @ %bb.0: @ %entry
383 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Irregular permutation of an <8 x half> (mask <4, 5, 7, 6, 3, 1, 2, 0>).
; The assertions expect a lane-by-lane rebuild in q1 using vmovx.f16 / vmov
; through r0 or r1 and vmov.16 inserts, followed by a q1 -> q0 move.
387 define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
388 ; CHECK-LABEL: shuffle3_f16:
389 ; CHECK: @ %bb.0: @ %entry
390 ; CHECK-NEXT: vmovx.f16 s4, s2
391 ; CHECK-NEXT: vmov r1, s2
392 ; CHECK-NEXT: vmov r0, s4
393 ; CHECK-NEXT: vmov.16 q1[0], r1
394 ; CHECK-NEXT: vmovx.f16 s8, s3
395 ; CHECK-NEXT: vmov.16 q1[1], r0
396 ; CHECK-NEXT: vmov r0, s8
397 ; CHECK-NEXT: vmovx.f16 s8, s1
398 ; CHECK-NEXT: vmov.16 q1[2], r0
399 ; CHECK-NEXT: vmov r0, s3
400 ; CHECK-NEXT: vmov.16 q1[3], r0
401 ; CHECK-NEXT: vmov r0, s8
402 ; CHECK-NEXT: vmovx.f16 s8, s0
403 ; CHECK-NEXT: vmov.16 q1[4], r0
404 ; CHECK-NEXT: vmov r0, s8
405 ; CHECK-NEXT: vmov.16 q1[5], r0
406 ; CHECK-NEXT: vmov r0, s1
407 ; CHECK-NEXT: vmov.16 q1[6], r0
408 ; CHECK-NEXT: vmov r0, s0
409 ; CHECK-NEXT: vmov.16 q1[7], r0
410 ; CHECK-NEXT: vmov q0, q1
413 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
; Reverses the four half lanes inside each 64-bit half (mask <3, 2, 1, 0, 7, 6, 5, 4>).
; Expected to select a single vrev64.16 into q1, then copy q1 -> q0.
417 define arm_aapcs_vfpcc <8 x half> @shuffle5_f16(<8 x half> %src) {
418 ; CHECK-LABEL: shuffle5_f16:
419 ; CHECK: @ %bb.0: @ %entry
420 ; CHECK-NEXT: vrev64.16 q1, q0
421 ; CHECK-NEXT: vmov q0, q1
424 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; Swaps each adjacent pair of half lanes (mask <1, 0, 3, 2, 5, 4, 7, 6>).
; Expected to select an in-place vrev32.16 on q0.
428 define arm_aapcs_vfpcc <8 x half> @shuffle6_f16(<8 x half> %src) {
429 ; CHECK-LABEL: shuffle6_f16:
430 ; CHECK: @ %bb.0: @ %entry
431 ; CHECK-NEXT: vrev32.16 q0, q0
434 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Identity shuffle of a <2 x double> (mask <0, 1>); the visible assertions
; list no lane-moving instructions.
438 define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
439 ; CHECK-LABEL: shuffle1_f64:
440 ; CHECK: @ %bb.0: @ %entry
443 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
444 ret <2 x double> %out
; Swaps the two double lanes (mask <1, 0>). The assertions expect the swap to
; be done as four 32-bit vmov.f32 copies into q1 followed by a q1 -> q0 move.
447 define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
448 ; CHECK-LABEL: shuffle2_f64:
449 ; CHECK: @ %bb.0: @ %entry
450 ; CHECK-NEXT: vmov.f32 s4, s2
451 ; CHECK-NEXT: vmov.f32 s5, s3
452 ; CHECK-NEXT: vmov.f32 s6, s0
453 ; CHECK-NEXT: vmov.f32 s7, s1
454 ; CHECK-NEXT: vmov q0, q1
457 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
458 ret <2 x double> %out
; Partially-undef mask <undef, 1> on a <2 x double>: only result lane 1 is
; defined and it reads source lane 1; the visible assertions list no
; lane-moving instructions.
461 define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
462 ; CHECK-LABEL: shuffle3_f64:
463 ; CHECK: @ %bb.0: @ %entry
466 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
467 ret <2 x double> %out
; Inserts scalar i32 %a into lane 0 of an otherwise-undef <4 x i32>.
; Expected to select a single vmov.32 from r0 into q0[0].
471 define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
472 ; CHECK-LABEL: insert_i32:
473 ; CHECK: @ %bb.0: @ %entry
474 ; CHECK-NEXT: vmov.32 q0[0], r0
477 %res = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts scalar i16 %a into lane 0 of an otherwise-undef <8 x i16>.
; Expected to select a single vmov.16 from r0 into q0[0].
481 define arm_aapcs_vfpcc <8 x i16> @insert_i16(i16 %a) {
482 ; CHECK-LABEL: insert_i16:
483 ; CHECK: @ %bb.0: @ %entry
484 ; CHECK-NEXT: vmov.16 q0[0], r0
487 %res = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts scalar i8 %a into lane 0 of an otherwise-undef <16 x i8>.
; Expected to select a single vmov.8 from r0 into q0[0].
491 define arm_aapcs_vfpcc <16 x i8> @insert_i8(i8 %a) {
492 ; CHECK-LABEL: insert_i8:
493 ; CHECK: @ %bb.0: @ %entry
494 ; CHECK-NEXT: vmov.8 q0[0], r0
497 %res = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts scalar i64 %a into lane 0 of an otherwise-undef <2 x i64>.
; The assertions expect the value to arrive in r0/r1 and be written as two
; 32-bit lane inserts (q0[0] and q0[1]).
501 define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
502 ; CHECK-LABEL: insert_i64:
503 ; CHECK: @ %bb.0: @ %entry
504 ; CHECK-NEXT: vmov.32 q0[0], r0
505 ; CHECK-NEXT: vmov.32 q0[1], r1
508 %res = insertelement <2 x i64> undef, i64 %a, i32 0
; Inserts scalar float %a into lane 0 of an otherwise-undef <4 x float>.
; The only visible assertion is a register-liveness annotation on s0/q0; no
; move instruction is listed.
512 define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
513 ; CHECK-LABEL: insert_f32:
514 ; CHECK: @ %bb.0: @ %entry
515 ; CHECK-NEXT: @ kill: def $s0 killed $s0 def $q0
518 %res = insertelement <4 x float> undef, float %a, i32 0
522 ; TODO: Calling convention needs fixing to pass half types directly to functions
; Loads a half through pointer %aa (so the scalar is not passed by value) and
; inserts it into lane 0 of an otherwise-undef <8 x half>. The visible
; assertion expects a single vldr.16 of [r0] into s0.
523 define arm_aapcs_vfpcc <8 x half> @insert_f16(half *%aa) {
524 ; CHECK-LABEL: insert_f16:
525 ; CHECK: @ %bb.0: @ %entry
526 ; CHECK-NEXT: vldr.16 s0, [r0]
529 %a = load half, half* %aa
530 %res = insertelement <8 x half> undef, half %a, i32 0
; Inserts scalar double %a into lane 0 of an otherwise-undef <2 x double>.
; The assertions expect a round trip through the stack rather than a register
; move: a frame is set up with r7 as frame pointer, sp is realigned by clearing
; its low four bits (bfc r4, #0, #4), d0 is stored to [sp], and the whole
; vector is reloaded into q0 with vldrw.u32 before the frame is torn down.
534 define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
535 ; CHECK-LABEL: insert_f64:
536 ; CHECK: @ %bb.0: @ %entry
537 ; CHECK-NEXT: .save {r4, r6, r7, lr}
538 ; CHECK-NEXT: push {r4, r6, r7, lr}
539 ; CHECK-NEXT: .setfp r7, sp, #8
540 ; CHECK-NEXT: add r7, sp, #8
541 ; CHECK-NEXT: .pad #16
542 ; CHECK-NEXT: sub sp, #16
543 ; CHECK-NEXT: mov r4, sp
544 ; CHECK-NEXT: bfc r4, #0, #4
545 ; CHECK-NEXT: mov sp, r4
546 ; CHECK-NEXT: sub.w r4, r7, #8
547 ; CHECK-NEXT: vstr d0, [sp]
548 ; CHECK-NEXT: mov r0, sp
549 ; CHECK-NEXT: vldrw.u32 q0, [r0]
550 ; CHECK-NEXT: mov sp, r4
551 ; CHECK-NEXT: pop {r4, r6, r7, pc}
553 %res = insertelement <2 x double> undef, double %a, i32 0
554 ret <2 x double> %res
; Builds a <4 x i16> from lane 0 of %v plus elements 1..3 of a constant vector
; (mask <0, 9, 10, 11> selects the constants 7, 1, 9) and bitcasts it to i64.
; The assertions expect lane 0 to be extracted with vmov.u16 and inserted as a
; 32-bit lane, the remaining lanes to be copied from a constant-pool load
; (.LCPI38_0: 4 zero bytes, then 7, 1, 9), the vector to be narrowed to the
; stack with vstrh.32, and the result to be read back into r0/r1 with ldrd.
557 define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
558 ; CHECK-LABEL: scalar_to_vector_i32:
559 ; CHECK: @ %bb.0: @ %entry
560 ; CHECK-NEXT: .pad #8
561 ; CHECK-NEXT: sub sp, #8
562 ; CHECK-NEXT: adr r1, .LCPI38_0
563 ; CHECK-NEXT: vmov.u16 r0, q0[0]
564 ; CHECK-NEXT: vldrw.u32 q1, [r1]
565 ; CHECK-NEXT: vmov.32 q0[0], r0
566 ; CHECK-NEXT: mov r2, sp
567 ; CHECK-NEXT: vmov.f32 s1, s5
568 ; CHECK-NEXT: vmov.f32 s2, s6
569 ; CHECK-NEXT: vmov.f32 s3, s7
570 ; CHECK-NEXT: vstrh.32 q0, [r2]
571 ; CHECK-NEXT: ldrd r0, r1, [sp], #8
573 ; CHECK-NEXT: .p2align 4
574 ; CHECK-NEXT: @ %bb.1:
575 ; CHECK-NEXT: .LCPI38_0:
576 ; CHECK-NEXT: .zero 4
577 ; CHECK-NEXT: .long 7 @ 0x7
578 ; CHECK-NEXT: .long 1 @ 0x1
579 ; CHECK-NEXT: .long 9 @ 0x9
581 %f = shufflevector <8 x i16> %v, <8 x i16> <i16 undef, i16 7, i16 1, i16 9, i16 undef, i16 undef, i16 undef, i16 undef>, <4 x i32> <i32 0, i32 9, i32 10, i32 11>
582 %0 = bitcast <4 x i16> %f to i64
; Extracts lane 0 of a <4 x i32>; expected to be a single vmov from s0 to r0.
587 define arm_aapcs_vfpcc i32 @extract_i32_0(<4 x i32> %a) {
588 ; CHECK-LABEL: extract_i32_0:
589 ; CHECK: @ %bb.0: @ %entry
590 ; CHECK-NEXT: vmov r0, s0
593 %res = extractelement <4 x i32> %a, i32 0
; Extracts lane 3 of a <4 x i32>; expected to be a single vmov from s3 to r0.
597 define arm_aapcs_vfpcc i32 @extract_i32_3(<4 x i32> %a) {
598 ; CHECK-LABEL: extract_i32_3:
599 ; CHECK: @ %bb.0: @ %entry
600 ; CHECK-NEXT: vmov r0, s3
603 %res = extractelement <4 x i32> %a, i32 3
; Extracts lane 0 of an <8 x i16>; expected to be a single vmov.u16 from q0[0] to r0.
607 define arm_aapcs_vfpcc i16 @extract_i16_0(<8 x i16> %a) {
608 ; CHECK-LABEL: extract_i16_0:
609 ; CHECK: @ %bb.0: @ %entry
610 ; CHECK-NEXT: vmov.u16 r0, q0[0]
613 %res = extractelement <8 x i16> %a, i32 0
; Extracts lane 3 of an <8 x i16>; expected to be a single vmov.u16 from q0[3] to r0.
617 define arm_aapcs_vfpcc i16 @extract_i16_3(<8 x i16> %a) {
618 ; CHECK-LABEL: extract_i16_3:
619 ; CHECK: @ %bb.0: @ %entry
620 ; CHECK-NEXT: vmov.u16 r0, q0[3]
623 %res = extractelement <8 x i16> %a, i32 3
; Extracts lane 0 of a <16 x i8>; expected to be a single vmov.u8 from q0[0] to r0.
627 define arm_aapcs_vfpcc i8 @extract_i8_0(<16 x i8> %a) {
628 ; CHECK-LABEL: extract_i8_0:
629 ; CHECK: @ %bb.0: @ %entry
630 ; CHECK-NEXT: vmov.u8 r0, q0[0]
633 %res = extractelement <16 x i8> %a, i32 0
; Extracts lane 3 of a <16 x i8>; expected to be a single vmov.u8 from q0[3] to r0.
637 define arm_aapcs_vfpcc i8 @extract_i8_3(<16 x i8> %a) {
638 ; CHECK-LABEL: extract_i8_3:
639 ; CHECK: @ %bb.0: @ %entry
640 ; CHECK-NEXT: vmov.u8 r0, q0[3]
643 %res = extractelement <16 x i8> %a, i32 3
; Extracts lane 0 of a <2 x i64>; expected to be two 32-bit moves, s0 -> r0
; and s1 -> r1.
647 define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
648 ; CHECK-LABEL: extract_i64_0:
649 ; CHECK: @ %bb.0: @ %entry
650 ; CHECK-NEXT: vmov r0, s0
651 ; CHECK-NEXT: vmov r1, s1
654 %res = extractelement <2 x i64> %a, i32 0
; Extracts lane 1 of a <2 x i64>; expected to be two 32-bit moves, s2 -> r0
; and s3 -> r1.
658 define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
659 ; CHECK-LABEL: extract_i64_1:
660 ; CHECK: @ %bb.0: @ %entry
661 ; CHECK-NEXT: vmov r0, s2
662 ; CHECK-NEXT: vmov r1, s3
665 %res = extractelement <2 x i64> %a, i32 1
; Extracts lane 0 of a <4 x float>. The only visible assertion is a
; register-liveness annotation on s0/q0; no move instruction is listed.
669 define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
670 ; CHECK-LABEL: extract_f32_0:
671 ; CHECK: @ %bb.0: @ %entry
672 ; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $q0
675 %res = extractelement <4 x float> %a, i32 0
; Extracts lane 3 of a <4 x float>; expected to be a single vmov.f32 from s3 to s0.
679 define arm_aapcs_vfpcc float @extract_f32_3(<4 x float> %a) {
680 ; CHECK-LABEL: extract_f32_3:
681 ; CHECK: @ %bb.0: @ %entry
682 ; CHECK-NEXT: vmov.f32 s0, s3
685 %res = extractelement <4 x float> %a, i32 3
; Extracts lane 0 of an <8 x half>. The visible assertion expects a vstr.16 of
; s0 to [r0].
; NOTE(review): r0 is not an IR-level parameter of this function; presumably
; this reflects how half return values were lowered when the assertions were
; generated - confirm before relying on it.
689 define arm_aapcs_vfpcc half @extract_f16_0(<8 x half> %a) {
690 ; CHECK-LABEL: extract_f16_0:
691 ; CHECK: @ %bb.0: @ %entry
692 ; CHECK-NEXT: vstr.16 s0, [r0]
695 %res = extractelement <8 x half> %a, i32 0
; Extracts lane 3 of an <8 x half>. The visible assertions expect a vmovx.f16
; from s1 into s0 followed by a vstr.16 of s0 to [r0].
; NOTE(review): r0 is not an IR-level parameter of this function; presumably
; this reflects how half return values were lowered when the assertions were
; generated - confirm before relying on it.
699 define arm_aapcs_vfpcc half @extract_f16_3(<8 x half> %a) {
700 ; CHECK-LABEL: extract_f16_3:
701 ; CHECK: @ %bb.0: @ %entry
702 ; CHECK-NEXT: vmovx.f16 s0, s1
703 ; CHECK-NEXT: vstr.16 s0, [r0]
706 %res = extractelement <8 x half> %a, i32 3
; Extracts lane 0 of a <2 x double>. The only visible assertion is a
; register-liveness annotation on d0/q0; no move instruction is listed.
710 define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
711 ; CHECK-LABEL: extract_f64_0:
712 ; CHECK: @ %bb.0: @ %entry
713 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
716 %res = extractelement <2 x double> %a, i32 0
720 define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
721 ; CHECK-LABEL: extract_f64_1:
722 ; CHECK: @ %bb.0: @ %entry
723 ; CHECK-NEXT: vmov.f32 s0, s2
724 ; CHECK-NEXT: vmov.f32 s1, s3
727 %res = extractelement <2 x double> %a, i32 1